diff mbox

[v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Message ID 1480549373-2123-1-git-send-email-dhdang@apm.com
State Not Applicable
Headers show

Commit Message

Duc Dang Nov. 30, 2016, 11:42 p.m. UTC
PCIe controllers in X-Gene SoCs is not ECAM compliant: software
needs to configure additional controller's register to address
device at bus:dev:function.

The quirk will only be applied for X-Gene PCIe MCFG table with
OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).

The quirk declares the X-Gene PCIe controller register space as 64KB
fixed memory resource with name "PCIe CSR". This name will be showed
next to the resource range in the output of "cat /proc/iomem".

Signed-off-by: Duc Dang <dhdang@apm.com>
---
v3:
  - Rebase on top of pci/ecam-v6 tree.
  - Use DEFINE_RES_MEM_NAMED to declare controller register space
  with name "PCIe CSR"
v2:
  RFC v2: https://patchwork.ozlabs.org/patch/686846/
v1:
  RFC v1: https://patchwork.kernel.org/patch/9337115/

 drivers/acpi/pci_mcfg.c      |  31 ++++++++
 drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/pci-ecam.h     |   7 ++
 3 files changed, 200 insertions(+), 3 deletions(-)

Comments

Mark Salter Dec. 1, 2016, 3:08 p.m. UTC | #1
On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:
> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> needs to configure additional controller's register to address
> device at bus:dev:function.
> 
> The quirk will only be applied for X-Gene PCIe MCFG table with
> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
> 
> The quirk declares the X-Gene PCIe controller register space as 64KB
> fixed memory resource with name "PCIe CSR". This name will be showed
> next to the resource range in the output of "cat /proc/iomem".
> 
> Signed-off-by: Duc Dang <dhdang@apm.com>
> ---
> v3:
>   - Rebase on top of pci/ecam-v6 tree.
>   - Use DEFINE_RES_MEM_NAMED to declare controller register space
>   with name "PCIe CSR"
> v2:
>   RFC v2: https://patchwork.ozlabs.org/patch/686846/
> v1:
>   RFC v1: https://patchwork.kernel.org/patch/9337115/
> 
>  drivers/acpi/pci_mcfg.c      |  31 ++++++++
>  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci-ecam.h     |   7 ++
>  3 files changed, 200 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> index ac21db3..eb6125b 100644
> --- a/drivers/acpi/pci_mcfg.c
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -57,6 +57,37 @@ struct mcfg_fixup {
>  	{ "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
>  	{ "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
>  	{ "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> +
> +#ifdef CONFIG_PCI_XGENE
> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v1_pcie_ecam_ops }
> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v2_1_pcie_ecam_ops }
> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v2_2_pcie_ecam_ops }
> +
> +	/* X-Gene SoC with v1 PCIe controller */
> +	XGENE_V1_ECAM_MCFG(1, 0),
> +	XGENE_V1_ECAM_MCFG(1, 1),
> +	XGENE_V1_ECAM_MCFG(1, 2),
> +	XGENE_V1_ECAM_MCFG(1, 3),
> +	XGENE_V1_ECAM_MCFG(1, 4),
> +	XGENE_V1_ECAM_MCFG(2, 0),
> +	XGENE_V1_ECAM_MCFG(2, 1),
> +	XGENE_V1_ECAM_MCFG(2, 2),
> +	XGENE_V1_ECAM_MCFG(2, 3),
> +	XGENE_V1_ECAM_MCFG(2, 4),
> +	/* X-Gene SoC with v2.1 PCIe controller */
> +	XGENE_V2_1_ECAM_MCFG(3, 0),
> +	XGENE_V2_1_ECAM_MCFG(3, 1),
> +	/* X-Gene SoC with v2.2 PCIe controller */
> +	XGENE_V2_2_ECAM_MCFG(4, 0),
> +	XGENE_V2_2_ECAM_MCFG(4, 1),
> +	XGENE_V2_2_ECAM_MCFG(4, 2),
> +#endif
>  };
>  
>  static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
> index 1de23d7..43d7c69 100644
> --- a/drivers/pci/host/pci-xgene.c
> +++ b/drivers/pci/host/pci-xgene.c
> @@ -27,6 +27,8 @@
>  #include <linux/of_irq.h>
>  #include <linux/of_pci.h>
>  #include <linux/pci.h>
> +#include <linux/pci-acpi.h>
> +#include <linux/pci-ecam.h>
>  #include <linux/platform_device.h>
>  #include <linux/slab.h>
>  
> @@ -64,6 +66,7 @@
>  /* PCIe IP version */
>  #define XGENE_PCIE_IP_VER_UNKN		0
>  #define XGENE_PCIE_IP_VER_1		1
> +#define XGENE_PCIE_IP_VER_2		2
>  
>  struct xgene_pcie_port {
>  	struct device_node	*node;
> @@ -97,7 +100,15 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
>   */
>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>  {
> -	struct xgene_pcie_port *port = bus->sysdata;
> +	struct pci_config_window *cfg;
> +	struct xgene_pcie_port *port;
> +
> +	if (acpi_disabled)
> +		port = bus->sysdata;
> +	else {
> +		cfg = bus->sysdata;
> +		port = cfg->priv;
> +	}
>  
>  	if (bus->number >= (bus->primary + 1))
>  		return port->cfg_base + AXI_EP_CFG_ACCESS;
> @@ -111,10 +122,18 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>   */
>  static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
>  {
> -	struct xgene_pcie_port *port = bus->sysdata;
> +	struct pci_config_window *cfg;
> +	struct xgene_pcie_port *port;
>  	unsigned int b, d, f;
>  	u32 rtdid_val = 0;
>  
> +	if (acpi_disabled)
> +		port = bus->sysdata;
> +	else {
> +		cfg = bus->sysdata;
> +		port = cfg->priv;
> +	}
> +
>  	b = bus->number;
>  	d = PCI_SLOT(devfn);
>  	f = PCI_FUNC(devfn);
> @@ -158,7 +177,15 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
>  static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>  				    int where, int size, u32 *val)
>  {
> -	struct xgene_pcie_port *port = bus->sysdata;
> +	struct pci_config_window *cfg;
> +	struct xgene_pcie_port *port;
> +
> +	if (acpi_disabled)
> +		port = bus->sysdata;
> +	else {
> +		cfg = bus->sysdata;
> +		port = cfg->priv;
> +	}
>  
>  	if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
>  	    PCIBIOS_SUCCESSFUL)
> @@ -189,6 +216,138 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>  	.write = pci_generic_config_write32,
>  };
>  
> +#ifdef CONFIG_ACPI
> +static struct resource xgene_v1_csr_res[] = {
> +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> +	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> +	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> +	[3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> +	[4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> +	struct acpi_pci_root *root = acpi_driver_data(adev);
> +	struct device *dev = cfg->parent;
> +	struct xgene_pcie_port *port;
> +	struct resource *csr;
> +
> +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> +	if (!port)
> +		return -ENOMEM;
> +
> +	csr = &xgene_v1_csr_res[root->segment];

This hard-coded assumption that segment N uses controller N breaks
for m400 where segment 0 is using controller 3.

> +	port->csr_base = devm_ioremap_resource(dev, csr);
> +	if (IS_ERR(port->csr_base)) {
> +		kfree(port);
> +		return -ENOMEM;
> +	}
> +
> +	port->cfg_base = cfg->win;
> +	port->version = XGENE_PCIE_IP_VER_1;
> +
> +	cfg->priv = port;
> +
> +	return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
> +	.bus_shift      = 16,
> +	.init           = xgene_v1_pcie_ecam_init,
> +	.pci_ops        = {
> +		.map_bus        = xgene_pcie_map_bus,
> +		.read           = xgene_pcie_config_read32,
> +		.write          = pci_generic_config_write,
> +	}
> +};
> +
> +static struct resource xgene_v2_1_csr_res[] = {
> +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> +	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v2_1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> +	struct acpi_pci_root *root = acpi_driver_data(adev);
> +	struct device *dev = cfg->parent;
> +	struct xgene_pcie_port *port;
> +	struct resource *csr;
> +
> +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> +	if (!port)
> +		return -ENOMEM;
> +
> +	csr = &xgene_v2_1_csr_res[root->segment];
> +	port->csr_base = devm_ioremap_resource(dev, csr);
> +	if (IS_ERR(port->csr_base)) {
> +		kfree(port);
> +		return -ENOMEM;
> +	}
> +
> +	port->cfg_base = cfg->win;
> +	port->version = XGENE_PCIE_IP_VER_2;
> +
> +	cfg->priv = port;
> +
> +	return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops = {
> +	.bus_shift      = 16,
> +	.init           = xgene_v2_1_pcie_ecam_init,
> +	.pci_ops        = {
> +		.map_bus        = xgene_pcie_map_bus,
> +		.read           = xgene_pcie_config_read32,
> +		.write          = pci_generic_config_write,
> +	}
> +};
> +
> +static struct resource xgene_v2_2_csr_res[] = {
> +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> +	[1] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> +	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v2_2_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> +	struct acpi_pci_root *root = acpi_driver_data(adev);
> +	struct device *dev = cfg->parent;
> +	struct xgene_pcie_port *port;
> +	struct resource *csr;
> +
> +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> +	if (!port)
> +		return -ENOMEM;
> +
> +	csr = &xgene_v2_2_csr_res[root->segment];
> +	port->csr_base = devm_ioremap_resource(dev, csr);
> +	if (IS_ERR(port->csr_base)) {
> +		kfree(port);
> +		return -ENOMEM;
> +	}
> +
> +	port->cfg_base = cfg->win;
> +	port->version = XGENE_PCIE_IP_VER_2;
> +
> +	cfg->priv = port;
> +
> +	return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops = {
> +	.bus_shift      = 16,
> +	.init           = xgene_v2_2_pcie_ecam_init,
> +	.pci_ops        = {
> +		.map_bus        = xgene_pcie_map_bus,
> +		.read           = xgene_pcie_config_read32,
> +		.write          = pci_generic_config_write,
> +	}
> +};
> +#endif
> +
>  static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
>  				  u32 flags, u64 size)
>  {
> diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
> index f5740b7..47ab947 100644
> --- a/include/linux/pci-ecam.h
> +++ b/include/linux/pci-ecam.h
> @@ -62,6 +62,13 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
>  /* ops for buggy ECAM that supports only 32-bit accesses */
>  extern struct pci_ecam_ops pci_32b_ops;
>  
> +/* ECAM ops for known quirks */
> +#ifdef CONFIG_PCI_XGENE
> +extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops;
> +extern struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops;
> +extern struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops;
> +#endif
> +
>  #ifdef CONFIG_PCI_HOST_GENERIC
>  /* for DT-based PCI controllers that support ECAM */
>  int pci_host_common_probe(struct platform_device *pdev,

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 1, 2016, 6:33 p.m. UTC | #2
Hi Duc,

On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> needs to configure additional controller's register to address
> device at bus:dev:function.
> 
> The quirk will only be applied for X-Gene PCIe MCFG table with
> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
> 
> The quirk declares the X-Gene PCIe controller register space as 64KB
> fixed memory resource with name "PCIe CSR". This name will be showed
> next to the resource range in the output of "cat /proc/iomem".
> 
> Signed-off-by: Duc Dang <dhdang@apm.com>
> ---
> v3:
>   - Rebase on top of pci/ecam-v6 tree.
>   - Use DEFINE_RES_MEM_NAMED to declare controller register space
>   with name "PCIe CSR"
> v2:
>   RFC v2: https://patchwork.ozlabs.org/patch/686846/
> v1:
>   RFC v1: https://patchwork.kernel.org/patch/9337115/
> 
>  drivers/acpi/pci_mcfg.c      |  31 ++++++++
>  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci-ecam.h     |   7 ++
>  3 files changed, 200 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> index ac21db3..eb6125b 100644
> --- a/drivers/acpi/pci_mcfg.c
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -57,6 +57,37 @@ struct mcfg_fixup {
>  	{ "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
>  	{ "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
>  	{ "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> +
> +#ifdef CONFIG_PCI_XGENE

As you've no doubt noticed, I'm proposing to add these quirks without
CONFIG_PCI_XGENE, so we don't have to select each device when building
a generic ACPI kernel.

I'm also proposing some Kconfig and Makefile changes so we don't build
the platform driver part in a generic ACPI kernel (unless we *also*
explicitly select the platform driver).

Here's an example:
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05

> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v1_pcie_ecam_ops }
> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v2_1_pcie_ecam_ops }
> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> +		&xgene_v2_2_pcie_ecam_ops }
> +
> +	/* X-Gene SoC with v1 PCIe controller */
> +	XGENE_V1_ECAM_MCFG(1, 0),
> +	XGENE_V1_ECAM_MCFG(1, 1),

> @@ -64,6 +66,7 @@
>  /* PCIe IP version */
>  #define XGENE_PCIE_IP_VER_UNKN		0
>  #define XGENE_PCIE_IP_VER_1		1
> +#define XGENE_PCIE_IP_VER_2		2

This isn't used anywhere, which makes me wonder whether it's worth
keeping it.

>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>  {
> -	struct xgene_pcie_port *port = bus->sysdata;
> +	struct pci_config_window *cfg;
> +	struct xgene_pcie_port *port;
> +
> +	if (acpi_disabled)
> +		port = bus->sysdata;
> +	else {
> +		cfg = bus->sysdata;
> +		port = cfg->priv;
> +	}

I would really, really like to figure out a way to get rid of these
"if (acpi_disabled)" checks sprinkled through here.  Is there any way
we can set up bus->sysdata to be the same, regardless of whether we're
using this as a platform driver or an ACPI quirk?

> +#ifdef CONFIG_ACPI

You've probably noticed that I've been using

  #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)

in this situation, mostly to make it clear that this is part of a
workaround.  I don't want people to blindly copy this stuff without
realizing that it's a workaround for a hardware issue.

> +static struct resource xgene_v1_csr_res[] = {
> +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> +	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> +	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> +	[3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> +	[4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),

I assume these ranges are not the actual ECAM space, right?
If they *were* ECAM, I assume you would have included them in the
quirk itself in the mcfg_quirks[] table.

> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> +	struct acpi_pci_root *root = acpi_driver_data(adev);
> +	struct device *dev = cfg->parent;
> +	struct xgene_pcie_port *port;
> +	struct resource *csr;
> +
> +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> +	if (!port)
> +		return -ENOMEM;
> +
> +	csr = &xgene_v1_csr_res[root->segment];

This makes me nervous because root->segment comes from the ACPI _SEG,
and if firmware gives us junk in _SEG, we will reference something in
the weeds.

> +	port->csr_base = devm_ioremap_resource(dev, csr);
> +	if (IS_ERR(port->csr_base)) {
> +		kfree(port);
> +		return -ENOMEM;
> +	}
> +
> +	port->cfg_base = cfg->win;
> +	port->version = XGENE_PCIE_IP_VER_1;
> +
> +	cfg->priv = port;

All these init functions are almost identical.  Can we factor this out
by having wrappers that do nothing more than pass in the table and
version, and put the kzalloc and ioremap in a shared back-end?

We're so close I can taste it!  I can't wait to see all this work come
to fruition.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Masters Dec. 1, 2016, 7:17 p.m. UTC | #3
On 12/01/2016 10:08 AM, Mark Salter wrote:
> On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:

>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> +{
>> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
>> +	struct acpi_pci_root *root = acpi_driver_data(adev);
>> +	struct device *dev = cfg->parent;
>> +	struct xgene_pcie_port *port;
>> +	struct resource *csr;
>> +
>> +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> +	if (!port)
>> +		return -ENOMEM;
>> +
>> +	csr = &xgene_v1_csr_res[root->segment];
> 
> This hard-coded assumption that segment N uses controller N breaks
> for m400 where segment 0 is using controller 3.

This seems very fragile. So in addition to Bjorn's comment about not
trusting firmware provided data for the segment offset in the CSR list,
you will want to also determine the controller from the ACPI tree. The
existing code walks the ACPI hierarchy and finds the CSR that way.
Obviously, the goal is to avoid that in the latest incarnation, but you
could still determine which controller matches a given device.

Jon.
Mark Salter Dec. 1, 2016, 7:20 p.m. UTC | #4
On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> Hi Duc,
> 
> On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> > 
> > PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> > needs to configure additional controller's register to address
> > device at bus:dev:function.
> > 
> > The quirk will only be applied for X-Gene PCIe MCFG table with
> > OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
> > 
> > The quirk declares the X-Gene PCIe controller register space as 64KB
> > fixed memory resource with name "PCIe CSR". This name will be showed
> > next to the resource range in the output of "cat /proc/iomem".
> > 
> > Signed-off-by: Duc Dang <dhdang@apm.com>
> > ---
> > v3:
> >   - Rebase on top of pci/ecam-v6 tree.
> >   - Use DEFINE_RES_MEM_NAMED to declare controller register space
> >   with name "PCIe CSR"
> > v2:
> >   RFC v2: https://patchwork.ozlabs.org/patch/686846/
> > v1:
> >   RFC v1: https://patchwork.kernel.org/patch/9337115/
> > 
> >  drivers/acpi/pci_mcfg.c      |  31 ++++++++
> >  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
> >  include/linux/pci-ecam.h     |   7 ++
> >  3 files changed, 200 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> > index ac21db3..eb6125b 100644
> > --- a/drivers/acpi/pci_mcfg.c
> > +++ b/drivers/acpi/pci_mcfg.c
> > @@ -57,6 +57,37 @@ struct mcfg_fixup {
> >  	{ "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
> >  	{ "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
> >  	{ "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> > +
> > +#ifdef CONFIG_PCI_XGENE
> As you've no doubt noticed, I'm proposing to add these quirks without
> CONFIG_PCI_XGENE, so we don't have to select each device when building
> a generic ACPI kernel.
> 
> I'm also proposing some Kconfig and Makefile changes so we don't build
> the platform driver part in a generic ACPI kernel (unless we *also*
> explicitly select the platform driver).
> 
> Here's an example:
> https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05
> 
> > 
> > +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> > +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > +		&xgene_v1_pcie_ecam_ops }
> > +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> > +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > +		&xgene_v2_1_pcie_ecam_ops }
> > +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> > +	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > +		&xgene_v2_2_pcie_ecam_ops }
> > +
> > +	/* X-Gene SoC with v1 PCIe controller */
> > +	XGENE_V1_ECAM_MCFG(1, 0),
> > +	XGENE_V1_ECAM_MCFG(1, 1),
> > 
> > @@ -64,6 +66,7 @@
> >  /* PCIe IP version */
> >  #define XGENE_PCIE_IP_VER_UNKN		0
> >  #define XGENE_PCIE_IP_VER_1		1
> > +#define XGENE_PCIE_IP_VER_2		2
> This isn't used anywhere, which makes me wonder whether it's worth
> keeping it.
> 
> > 
> >  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
> >  {
> > -	struct xgene_pcie_port *port = bus->sysdata;
> > +	struct pci_config_window *cfg;
> > +	struct xgene_pcie_port *port;
> > +
> > +	if (acpi_disabled)
> > +		port = bus->sysdata;
> > +	else {
> > +		cfg = bus->sysdata;
> > +		port = cfg->priv;
> > +	}
> I would really, really like to figure out a way to get rid of these
> "if (acpi_disabled)" checks sprinkled through here.  Is there any way
> we can set up bus->sysdata to be the same, regardless of whether we're
> using this as a platform driver or an ACPI quirk?
> 
> > 
> > +#ifdef CONFIG_ACPI
> You've probably noticed that I've been using
> 
>   #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
> 
> in this situation, mostly to make it clear that this is part of a
> workaround.  I don't want people to blindly copy this stuff without
> realizing that it's a workaround for a hardware issue.
> 
> > 
> > +static struct resource xgene_v1_csr_res[] = {
> > +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> > +	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> > +	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> > +	[3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> > +	[4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> I assume these ranges are not the actual ECAM space, right?
> If they *were* ECAM, I assume you would have included them in the
> quirk itself in the mcfg_quirks[] table.

These are base addresses for some RC mmio registers.
> 
> > 
> > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> > +{
> > +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> > +	struct acpi_pci_root *root = acpi_driver_data(adev);
> > +	struct device *dev = cfg->parent;
> > +	struct xgene_pcie_port *port;
> > +	struct resource *csr;
> > +
> > +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> > +	if (!port)
> > +		return -ENOMEM;
> > +
> > +	csr = &xgene_v1_csr_res[root->segment];
> This makes me nervous because root->segment comes from the ACPI _SEG,
> and if firmware gives us junk in _SEG, we will reference something in
> the weeds.

The SoC provide some number of RC bridges, each with a different base
for some mmio registers. Even if segment is legitimate in MCFG, there
is still a problem if a platform doesn't use the segment ordering
implied by the code. But the PNP0A03 _CRS does have this base address
as the first memory resource, so we could get it from there and not
have hard-coded addresses and implied ording in the quirk code.

I have tested a modified version of these quirks using this to
get the CSR base and it works on the 3 different platforms I have
access to.

static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
{
	struct acpi_device *adev = to_acpi_device(dev);
	unsigned long flags;
	struct list_head list;
	struct resource_entry *entry;
	int ret;

	INIT_LIST_HEAD(&list);
	flags = IORESOURCE_MEM;
	ret = acpi_dev_get_resources(adev, &list,
				     acpi_dev_filter_resource_type_cb,
				     (void *)flags);
	if (ret < 0) {
		dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
		return ret;
	} else if (ret == 0) {
		dev_err(dev, "no memory resources present in _CRS\n");
		return -EINVAL;
	}

	entry = list_first_entry(&list, struct resource_entry, node);
	*r = *entry->res;
	acpi_dev_free_resource_list(&list);
	return 0;
}

> 
> > 
> > +	port->csr_base = devm_ioremap_resource(dev, csr);
> > +	if (IS_ERR(port->csr_base)) {
> > +		kfree(port);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	port->cfg_base = cfg->win;
> > +	port->version = XGENE_PCIE_IP_VER_1;
> > +
> > +	cfg->priv = port;
> All these init functions are almost identical.  Can we factor this out
> by having wrappers that do nothing more than pass in the table and
> version, and put the kzalloc and ioremap in a shared back-end?
> 
> We're so close I can taste it!  I can't wait to see all this work come
> to fruition.
> 
> Bjorn

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Masters Dec. 1, 2016, 7:26 p.m. UTC | #5
On 12/01/2016 02:20 PM, Mark Salter wrote:
> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:

>>> +	csr = &xgene_v1_csr_res[root->segment];
>> This makes me nervous because root->segment comes from the ACPI _SEG,
>> and if firmware gives us junk in _SEG, we will reference something in
>> the weeds.
> 
> The SoC provide some number of RC bridges, each with a different base
> for some mmio registers. Even if segment is legitimate in MCFG, there
> is still a problem if a platform doesn't use the segment ordering
> implied by the code. But the PNP0A03 _CRS does have this base address
> as the first memory resource, so we could get it from there and not
> have hard-coded addresses and implied ording in the quirk code.
> 
> I have tested a modified version of these quirks using this to
> get the CSR base and it works on the 3 different platforms I have
> access to.
> 
> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
> {
> 	struct acpi_device *adev = to_acpi_device(dev);
> 	unsigned long flags;
> 	struct list_head list;
> 	struct resource_entry *entry;
> 	int ret;
> 
> 	INIT_LIST_HEAD(&list);
> 	flags = IORESOURCE_MEM;
> 	ret = acpi_dev_get_resources(adev, &list,
> 				     acpi_dev_filter_resource_type_cb,
> 				     (void *)flags);
> 	if (ret < 0) {
> 		dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
> 		return ret;
> 	} else if (ret == 0) {
> 		dev_err(dev, "no memory resources present in _CRS\n");
> 		return -EINVAL;
> 	}
> 
> 	entry = list_first_entry(&list, struct resource_entry, node);
> 	*r = *entry->res;
> 	acpi_dev_free_resource_list(&list);
> 	return 0;
> }

This seems a lot safer. At some point trusting firmware to provide the
correct _CRS for the RC in use is better than hard coding for every
possible implementation configuration of an X-Gene SoC.

Jon.
Bjorn Helgaas Dec. 1, 2016, 7:41 p.m. UTC | #6
On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:

> > > +static struct resource xgene_v1_csr_res[] = {
> > > +	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> > > +	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> > > +	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> > > +	[3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> > > +	[4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> > I assume these ranges are not the actual ECAM space, right?
> > If they *were* ECAM, I assume you would have included them in the
> > quirk itself in the mcfg_quirks[] table.
> 
> These are base addresses for some RC mmio registers.
> > 
> > > 
> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> > > +{
> > > +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> > > +	struct acpi_pci_root *root = acpi_driver_data(adev);
> > > +	struct device *dev = cfg->parent;
> > > +	struct xgene_pcie_port *port;
> > > +	struct resource *csr;
> > > +
> > > +	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> > > +	if (!port)
> > > +		return -ENOMEM;
> > > +
> > > +	csr = &xgene_v1_csr_res[root->segment];
> > This makes me nervous because root->segment comes from the ACPI _SEG,
> > and if firmware gives us junk in _SEG, we will reference something in
> > the weeds.
> 
> The SoC provide some number of RC bridges, each with a different base
> for some mmio registers. Even if segment is legitimate in MCFG, there
> is still a problem if a platform doesn't use the segment ordering
> implied by the code. But the PNP0A03 _CRS does have this base address
> as the first memory resource, so we could get it from there and not
> have hard-coded addresses and implied ording in the quirk code.

I'm confused.  Doesn't the current code treat every item in PNP0A03
_CRS as a window?  Do you mean the first resource is handled
differently somehow?  The Consumer/Producer bit could allow us to do
this by marking the RC MMIO space as "Consumer", but I didn't think
that strategy was quite working yet.

> I have tested a modified version of these quirks using this to
> get the CSR base and it works on the 3 different platforms I have
> access to.
> 
> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
> {
> 	struct acpi_device *adev = to_acpi_device(dev);
> 	unsigned long flags;
> 	struct list_head list;
> 	struct resource_entry *entry;
> 	int ret;
> 
> 	INIT_LIST_HEAD(&list);
> 	flags = IORESOURCE_MEM;
> 	ret = acpi_dev_get_resources(adev, &list,
> 				     acpi_dev_filter_resource_type_cb,
> 				     (void *)flags);
> 	if (ret < 0) {
> 		dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
> 		return ret;
> 	} else if (ret == 0) {
> 		dev_err(dev, "no memory resources present in _CRS\n");
> 		return -EINVAL;
> 	}
> 
> 	entry = list_first_entry(&list, struct resource_entry, node);
> 	*r = *entry->res;
> 	acpi_dev_free_resource_list(&list);
> 	return 0;
> }

The code above is identical to acpi_get_rc_addr(), which is used in
the acpi_get_rc_resources() path by the other quirks.  Can you use
that path, too, instead of reimplementing it here?
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Dec. 1, 2016, 7:58 p.m. UTC | #7
On Thu, Dec 1, 2016 at 11:17 AM, Jon Masters <jcm@redhat.com> wrote:
> On 12/01/2016 10:08 AM, Mark Salter wrote:
>> On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:
>
>>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>>> +{
>>> +    struct acpi_device *adev = to_acpi_device(cfg->parent);
>>> +    struct acpi_pci_root *root = acpi_driver_data(adev);
>>> +    struct device *dev = cfg->parent;
>>> +    struct xgene_pcie_port *port;
>>> +    struct resource *csr;
>>> +
>>> +    port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>>> +    if (!port)
>>> +            return -ENOMEM;
>>> +
>>> +    csr = &xgene_v1_csr_res[root->segment];
>>
>> This hard-coded assumption that segment N uses controller N breaks
>> for m400 where segment 0 is using controller 3.

I think the latest firmware released from us a few months back use
segment 3 for PCIe controller 3 in MCFG table.
>
> This seems very fragile. So in addition to Bjorn's comment about not
> trusting firmware provided data for the segment offset in the CSR list,
> you will want to also determine the controller from the ACPI tree. The
> existing code walks the ACPI hierarchy and finds the CSR that way.
> Obviously, the goal is to avoid that in the latest incarnation, but you
> could still determine which controller matches a given device.

Yes, I will look into that more.

>
> Jon.
>
> --
> Computer Architect | Sent from my Fedora powered laptop
>
Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Dec. 1, 2016, 10:10 p.m. UTC | #8
On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
>> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
>> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>
>> > > +static struct resource xgene_v1_csr_res[] = {
>> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>> > I assume these ranges are not the actual ECAM space, right?
>> > If they *were* ECAM, I assume you would have included them in the
>> > quirk itself in the mcfg_quirks[] table.
>>
>> These are base addresses for some RC mmio registers.
>> >
>> > >
>> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> > > +{
>> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
>> > > + struct device *dev = cfg->parent;
>> > > + struct xgene_pcie_port *port;
>> > > + struct resource *csr;
>> > > +
>> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> > > + if (!port)
>> > > +         return -ENOMEM;
>> > > +
>> > > + csr = &xgene_v1_csr_res[root->segment];
>> > This makes me nervous because root->segment comes from the ACPI _SEG,
>> > and if firmware gives us junk in _SEG, we will reference something in
>> > the weeds.
>>
>> The SoC provide some number of RC bridges, each with a different base
>> for some mmio registers. Even if segment is legitimate in MCFG, there
>> is still a problem if a platform doesn't use the segment ordering
>> implied by the code. But the PNP0A03 _CRS does have this base address
>> as the first memory resource, so we could get it from there and not
>> have hard-coded addresses and implied ording in the quirk code.
>
> I'm confused.  Doesn't the current code treat every item in PNP0A03
> _CRS as a window?  Do you mean the first resource is handled
> differently somehow?  The Consumer/Producer bit could allow us to do
> this by marking the RC MMIO space as "Consumer", but I didn't think
> that strategy was quite working yet.

The first resource is defined like below. It was introduced long time
ago to use with older version of X-Gene ECAM quirks.
Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

The resource discovered during booting will be like following:
[    0.728117] ACPI: MCFG table detected, 1 entries
[    0.735330] ACPI: Power Resource [SCVR] (on)
[    0.767478] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[    0.774013] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM
ClockPM Segments MSI]
[    0.782864] acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME
AER PCIeCapability]
[    0.791331] acpi PNP0A08:00: MCFG quirk: ECAM at [mem
0xe0d0000000-0xe0dfffffff] for [bus 00-ff] with xgene_v1_pcie_ecam_ops
[    0.803207] acpi PNP0A08:00: ECAM at [mem
0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
[    0.811399] Remapped I/O 0x000000e010000000 to [io  0x0000-0xffff window]
[    0.818678] PCI host bridge to bus 0000:00
[    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
[    0.830257] pci_bus 0000:00: root bus resource [io  0x0000-0xffff
window] (bus address [0x10000000-0x1000ffff])
[    0.840917] pci_bus 0000:00: root bus resource [mem
0xe040000000-0xe07fffffff window] (bus address
[0x40000000-0x7fffffff])
[    0.852675] pci_bus 0000:00: root bus resource [mem
0xf000000000-0xffffffffff window]
[    0.860950] pci_bus 0000:00: root bus resource [bus 00-ff]
[    0.866761] pci 0000:00:00.0: [10e8:e004] type 01 class 0x060400
[    0.873175] pci 0000:00:00.0: supports D1 D2
[    0.877980] pci 0000:01:00.0: [15b3:1003] type 00 class 0x020000
[    0.884597] pci 0000:01:00.0: reg 0x10: [mem 0xe040000000-0xe0400fffff 64bit]
[    0.892337] pci 0000:01:00.0: reg 0x18: [mem
0xe042000000-0xe043ffffff 64bit pref]
[    0.900694] pci 0000:01:00.0: reg 0x30: [mem 0xfff00000-0xffffffff pref]
[    0.923853] pci_bus 0000:00: on NUMA node 0
[    0.928269] pci 0000:00:00.0: BAR 15: assigned [mem
0xf000000000-0xf001ffffff 64bit pref]
[    0.936908] pci 0000:00:00.0: BAR 14: assigned [mem
0xe040000000-0xe0401fffff]
[    0.944539] pci 0000:01:00.0: BAR 2: assigned [mem
0xf000000000-0xf001ffffff 64bit pref]
[    0.953210] pci 0000:01:00.0: BAR 0: assigned [mem
0xe040000000-0xe0400fffff 64bit]
[    0.961430] pci 0000:01:00.0: BAR 6: assigned [mem
0xe040100000-0xe0401fffff pref]
[    0.969438] pci 0000:00:00.0: PCI bridge to [bus 01]
[    0.974690] pci 0000:00:00.0:   bridge window [mem 0xe040000000-0xe0401fffff]
[    0.982231] pci 0000:00:00.0:   bridge window [mem
0xf000000000-0xf001ffffff 64bit pref]

>
>> I have tested a modified version of these quirks using this to
>> get the CSR base and it works on the 3 different platforms I have
>> access to.
>>
>> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
>> {
>>       struct acpi_device *adev = to_acpi_device(dev);
>>       unsigned long flags;
>>       struct list_head list;
>>       struct resource_entry *entry;
>>       int ret;
>>
>>       INIT_LIST_HEAD(&list);
>>       flags = IORESOURCE_MEM;
>>       ret = acpi_dev_get_resources(adev, &list,
>>                                    acpi_dev_filter_resource_type_cb,
>>                                    (void *)flags);
>>       if (ret < 0) {
>>               dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
>>               return ret;
>>       } else if (ret == 0) {
>>               dev_err(dev, "no memory resources present in _CRS\n");
>>               return -EINVAL;
>>       }
>>
>>       entry = list_first_entry(&list, struct resource_entry, node);
>>       *r = *entry->res;
>>       acpi_dev_free_resource_list(&list);
>>       return 0;
>> }
>
> The code above is identical to acpi_get_rc_addr(), which is used in
> the acpi_get_rc_resources() path by the other quirks.  Can you use
> that path, too, instead of reimplementing it here?

I will post a new version using acpi_get_rc_resources and includes
other changes that you suggested.

Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Masters Dec. 1, 2016, 10:31 p.m. UTC | #9
Thanks Duc! I will test quickly if you have it today :)
Bjorn Helgaas Dec. 1, 2016, 11:07 p.m. UTC | #10
On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
> >> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> >> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> >
> >> > > +static struct resource xgene_v1_csr_res[] = {
> >> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> >> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> >> > I assume these ranges are not the actual ECAM space, right?
> >> > If they *were* ECAM, I assume you would have included them in the
> >> > quirk itself in the mcfg_quirks[] table.
> >>
> >> These are base addresses for some RC mmio registers.
> >> >
> >> > >
> >> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> >> > > +{
> >> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
> >> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
> >> > > + struct device *dev = cfg->parent;
> >> > > + struct xgene_pcie_port *port;
> >> > > + struct resource *csr;
> >> > > +
> >> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> >> > > + if (!port)
> >> > > +         return -ENOMEM;
> >> > > +
> >> > > + csr = &xgene_v1_csr_res[root->segment];
> >> > This makes me nervous because root->segment comes from the ACPI _SEG,
> >> > and if firmware gives us junk in _SEG, we will reference something in
> >> > the weeds.
> >>
> >> The SoC provide some number of RC bridges, each with a different base
> >> for some mmio registers. Even if segment is legitimate in MCFG, there
> >> is still a problem if a platform doesn't use the segment ordering
> >> implied by the code. But the PNP0A03 _CRS does have this base address
> >> as the first memory resource, so we could get it from there and not
> >> have hard-coded addresses and implied ording in the quirk code.
> >
> > I'm confused.  Doesn't the current code treat every item in PNP0A03
> > _CRS as a window?  Do you mean the first resource is handled
> > differently somehow?  The Consumer/Producer bit could allow us to do
> > this by marking the RC MMIO space as "Consumer", but I didn't think
> > that strategy was quite working yet.
> 
> The first resource is defined like below. It was introduced long time
> ago to use with older version of X-Gene ECAM quirks.
> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

> [    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]

I think this is wrong.  The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
is available for use by devices on bus 0000:00, but I think you're
saying it is consumed by the bridge itself, not forwarded down to PCI.

What's in your /proc/iomem?  I see that your quirks do call
devm_ioremap_resource(), which calls devm_request_mem_region()
internally, so the driver does at least request that region, which
should keep us from assigning it to PCI devices.

But it still isn't quite right to tell the PCI core that the region is
available on the root bus.

Bjorn

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Dec. 1, 2016, 11:22 p.m. UTC | #11
On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> > On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
>> >> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
>> >> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>> >
>> >> > > +static struct resource xgene_v1_csr_res[] = {
>> >> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>> >> > I assume these ranges are not the actual ECAM space, right?
>> >> > If they *were* ECAM, I assume you would have included them in the
>> >> > quirk itself in the mcfg_quirks[] table.
>> >>
>> >> These are base addresses for some RC mmio registers.
>> >> >
>> >> > >
>> >> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> >> > > +{
>> >> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> >> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
>> >> > > + struct device *dev = cfg->parent;
>> >> > > + struct xgene_pcie_port *port;
>> >> > > + struct resource *csr;
>> >> > > +
>> >> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> >> > > + if (!port)
>> >> > > +         return -ENOMEM;
>> >> > > +
>> >> > > + csr = &xgene_v1_csr_res[root->segment];
>> >> > This makes me nervous because root->segment comes from the ACPI _SEG,
>> >> > and if firmware gives us junk in _SEG, we will reference something in
>> >> > the weeds.
>> >>
>> >> The SoC provide some number of RC bridges, each with a different base
>> >> for some mmio registers. Even if segment is legitimate in MCFG, there
>> >> is still a problem if a platform doesn't use the segment ordering
>> >> implied by the code. But the PNP0A03 _CRS does have this base address
>> >> as the first memory resource, so we could get it from there and not
>> >> have hard-coded addresses and implied ording in the quirk code.
>> >
>> > I'm confused.  Doesn't the current code treat every item in PNP0A03
>> > _CRS as a window?  Do you mean the first resource is handled
>> > differently somehow?  The Consumer/Producer bit could allow us to do
>> > this by marking the RC MMIO space as "Consumer", but I didn't think
>> > that strategy was quite working yet.
>>
>> The first resource is defined like below. It was introduced long time
>> ago to use with older version of X-Gene ECAM quirks.
>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>
>> [    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>
> I think this is wrong.  The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
> is available for use by devices on bus 0000:00, but I think you're
> saying it is consumed by the bridge itself, not forwarded down to PCI.
>
> What's in your /proc/iomem?  I see that your quirks do call
> devm_ioremap_resource(), which calls devm_request_mem_region()
> internally, so the driver does at least request that region, which
> should keep us from assigning it to PCI devices.
>
> But it still isn't quite right to tell the PCI core that the region is
> available on the root bus.

This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
region is consumed completely.
1f2b0000-1f2bffff : PCI Bus 0000:00
  1f2b0000-1f2bffff : PCIe CSR

e040000000-e07fffffff : PCI Bus 0000:00
  e040000000-e0401fffff : PCI Bus 0000:01
    e040000000-e0400fffff : 0000:01:00.0
      e040000000-e0400fffff : mlx4_core
    e040100000-e0401fffff : 0000:01:00.0
e0d0000000-e0dfffffff : PCI ECAM
f000000000-ffffffffff : PCI Bus 0000:00
  f000000000-f001ffffff : PCI Bus 0000:01
    f000000000-f001ffffff : 0000:01:00.0
      f000000000-f001ffffff : mlx4_core

Using hard-coded resources for mmio space make the quirk rely on the
segment number passing from the firmware. Using Mark's method or
acpi_get_rc_resource can discover the mmio space and consume all of
the space, but as you mentioned, it leaves the defect that PCI core
considers the mmio space as available resource for secondary devices
although it will never allocate the mmio space to secondary devices as
the RC already reserves and consumes all of the space.

>
> Bjorn
>
Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Dec. 2, 2016, 2:52 a.m. UTC | #12
On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> Hi Duc,
>
> On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
>> needs to configure additional controller's register to address
>> device at bus:dev:function.
>>
>> The quirk will only be applied for X-Gene PCIe MCFG table with
>> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>>
>> The quirk declares the X-Gene PCIe controller register space as 64KB
>> fixed memory resource with name "PCIe CSR". This name will be showed
>> next to the resource range in the output of "cat /proc/iomem".
>>
>> Signed-off-by: Duc Dang <dhdang@apm.com>
>> ---
>> v3:
>>   - Rebase on top of pci/ecam-v6 tree.
>>   - Use DEFINE_RES_MEM_NAMED to declare controller register space
>>   with name "PCIe CSR"
>> v2:
>>   RFC v2: https://patchwork.ozlabs.org/patch/686846/
>> v1:
>>   RFC v1: https://patchwork.kernel.org/patch/9337115/
>>
>>  drivers/acpi/pci_mcfg.c      |  31 ++++++++
>>  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
>>  include/linux/pci-ecam.h     |   7 ++
>>  3 files changed, 200 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
>> index ac21db3..eb6125b 100644
>> --- a/drivers/acpi/pci_mcfg.c
>> +++ b/drivers/acpi/pci_mcfg.c
>> @@ -57,6 +57,37 @@ struct mcfg_fixup {
>>       { "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
>>       { "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
>>       { "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
>> +
>> +#ifdef CONFIG_PCI_XGENE
>
> As you've no doubt noticed, I'm proposing to add these quirks without
> CONFIG_PCI_XGENE, so we don't have to select each device when building
> a generic ACPI kernel.
>
> I'm also proposing some Kconfig and Makefile changes so we don't build
> the platform driver part in a generic ACPI kernel (unless we *also*
> explicitly select the platform driver).
>
> Here's an example:
> https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05

I made similar changes in v4 patch. The ECAM quirk will be built when
ACPI and PCI_QUIRKS are enabled.

When building for DT only, the ECAM quirk won't be compiled.
>
>> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
>> +     {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
>> +             &xgene_v1_pcie_ecam_ops }
>> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
>> +     {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
>> +             &xgene_v2_1_pcie_ecam_ops }
>> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
>> +     {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
>> +             &xgene_v2_2_pcie_ecam_ops }
>> +
>> +     /* X-Gene SoC with v1 PCIe controller */
>> +     XGENE_V1_ECAM_MCFG(1, 0),
>> +     XGENE_V1_ECAM_MCFG(1, 1),
>
>> @@ -64,6 +66,7 @@
>>  /* PCIe IP version */
>>  #define XGENE_PCIE_IP_VER_UNKN               0
>>  #define XGENE_PCIE_IP_VER_1          1
>> +#define XGENE_PCIE_IP_VER_2          2
>
> This isn't used anywhere, which makes me wonder whether it's worth
> keeping it.

V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
XGENE_PCIE_IP_VER_2). This will be used to indicate that the
controller is V2, and to enable configuration request retry status
feature (by not disable it like V1 controller).

>
>>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>>  {
>> -     struct xgene_pcie_port *port = bus->sysdata;
>> +     struct pci_config_window *cfg;
>> +     struct xgene_pcie_port *port;
>> +
>> +     if (acpi_disabled)
>> +             port = bus->sysdata;
>> +     else {
>> +             cfg = bus->sysdata;
>> +             port = cfg->priv;
>> +     }
>
> I would really, really like to figure out a way to get rid of these
> "if (acpi_disabled)" checks sprinkled through here.  Is there any way
> we can set up bus->sysdata to be the same, regardless of whether we're
> using this as a platform driver or an ACPI quirk?

Right now, I created a inline function to extract xgene_pcie_port from
pci_bus. In order to get rid of acpi_disabled, I will need to make
sysdata in DT case also point to pci_config_window structure, which
means I will need to convert and test the DT driver to use ecam ops.
It is a separate patch itself. So I think I should do it at later time
(after this ECAM quirk patch). I hope you are ok with this.

>
>> +#ifdef CONFIG_ACPI
>
> You've probably noticed that I've been using
>
>   #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
>
> in this situation, mostly to make it clear that this is part of a
> workaround.  I don't want people to blindly copy this stuff without
> realizing that it's a workaround for a hardware issue.

Yes, I used defined(CONFIG_PCI_QUIRKS) in v4 patch as well.
>
>> +static struct resource xgene_v1_csr_res[] = {
>> +     [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> +     [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> +     [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> +     [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> +     [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>
> I assume these ranges are not the actual ECAM space, right?
> If they *were* ECAM, I assume you would have included them in the
> quirk itself in the mcfg_quirks[] table.

Yes, as Mark also pointed out. These are MMIO space for controller registers.

>
>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> +{
>> +     struct acpi_device *adev = to_acpi_device(cfg->parent);
>> +     struct acpi_pci_root *root = acpi_driver_data(adev);
>> +     struct device *dev = cfg->parent;
>> +     struct xgene_pcie_port *port;
>> +     struct resource *csr;
>> +
>> +     port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> +     if (!port)
>> +             return -ENOMEM;
>> +
>> +     csr = &xgene_v1_csr_res[root->segment];
>
> This makes me nervous because root->segment comes from the ACPI _SEG,
> and if firmware gives us junk in _SEG, we will reference something in
> the weeds.

I use Mark's approach in v4 patch (discover the MMIO space using
acpi_dev_get_resources. Both approach have pros and cons. I can also
fallback to hard-coded resource array if you want to, but as you
mentioned right above, we will need to rely on firmware for correct
_SEG information.

I need to define the function (xgene_get_csr_resource()) inside
pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
X-Gene firmware does not have a dedicate PNP0C02 node to declare the
resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
error due to acpi_bus_get_device() returns error.

>
>> +     port->csr_base = devm_ioremap_resource(dev, csr);
>> +     if (IS_ERR(port->csr_base)) {
>> +             kfree(port);
>> +             return -ENOMEM;
>> +     }
>> +
>> +     port->cfg_base = cfg->win;
>> +     port->version = XGENE_PCIE_IP_VER_1;
>> +
>> +     cfg->priv = port;
>
> All these init functions are almost identical.  Can we factor this out
> by having wrappers that do nothing more than pass in the table and
> version, and put the kzalloc and ioremap in a shared back-end?

I refactor-ed these .init functions. And as a result, there are only 2
ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.

>
> We're so close I can taste it!  I can't wait to see all this work come
> to fruition.
>
> Bjorn
Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Masters Dec. 2, 2016, 4:08 a.m. UTC | #13
Hi Bjorn, Duc, Mark,

I switched my brain to the on mode and went and read some specs, and a few
tables, so here's my 2 cents on this...

On 12/01/2016 06:22 PM, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:

>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>> as the first memory resource, so we could get it from there and not
>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>
>>>> I'm confused.  Doesn't the current code treat every item in PNP0A03
>>>> _CRS as a window?  Do you mean the first resource is handled
>>>> differently somehow?  The Consumer/Producer bit could allow us to do
>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>> that strategy was quite working yet.

Let's see if I summarized this correctly...

1. The MMIO registers for the host bridge itself need to be described
   somewhere, especially if we need to find those in a quirk and poke
   them. Since those registers are very much part of the bridge device,
   it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.

2. The address space covering these registers MUST be described as a
   ResourceConsumer in order to avoid accidentally exposing them as
   available for use by downstream devices on the PCI bus.

3. The ACPI specification allows for resources of the type "Memory32Fixed".
   This is a macro that doesn't have the notion of a producer or consumer.
   HOWEVER various interpretations seem to be that this could/should
   default to being interpreted as a consumed region.

4. At one point, a regression was added to the kernel:

   63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
   host bridge itself")

   Which lead to a series on conversations about what should happen
   for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )

5. This resulted in the following commit reverting point 4:

   2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
   available on PCI bus")

   Which also stated that:

   "This solution will also ease the way to consolidate ACPI PCI host
    bridge common code from x86, ia64 and ARM64"

End of summary.

So it seems that generally there is an aversion to having bridge resources
be described in this manner and you would like to require that they be
described e.g. using QWordMemory with a ResourceConsumer type?

BUT if we were to do that, it would break existing shipping systems since
there are quirks out there that use this form to find the base CSR:

       if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
                fixed32 = &acpi_res->data.fixed_memory32;
                port->csr_base = ioremap(fixed32->address,
                                         fixed32->address_length);
                return AE_CTRL_TERMINATE;
        }

That's what's shipping in at least RHEL(SA) today, and probably in other
distros. So if we get vendors to take that out, existing stuff will break,
which will have the downside that customers will have to choose between
whether to run a given distro or be able to use upstream kernels. In
that sense, to me, there are shipping platforms out there, which may well
be doing the "wrong" thing, but they are deployed and they are doing it.

Which makes me wonder a couple of things (I think should NOT be done):

1. What would happen if we had both. A FixedMemory32 and the same region
   described again using the longer form as a consumed region. I doubt
   that's legal, and the current code would still add the region if it
   saw the FixedMemory32 first when walking the tree. I don't like it,
   but I'm mentioning it in case that leads to some helpful thinking.

2. What would happen if we had a difference policy on arm64 for such
   resources. x86 has an "exception" for accessing the config space
   using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
   we can make the rules for a new platform (i.e. actually prescribe
   exactly what the behavior is, rather than have it not be defined).
   This is of course terrible in that existing BIOS vendors and so on
   won't necessarily know this when working on ARM ACPI later on.

I don't like either of these obviously. I'm hoping there's some way we
can say that this is tolerated in this one quirk (allow the use of
FixedMemory32 in this case) on the grounds that the driver claims
this bridge region and can be annotated to explain such.

Once you let us know what you prefer, we will go and update the ARM
SBBR to spell out that future platforms should not make this mistake
again. We can prescribe whatever you'd like in terms of how bridge
resources consumed by the bridge are exposed. I have spoken about
this kind of situation within MS in the past, but they didn't have
specific guidance since they don't really tolerate such quirks. I
can, however, consult them before we change the SBBR as well.

>>> The first resource is defined like below. It was introduced long time
>>> ago to use with older version of X-Gene ECAM quirks.
>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

Indeed. And in the case of m400, it is currently this in shipping systems:

               Memory32Fixed (ReadWrite,
                    0x1F500000,         // Address Base
                    0x00010000,         // Address Length
                    )

The spec isn't clear on whether these are produced or consumed but the
implication is that these are consumed resources in most cases. Not that
this changes any of the above, but one can understand why it happened.

>>> [    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>
>> I think this is wrong.  The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>> is available for use by devices on bus 0000:00, but I think you're
>> saying it is consumed by the bridge itself, not forwarded down to PCI.

Indeed.

>> What's in your /proc/iomem?  I see that your quirks do call
>> devm_ioremap_resource(), which calls devm_request_mem_region()
>> internally, so the driver does at least request that region, which
>> should keep us from assigning it to PCI devices.

I'm hoping you can grant an exception on the grounds that the quirk will
keep the region from actually being used. And then somehow we document
this in the driver.

>> But it still isn't quite right to tell the PCI core that the region is
>> available on the root bus.
> 
> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
> region is consumed completely.
> 1f2b0000-1f2bffff : PCI Bus 0000:00
>   1f2b0000-1f2bffff : PCIe CSR
> 
> e040000000-e07fffffff : PCI Bus 0000:00
>   e040000000-e0401fffff : PCI Bus 0000:01
>     e040000000-e0400fffff : 0000:01:00.0
>       e040000000-e0400fffff : mlx4_core
>     e040100000-e0401fffff : 0000:01:00.0
> e0d0000000-e0dfffffff : PCI ECAM
> f000000000-ffffffffff : PCI Bus 0000:00
>   f000000000-f001ffffff : PCI Bus 0000:01
>     f000000000-f001ffffff : 0000:01:00.0
>       f000000000-f001ffffff : mlx4_core
> 
> Using hard-coded resources for mmio space make the quirk rely on the
> segment number passing from the firmware. Using Mark's method or
> acpi_get_rc_resource can discover the mmio space and consume all of
> the space, but as you mentioned, it leaves the defect that PCI core
> considers the mmio space as available resource for secondary devices
> although it will never allocate the mmio space to secondary devices as
> the RC already reserves and consumes all of the space.

Indeed. It's not clean, but perhaps we can get away with it on the
grounds that there are existing systems out there and this won't
be allowed to happen again in the future :)

Jon.
Jon Masters Dec. 2, 2016, 6:31 a.m. UTC | #14
Bjorn,

Although I think the below still applies (that we need to leave that
Memory32Fixed for existing deployments, and this is going to result
in /proc/iomem polution), I've done some more reading of your ecam
tree and the implementation of acpi_get_rc_resources you mentioned,
and in particular how the PNP0C02 devices actually get wired up.

I would like to be able to boot upstream on existing shipping and
deployed machines that are in the field (not to mention our labs), but
there's no reason we can't *also* get APM to add a new vendor specific
PNP0C02 to the ACPI namespace in future firmware updates (for at least
their own Mustang reference boards) matching segment to CSR, as in the
case of the HiSi patches. That might then allow for some later
preference to use that for the CSR rather than getting it from the RC
device. Still, it would be ideal to boot on machines that are shipping
from HPE and others at this moment, so I am still hopeful you'll
at least allow the approach from Duc's v4 for now (4.10).

Another nasty option for later consideration could then be having
the kernel fake up any missing PNP0C02 on existing machines, but
it would need special knowledge of the platform to generate that
so as to handle the problem Mark flagged earlier (segment vs
controller mismatch on some platforms). That could be done with a
DMI quirk that matched on a specific (e.g. HPE) machine. It would
only be needed on "broken" existing machines, and could be added
post-4.10 to clean this up if you really want to do that.

That's all very nasty...

Jon.

On 12/01/2016 11:08 PM, Jon Masters wrote:
> Hi Bjorn, Duc, Mark,
> 
> I switched my brain to the on mode and went and read some specs, and a few
> tables, so here's my 2 cents on this...
> 
> On 12/01/2016 06:22 PM, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
> 
>>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>>> as the first memory resource, so we could get it from there and not
>>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>>
>>>>> I'm confused.  Doesn't the current code treat every item in PNP0A03
>>>>> _CRS as a window?  Do you mean the first resource is handled
>>>>> differently somehow?  The Consumer/Producer bit could allow us to do
>>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>>> that strategy was quite working yet.
> 
> Let's see if I summarized this correctly...
> 
> 1. The MMIO registers for the host bridge itself need to be described
>    somewhere, especially if we need to find those in a quirk and poke
>    them. Since those registers are very much part of the bridge device,
>    it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
> 
> 2. The address space covering these registers MUST be described as a
>    ResourceConsumer in order to avoid accidentally exposing them as
>    available for use by downstream devices on the PCI bus.
> 
> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
>    This is a macro that doesn't have the notion of a producer or consumer.
>    HOWEVER various interpretations seem to be that this could/should
>    default to being interpreted as a consumed region.
> 
> 4. At one point, a regression was added to the kernel:
> 
>    63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
>    host bridge itself")
> 
>    Which lead to a series on conversations about what should happen
>    for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
> 
> 5. This resulted in the following commit reverting point 4:
> 
>    2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
>    available on PCI bus")
> 
>    Which also stated that:
> 
>    "This solution will also ease the way to consolidate ACPI PCI host
>     bridge common code from x86, ia64 and ARM64"
> 
> End of summary.
> 
> So it seems that generally there is an aversion to having bridge resources
> be described in this manner and you would like to require that they be
> described e.g. using QWordMemory with a ResourceConsumer type?
> 
> BUT if we were to do that, it would break existing shipping systems since
> there are quirks out there that use this form to find the base CSR:
> 
>        if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
>                 fixed32 = &acpi_res->data.fixed_memory32;
>                 port->csr_base = ioremap(fixed32->address,
>                                          fixed32->address_length);
>                 return AE_CTRL_TERMINATE;
>         }
> 
> That's what's shipping in at least RHEL(SA) today, and probably in other
> distros. So if we get vendors to take that out, existing stuff will break,
> which will have the downside that customers will have to choose between
> whether to run a given distro or be able to use upstream kernels. In
> that sense, to me, there are shipping platforms out there, which may well
> be doing the "wrong" thing, but they are deployed and they are doing it.
> 
> Which makes me wonder a couple of things (I think should NOT be done):
> 
> 1. What would happen if we had both. A FixedMemory32 and the same region
>    described again using the longer form as a consumed region. I doubt
>    that's legal, and the current code would still add the region if it
>    saw the FixedMemory32 first when walking the tree. I don't like it,
>    but I'm mentioning it in case that leads to some helpful thinking.
> 
> 2. What would happen if we had a difference policy on arm64 for such
>    resources. x86 has an "exception" for accessing the config space
>    using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
>    we can make the rules for a new platform (i.e. actually prescribe
>    exactly what the behavior is, rather than have it not be defined).
>    This is of course terrible in that existing BIOS vendors and so on
>    won't necessarily know this when working on ARM ACPI later on.
> 
> I don't like either of these obviously. I'm hoping there's some way we
> can say that this is tolerated in this one quirk (allow the use of
> FixedMemory32 in this case) on the grounds that the driver claims
> this bridge region and can be annotated to explain such.
> 
> Once you let us know what you prefer, we will go and update the ARM
> SBBR to spell out that future platforms should not make this mistake
> again. We can prescribe whatever you'd like in terms of how bridge
> resources consumed by the bridge are exposed. I have spoken about
> this kind of situation within MS in the past, but they didn't have
> specific guidance since they don't really tolerate such quirks. I
> can, however, consult them before we change the SBBR as well.
> 
>>>> The first resource is defined like below. It was introduced long time
>>>> ago to use with older version of X-Gene ECAM quirks.
>>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
> 
> Indeed. And in the case of m400, it is currently this in shipping systems:
> 
>                Memory32Fixed (ReadWrite,
>                     0x1F500000,         // Address Base
>                     0x00010000,         // Address Length
>                     )
> 
> The spec isn't clear on whether these are produced or consumed but the
> implication is that these are consumed resources in most cases. Not that
> this changes any of the above, but one can understand why it happened.
> 
>>>> [    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>
>>> I think this is wrong.  The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>> is available for use by devices on bus 0000:00, but I think you're
>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
> 
> Indeed.
> 
>>> What's in your /proc/iomem?  I see that your quirks do call
>>> devm_ioremap_resource(), which calls devm_request_mem_region()
>>> internally, so the driver does at least request that region, which
>>> should keep us from assigning it to PCI devices.
> 
> I'm hoping you can grant an exception on the grounds that the quirk will
> keep the region from actually being used. And then somehow we document
> this in the driver.
> 
>>> But it still isn't quite right to tell the PCI core that the region is
>>> available on the root bus.
>>
>> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
>> region is consumed completely.
>> 1f2b0000-1f2bffff : PCI Bus 0000:00
>>   1f2b0000-1f2bffff : PCIe CSR
>>
>> e040000000-e07fffffff : PCI Bus 0000:00
>>   e040000000-e0401fffff : PCI Bus 0000:01
>>     e040000000-e0400fffff : 0000:01:00.0
>>       e040000000-e0400fffff : mlx4_core
>>     e040100000-e0401fffff : 0000:01:00.0
>> e0d0000000-e0dfffffff : PCI ECAM
>> f000000000-ffffffffff : PCI Bus 0000:00
>>   f000000000-f001ffffff : PCI Bus 0000:01
>>     f000000000-f001ffffff : 0000:01:00.0
>>       f000000000-f001ffffff : mlx4_core
>>
>> Using hard-coded resources for mmio space make the quirk rely on the
>> segment number passing from the firmware. Using Mark's method or
>> acpi_get_rc_resource can discover the mmio space and consume all of
>> the space, but as you mentioned, it leaves the defect that PCI core
>> considers the mmio space as available resource for secondary devices
>> although it will never allocate the mmio space to secondary devices as
>> the RC already reserves and consumes all of the space.
> 
> Indeed. It's not clean, but perhaps we can get away with it on the
> grounds that there are existing systems out there and this won't
> be allowed to happen again in the future :)
> 
> Jon.
>
Duc Dang Dec. 2, 2016, 7:34 a.m. UTC | #15
On Thu, Dec 1, 2016 at 10:31 PM, Jon Masters <jcm@redhat.com> wrote:
> Bjorn,
>
> Although I think the below still applies (that we need to leave that
> Memory32Fixed for existing deployments, and this is going to result
> in /proc/iomem polution), I've done some more reading of your ecam
> tree and the implementation of acpi_get_rc_resources you mentioned,
> and in particular how the PNP0C02 devices actually get wired up.
>
> I would like to be able to boot upstream on existing shipping and
> deployed machines that are in the field (not to mention our labs), but
> there's no reason we can't *also* get APM to add a new vendor specific
> PNP0C02 to the ACPI namespace in future firmware updates (for at least
> their own Mustang reference boards) matching segment to CSR, as in the
> case of the HiSi patches. That might then allow for some later
> preference to use that for the CSR rather than getting it from the RC
> device. Still, it would be ideal to boot on machines that are shipping
> from HPE and others at this moment, so I am still hopeful you'll
> at least allow the approach from Duc's v4 for now (4.10).

APM X-Gene 1 and X-Gene 2 ACPI tables will absolutely have PNP0C02
nodes (in upcoming firmware release). I hope to have a solution that
works for both old buggy firmware and the future improved firmware. So
I am thinking the CSR discovery will be like this:

(1) Use  acpi_get_rc_resources() to discover CSR resource by checking
PNP0C02 nodes
(2) (1) should succeed with the new firmware
(3) If (1) fails, we can fall back to approach on v4 patch: calling
xgene_get_csr_resource() to discover the CSR described by
Memory32Fixed macro.

How do you feel about this? The drawback is the new firmware that does
not have the CSR space described with Memory32Fixed macro will fail on
the distro version that uses the old quirk (that relies on this
Memory32Fixed macro).

>
> Another nasty option for later consideration could then be having
> the kernel fake up any missing PNP0C02 on existing machines, but
> it would need special knowledge of the platform to generate that
> so as to handle the problem Mark flagged earlier (segment vs
> controller mismatch on some platforms). That could be done with a
> DMI quirk that matched on a specific (e.g. HPE) machine. It would
> only be needed on "broken" existing machines, and could be added
> post-4.10 to clean this up if you really want to do that.

Bjorn suggested similar approach (have a PNP quirk to fabricate a
PNP0C02 device and decleare all the required resources there) on
another thread. But as you said, this approach does not scale, it can
only applicable for a specific machine (by checking DMI information to
apply the PNP quirk).

>
> That's all very nasty...
>
> Jon.
>
> On 12/01/2016 11:08 PM, Jon Masters wrote:
>> Hi Bjorn, Duc, Mark,
>>
>> I switched my brain to the on mode and went and read some specs, and a few
>> tables, so here's my 2 cents on this...
>>
>> On 12/01/2016 06:22 PM, Duc Dang wrote:
>>> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>>
>>>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>>>> as the first memory resource, so we could get it from there and not
>>>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>>>
>>>>>> I'm confused.  Doesn't the current code treat every item in PNP0A03
>>>>>> _CRS as a window?  Do you mean the first resource is handled
>>>>>> differently somehow?  The Consumer/Producer bit could allow us to do
>>>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>>>> that strategy was quite working yet.
>>
>> Let's see if I summarized this correctly...
>>
>> 1. The MMIO registers for the host bridge itself need to be described
>>    somewhere, especially if we need to find those in a quirk and poke
>>    them. Since those registers are very much part of the bridge device,
>>    it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>>
>> 2. The address space covering these registers MUST be described as a
>>    ResourceConsumer in order to avoid accidentally exposing them as
>>    available for use by downstream devices on the PCI bus.
>>
>> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
>>    This is a macro that doesn't have the notion of a producer or consumer.
>>    HOWEVER various interpretations seem to be that this could/should
>>    default to being interpreted as a consumed region.
>>
>> 4. At one point, a regression was added to the kernel:
>>
>>    63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
>>    host bridge itself")
>>
>>    Which lead to a series on conversations about what should happen
>>    for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
>>
>> 5. This resulted in the following commit reverting point 4:
>>
>>    2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
>>    available on PCI bus")
>>
>>    Which also stated that:
>>
>>    "This solution will also ease the way to consolidate ACPI PCI host
>>     bridge common code from x86, ia64 and ARM64"
>>
>> End of summary.
>>
>> So it seems that generally there is an aversion to having bridge resources
>> be described in this manner and you would like to require that they be
>> described e.g. using QWordMemory with a ResourceConsumer type?
>>
>> BUT if we were to do that, it would break existing shipping systems since
>> there are quirks out there that use this form to find the base CSR:
>>
>>        if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
>>                 fixed32 = &acpi_res->data.fixed_memory32;
>>                 port->csr_base = ioremap(fixed32->address,
>>                                          fixed32->address_length);
>>                 return AE_CTRL_TERMINATE;
>>         }
>>
>> That's what's shipping in at least RHEL(SA) today, and probably in other
>> distros. So if we get vendors to take that out, existing stuff will break,
>> which will have the downside that customers will have to choose between
>> whether to run a given distro or be able to use upstream kernels. In
>> that sense, to me, there are shipping platforms out there, which may well
>> be doing the "wrong" thing, but they are deployed and they are doing it.
>>
>> Which makes me wonder a couple of things (I think should NOT be done):
>>
>> 1. What would happen if we had both. A FixedMemory32 and the same region
>>    described again using the longer form as a consumed region. I doubt
>>    that's legal, and the current code would still add the region if it
>>    saw the FixedMemory32 first when walking the tree. I don't like it,
>>    but I'm mentioning it in case that leads to some helpful thinking.
>>
>> 2. What would happen if we had a difference policy on arm64 for such
>>    resources. x86 has an "exception" for accessing the config space
>>    using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
>>    we can make the rules for a new platform (i.e. actually prescribe
>>    exactly what the behavior is, rather than have it not be defined).
>>    This is of course terrible in that existing BIOS vendors and so on
>>    won't necessarily know this when working on ARM ACPI later on.
>>
>> I don't like either of these obviously. I'm hoping there's some way we
>> can say that this is tolerated in this one quirk (allow the use of
>> FixedMemory32 in this case) on the grounds that the driver claims
>> this bridge region and can be annotated to explain such.
>>
>> Once you let us know what you prefer, we will go and update the ARM
>> SBBR to spell out that future platforms should not make this mistake
>> again. We can prescribe whatever you'd like in terms of how bridge
>> resources consumed by the bridge are exposed. I have spoken about
>> this kind of situation within MS in the past, but they didn't have
>> specific guidance since they don't really tolerate such quirks. I
>> can, however, consult them before we change the SBBR as well.
>>
>>>>> The first resource is defined like below. It was introduced long time
>>>>> ago to use with older version of X-Gene ECAM quirks.
>>>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>>
>> Indeed. And in the case of m400, it is currently this in shipping systems:
>>
>>                Memory32Fixed (ReadWrite,
>>                     0x1F500000,         // Address Base
>>                     0x00010000,         // Address Length
>>                     )
>>
>> The spec isn't clear on whether these are produced or consumed but the
>> implication is that these are consumed resources in most cases. Not that
>> this changes any of the above, but one can understand why it happened.
>>
>>>>> [    0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>>
>>>> I think this is wrong.  The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>>> is available for use by devices on bus 0000:00, but I think you're
>>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
>>
>> Indeed.
>>
>>>> What's in your /proc/iomem?  I see that your quirks do call
>>>> devm_ioremap_resource(), which calls devm_request_mem_region()
>>>> internally, so the driver does at least request that region, which
>>>> should keep us from assigning it to PCI devices.
>>
>> I'm hoping you can grant an exception on the grounds that the quirk will
>> keep the region from actually being used. And then somehow we document
>> this in the driver.
>>
>>>> But it still isn't quite right to tell the PCI core that the region is
>>>> available on the root bus.
>>>
>>> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
>>> region is consumed completely.
>>> 1f2b0000-1f2bffff : PCI Bus 0000:00
>>>   1f2b0000-1f2bffff : PCIe CSR
>>>
>>> e040000000-e07fffffff : PCI Bus 0000:00
>>>   e040000000-e0401fffff : PCI Bus 0000:01
>>>     e040000000-e0400fffff : 0000:01:00.0
>>>       e040000000-e0400fffff : mlx4_core
>>>     e040100000-e0401fffff : 0000:01:00.0
>>> e0d0000000-e0dfffffff : PCI ECAM
>>> f000000000-ffffffffff : PCI Bus 0000:00
>>>   f000000000-f001ffffff : PCI Bus 0000:01
>>>     f000000000-f001ffffff : 0000:01:00.0
>>>       f000000000-f001ffffff : mlx4_core
>>>
>>> Using hard-coded resources for mmio space make the quirk rely on the
>>> segment number passing from the firmware. Using Mark's method or
>>> acpi_get_rc_resource can discover the mmio space and consume all of
>>> the space, but as you mentioned, it leaves the defect that PCI core
>>> considers the mmio space as available resource for secondary devices
>>> although it will never allocate the mmio space to secondary devices as
>>> the RC already reserves and consumes all of the space.
>>
>> Indeed. It's not clean, but perhaps we can get away with it on the
>> grounds that there are existing systems out there and this won't
>> be allowed to happen again in the future :)
>>
>> Jon.
>>
>
>
> --
> Computer Architect | Sent from my Fedora powered laptop
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Masters Dec. 2, 2016, 8:08 a.m. UTC | #16
Quick reply - sorry for top posting (it's 3am...) - I would favor keeping the existing Fixed32Memory _CRS but switching over to prefer the PNP entry as a good citizen. The trouble is that it would be unfortunate if existing distros stopped working on newer firmware and it would lead to IMO more pain than it is worth. Hopefully for this reason Bjorn will take your v4 as-is for now and let us all figure out the cleanest long term cleanup later.
Bjorn Helgaas Dec. 5, 2016, 9:53 p.m. UTC | #17
On Thu, Dec 01, 2016 at 06:52:23PM -0800, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:

> I made similar changes in v4 patch. The ECAM quirk will be built when
> ACPI and PCI_QUIRKS are enabled.
> 
> When building for DT only, the ECAM quirk won't be compiled.

Perfect.

> >>  #define XGENE_PCIE_IP_VER_UNKN               0
> >>  #define XGENE_PCIE_IP_VER_1          1
> >> +#define XGENE_PCIE_IP_VER_2          2
> >
> > This isn't used anywhere, which makes me wonder whether it's worth
> > keeping it.
> 
> V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
> XGENE_PCIE_IP_VER_2). This will be used to indicate that the
> controller is V2, and to enable configuration request retry status
> feature (by not disable it like V1 controller).

OK, I see.  You don't actually need XGENE_PCIE_IP_VER_2, you just need
port->version to be something other than XGENE_PCIE_IP_VER_1.  So this
is fine as it is.

> >>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
> >>  {
> >> -     struct xgene_pcie_port *port = bus->sysdata;
> >> +     struct pci_config_window *cfg;
> >> +     struct xgene_pcie_port *port;
> >> +
> >> +     if (acpi_disabled)
> >> +             port = bus->sysdata;
> >> +     else {
> >> +             cfg = bus->sysdata;
> >> +             port = cfg->priv;
> >> +     }
> >
> > I would really, really like to figure out a way to get rid of these
> > "if (acpi_disabled)" checks sprinkled through here.  Is there any way
> > we can set up bus->sysdata to be the same, regardless of whether we're
> > using this as a platform driver or an ACPI quirk?
> 
> Right now, I created a inline function to extract xgene_pcie_port from
> pci_bus. In order to get rid of acpi_disabled, I will need to make
> sysdata in DT case also point to pci_config_window structure, which
> means I will need to convert and test the DT driver to use ecam ops.
> It is a separate patch itself. So I think I should do it at later time
> (after this ECAM quirk patch). I hope you are ok with this.

OK.  I did the simple-minded version of leaving the DT ops the same
but making sysdata point to a dummy pci_config_window.  Your proposal
of using ECAM for DT would be much better.

It's interesting that you actually already use the same accessors
except that DT uses the 32-bit pci_generic_config_write32() and ACPI
uses the regular pci_generic_config_write(). I guess that means the
hardware actually *does* support sub-32 bit writes?

> I need to define the function (xgene_get_csr_resource()) inside
> pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
> X-Gene firmware does not have a dedicate PNP0C02 node to declare the
> resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
> error due to acpi_bus_get_device() returns error.

Looks good.

> > All these init functions are almost identical.  Can we factor this out
> > by having wrappers that do nothing more than pass in the table and
> > version, and put the kzalloc and ioremap in a shared back-end?
> 
> I refactor-ed these .init functions. And as a result, there are only 2
> ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.

Looks good.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Dec. 5, 2016, 10:09 p.m. UTC | #18
On Mon, Dec 5, 2016 at 1:53 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Dec 01, 2016 at 06:52:23PM -0800, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>
>> I made similar changes in v4 patch. The ECAM quirk will be built when
>> ACPI and PCI_QUIRKS are enabled.
>>
>> When building for DT only, the ECAM quirk won't be compiled.
>
> Perfect.
>
>> >>  #define XGENE_PCIE_IP_VER_UNKN               0
>> >>  #define XGENE_PCIE_IP_VER_1          1
>> >> +#define XGENE_PCIE_IP_VER_2          2
>> >
>> > This isn't used anywhere, which makes me wonder whether it's worth
>> > keeping it.
>>
>> V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
>> XGENE_PCIE_IP_VER_2). This will be used to indicate that the
>> controller is V2, and to enable configuration request retry status
>> feature (by not disable it like V1 controller).
>
> OK, I see.  You don't actually need XGENE_PCIE_IP_VER_2, you just need
> port->version to be something other than XGENE_PCIE_IP_VER_1.  So this
> is fine as it is.
>
>> >>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>> >>  {
>> >> -     struct xgene_pcie_port *port = bus->sysdata;
>> >> +     struct pci_config_window *cfg;
>> >> +     struct xgene_pcie_port *port;
>> >> +
>> >> +     if (acpi_disabled)
>> >> +             port = bus->sysdata;
>> >> +     else {
>> >> +             cfg = bus->sysdata;
>> >> +             port = cfg->priv;
>> >> +     }
>> >
>> > I would really, really like to figure out a way to get rid of these
>> > "if (acpi_disabled)" checks sprinkled through here.  Is there any way
>> > we can set up bus->sysdata to be the same, regardless of whether we're
>> > using this as a platform driver or an ACPI quirk?
>>
>> Right now, I created a inline function to extract xgene_pcie_port from
>> pci_bus. In order to get rid of acpi_disabled, I will need to make
>> sysdata in DT case also point to pci_config_window structure, which
>> means I will need to convert and test the DT driver to use ecam ops.
>> It is a separate patch itself. So I think I should do it at later time
>> (after this ECAM quirk patch). I hope you are ok with this.
>
> OK.  I did the simple-minded version of leaving the DT ops the same
> but making sysdata point to a dummy pci_config_window.  Your proposal
> of using ECAM for DT would be much better.
>
> It's interesting that you actually already use the same accessors
> except that DT uses the 32-bit pci_generic_config_write32() and ACPI
> uses the regular pci_generic_config_write(). I guess that means the
> hardware actually *does* support sub-32 bit writes?

Yes, the hardware does support sub-32 bit writes (and reads). This is
another item in my TODO list for DT (which does not seem quite urgent
now): switch to use pci_generic_config_write for DT. But, well, I will
need to do that for read as well (for both ACPI and DT).

>
>> I need to define the function (xgene_get_csr_resource()) inside
>> pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
>> X-Gene firmware does not have a dedicate PNP0C02 node to declare the
>> resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
>> error due to acpi_bus_get_device() returns error.
>
> Looks good.
>
>> > All these init functions are almost identical.  Can we factor this out
>> > by having wrappers that do nothing more than pass in the table and
>> > version, and put the kzalloc and ioremap in a shared back-end?
>>
>> I refactor-ed these .init functions. And as a result, there are only 2
>> ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.
>
> Looks good.
>
> Bjorn
Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index ac21db3..eb6125b 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -57,6 +57,37 @@  struct mcfg_fixup {
 	{ "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
 	{ "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
 	{ "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
+
+#ifdef CONFIG_PCI_XGENE
+#define XGENE_V1_ECAM_MCFG(rev, seg) \
+	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
+		&xgene_v1_pcie_ecam_ops }
+#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
+	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
+		&xgene_v2_1_pcie_ecam_ops }
+#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
+	{"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
+		&xgene_v2_2_pcie_ecam_ops }
+
+	/* X-Gene SoC with v1 PCIe controller */
+	XGENE_V1_ECAM_MCFG(1, 0),
+	XGENE_V1_ECAM_MCFG(1, 1),
+	XGENE_V1_ECAM_MCFG(1, 2),
+	XGENE_V1_ECAM_MCFG(1, 3),
+	XGENE_V1_ECAM_MCFG(1, 4),
+	XGENE_V1_ECAM_MCFG(2, 0),
+	XGENE_V1_ECAM_MCFG(2, 1),
+	XGENE_V1_ECAM_MCFG(2, 2),
+	XGENE_V1_ECAM_MCFG(2, 3),
+	XGENE_V1_ECAM_MCFG(2, 4),
+	/* X-Gene SoC with v2.1 PCIe controller */
+	XGENE_V2_1_ECAM_MCFG(3, 0),
+	XGENE_V2_1_ECAM_MCFG(3, 1),
+	/* X-Gene SoC with v2.2 PCIe controller */
+	XGENE_V2_2_ECAM_MCFG(4, 0),
+	XGENE_V2_2_ECAM_MCFG(4, 1),
+	XGENE_V2_2_ECAM_MCFG(4, 2),
+#endif
 };
 
 static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 1de23d7..43d7c69 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -27,6 +27,8 @@ 
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -64,6 +66,7 @@ 
 /* PCIe IP version */
 #define XGENE_PCIE_IP_VER_UNKN		0
 #define XGENE_PCIE_IP_VER_1		1
+#define XGENE_PCIE_IP_VER_2		2
 
 struct xgene_pcie_port {
 	struct device_node	*node;
@@ -97,7 +100,15 @@  static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
  */
 static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct pci_config_window *cfg;
+	struct xgene_pcie_port *port;
+
+	if (acpi_disabled)
+		port = bus->sysdata;
+	else {
+		cfg = bus->sysdata;
+		port = cfg->priv;
+	}
 
 	if (bus->number >= (bus->primary + 1))
 		return port->cfg_base + AXI_EP_CFG_ACCESS;
@@ -111,10 +122,18 @@  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
  */
 static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct pci_config_window *cfg;
+	struct xgene_pcie_port *port;
 	unsigned int b, d, f;
 	u32 rtdid_val = 0;
 
+	if (acpi_disabled)
+		port = bus->sysdata;
+	else {
+		cfg = bus->sysdata;
+		port = cfg->priv;
+	}
+
 	b = bus->number;
 	d = PCI_SLOT(devfn);
 	f = PCI_FUNC(devfn);
@@ -158,7 +177,15 @@  static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
 static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 *val)
 {
-	struct xgene_pcie_port *port = bus->sysdata;
+	struct pci_config_window *cfg;
+	struct xgene_pcie_port *port;
+
+	if (acpi_disabled)
+		port = bus->sysdata;
+	else {
+		cfg = bus->sysdata;
+		port = cfg->priv;
+	}
 
 	if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
 	    PCIBIOS_SUCCESSFUL)
@@ -189,6 +216,138 @@  static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 	.write = pci_generic_config_write32,
 };
 
+#ifdef CONFIG_ACPI
+static struct resource xgene_v1_csr_res[] = {
+	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
+	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
+	[3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
+	[4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct device *dev = cfg->parent;
+	struct xgene_pcie_port *port;
+	struct resource *csr;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	csr = &xgene_v1_csr_res[root->segment];
+	port->csr_base = devm_ioremap_resource(dev, csr);
+	if (IS_ERR(port->csr_base)) {
+		kfree(port);
+		return -ENOMEM;
+	}
+
+	port->cfg_base = cfg->win;
+	port->version = XGENE_PCIE_IP_VER_1;
+
+	cfg->priv = port;
+
+	return 0;
+}
+
+struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
+	.bus_shift      = 16,
+	.init           = xgene_v1_pcie_ecam_init,
+	.pci_ops        = {
+		.map_bus        = xgene_pcie_map_bus,
+		.read           = xgene_pcie_config_read32,
+		.write          = pci_generic_config_write,
+	}
+};
+
+static struct resource xgene_v2_1_csr_res[] = {
+	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+	[1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v2_1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct device *dev = cfg->parent;
+	struct xgene_pcie_port *port;
+	struct resource *csr;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	csr = &xgene_v2_1_csr_res[root->segment];
+	port->csr_base = devm_ioremap_resource(dev, csr);
+	if (IS_ERR(port->csr_base)) {
+		kfree(port);
+		return -ENOMEM;
+	}
+
+	port->cfg_base = cfg->win;
+	port->version = XGENE_PCIE_IP_VER_2;
+
+	cfg->priv = port;
+
+	return 0;
+}
+
+struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops = {
+	.bus_shift      = 16,
+	.init           = xgene_v2_1_pcie_ecam_init,
+	.pci_ops        = {
+		.map_bus        = xgene_pcie_map_bus,
+		.read           = xgene_pcie_config_read32,
+		.write          = pci_generic_config_write,
+	}
+};
+
+static struct resource xgene_v2_2_csr_res[] = {
+	[0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+	[1] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
+	[2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v2_2_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct device *dev = cfg->parent;
+	struct xgene_pcie_port *port;
+	struct resource *csr;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	csr = &xgene_v2_2_csr_res[root->segment];
+	port->csr_base = devm_ioremap_resource(dev, csr);
+	if (IS_ERR(port->csr_base)) {
+		kfree(port);
+		return -ENOMEM;
+	}
+
+	port->cfg_base = cfg->win;
+	port->version = XGENE_PCIE_IP_VER_2;
+
+	cfg->priv = port;
+
+	return 0;
+}
+
+struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops = {
+	.bus_shift      = 16,
+	.init           = xgene_v2_2_pcie_ecam_init,
+	.pci_ops        = {
+		.map_bus        = xgene_pcie_map_bus,
+		.read           = xgene_pcie_config_read32,
+		.write          = pci_generic_config_write,
+	}
+};
+#endif
+
 static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
 				  u32 flags, u64 size)
 {
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index f5740b7..47ab947 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -62,6 +62,13 @@  void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
 /* ops for buggy ECAM that supports only 32-bit accesses */
 extern struct pci_ecam_ops pci_32b_ops;
 
+/* ECAM ops for known quirks */
+#ifdef CONFIG_PCI_XGENE
+extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops;
+extern struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops;
+extern struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops;
+#endif
+
 #ifdef CONFIG_PCI_HOST_GENERIC
 /* for DT-based PCI controllers that support ECAM */
 int pci_host_common_probe(struct platform_device *pdev,