diff mbox series

[v3,17/17] PCI: dwc: Add Baikal-T1 PCIe controller support

Message ID 20220610085706.15741-18-Sergey.Semin@baikalelectronics.ru
State New
Headers show
Series PCI: dwc: Add generic resources and Baikal-T1 support | expand

Commit Message

Serge Semin June 10, 2022, 8:57 a.m. UTC
Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
trained to work up to Gen.3 speed over up to x4 lanes. The host controller
is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
turn is connected to the DWC 10G PHY. The whole system is supposed to be
fed up with four clock sources: DBI peripheral clock, AXI application
clocks and external PHY/core reference clock generating the 100MHz signal.
In addition to that the platform provide a way to reset each part of the
controller: sticky/non-sticky bits, host controller core, PIPE interface,
PCS/PHY and Hot/Power reset signal. The driver also provides a way to
handle the GPIO-based PERST# signal.

Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
interface accessors which make sure the IO operations are dword-aligned.

Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>

---

Changelog v2:
- Rename 'syscon' property to 'baikal,bt1-syscon'.

Changelog v3:
- Use the clocks/resets handlers defined in the DW PCIe core descriptor.
  (@Rob)
- Redefine PCI host bridge config space accessors with the generic
  pci_generic_config_read32() and pci_generic_config_write32() methods.
  (@Rob)
---
 drivers/pci/controller/dwc/Kconfig    |   9 +
 drivers/pci/controller/dwc/Makefile   |   1 +
 drivers/pci/controller/dwc/pcie-bt1.c | 649 ++++++++++++++++++++++++++
 3 files changed, 659 insertions(+)
 create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c

Comments

Bjorn Helgaas June 15, 2022, 4:48 p.m. UTC | #1
On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> turn is connected to the DWC 10G PHY. The whole system is supposed to be
> fed up with four clock sources: DBI peripheral clock, AXI application
> clocks and external PHY/core reference clock generating the 100MHz signal.
> In addition to that the platform provide a way to reset each part of the
> controller: sticky/non-sticky bits, host controller core, PIPE interface,
> PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> handle the GPIO-based PERST# signal.
> 
> Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> interface accessors which make sure the IO operations are dword-aligned.
> 
> Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>

> +static int bt1_pcie_start_ltssm(struct dw_pcie *pci)
> +{
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +	u32 val;
> +	int ret;
> +
> +	/*
> +	 * Enable LTSSM and make sure it was able to establish both PHY and
> +	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
> +	 */
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
> +				       1000, 1000000);
> +	if (ret) {
> +		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
> +		return ret;
> +	}
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
> +				       1000, 1000000);
> +	if (ret) {
> +		dev_err(pci->dev, "LTSSM failed to set data link up\n");
> +		return ret;
> +	}
> +
> +	/*
> +	 * Activate direct speed change after the link is established in an
> +	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
> +	 * This is required at least to get 8.0 GT/s speed.
> +	 */
> +	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
> +	val |= PORT_LOGIC_SPEED_CHANGE;
> +	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
> +				       1000, 1000000);
> +	if (ret)
> +		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
> +
> +	return ret;
> +}
> +
> +static void bt1_pcie_stop_ltssm(struct dw_pcie *pci)
> +{
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> +}
> +
> +struct dw_pcie_ops bt1_pcie_dw_ops = {
> +	.read_dbi = bt1_pcie_read_dbi,
> +	.write_dbi = bt1_pcie_write_dbi,
> +	.write_dbi2 = bt1_pcie_write_dbi2,
> +	.start_link = bt1_pcie_start_ltssm,
> +	.stop_link = bt1_pcie_stop_ltssm,
> +};

Should be static and const.  Please rename to "dw_pcie_ops" as most
drivers use.  Please rename bt1_pcie_start_ltssm() and
bt1_pcie_stop_ltssm() to bt1_pcie_start_link() and
bt1_pcie_stop_link() for consistency with other drivers to make
maintenance easier.

> +static struct pci_ops bt1_pcie_ops = {
> +	.map_bus = dw_pcie_own_conf_map_bus,
> +	.read = pci_generic_config_read32,
> +	.write = pci_generic_config_write32,
> +};
> +
> +static int bt1_pcie_get_res(struct bt1_pcie *btpci)

Can you name this something similar to what other drivers use?  There
are a couple *_pcie_get_resources() functions (normally called from
*_pcie_probe()), but no *_get_res() yet.

> +{
> +	struct device *dev = btpci->dw.dev;
> +	int i, ret;
> +
> +	/* DBI access is supposed to be performed by the dword-aligned IOs */
> +	btpci->dw.pp.bridge->ops = &bt1_pcie_ops;
> +
> +	/* AXI-interface is configured with 64-bit address bus width */
> +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> +					   DMA_BIT_MASK(64));

Just to double-check since this is the first instance of
dma_coerce_mask_and_coherent() in drivers/pci -- I guess Baikal-T1 is
unique in needing this?

> +	if (ret) {
> +		ret = dma_set_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> +						DMA_BIT_MASK(32));

Also the first instance of dma_set_mask_and_coherent() in dwc-based
drivers, so double-checking here, too.

> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* These CSRs are in MMIO so we won't check the regmap-methods status */
> +	btpci->sys_regs =
> +		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
> +	if (IS_ERR(btpci->sys_regs))
> +		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
> +				     "Failed to get syscon\n");
> +
> +	/* Make sure all the required resources have been specified */
> +	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
> +		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
> +			dev_err(dev, "App clocks set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
> +		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
> +			dev_err(dev, "Core clocks set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
> +		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
> +			dev_err(dev, "App resets set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
> +		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
> +			dev_err(dev, "Core resets set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)

Can you name this something similar to what other drivers use?

> +{
> +	struct device *dev = btpci->dw.dev;
> +	struct dw_pcie *pci = &btpci->dw;
> +	int ret;
> +
> +	/* Disable LTSSM for sure */
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> +
> +	/*
> +	 * Application reset controls are trigger-based so de-assert the core
> +	 * resets only.
> +	 */
> +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> +	if (ret)
> +		dev_err(dev, "Failed to assert core resets\n");
> +
> +	/*
> +	 * Clocks are disabled by default at least in accordance with the clk
> +	 * enable counter value on init stage.
> +	 */
> +	if (!init) {
> +		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +
> +		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +	}
> +
> +	/* The peripheral devices are unavailable anyway so reset them too */
> +	gpiod_set_value_cansleep(pci->pe_rst, 1);
> +
> +	/* Make sure the reset is settled */
> +	usleep_range(1, 10);

Is this duration related to something in the PCIe spec?  Or the DWC
spec?  I'd really like to use named constants when possible, although
we have a ton of bare magic numbers currently.

Similar for the poll timeouts and the "state settled" sleep below.

> +}
> +
> +/*
> + * Implements the cold reset procedure in accordance with the reference manual
> + * and available PM signals.
> + */
> +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
> +{
> +	struct device *dev = btpci->dw.dev;
> +	struct dw_pcie *pci = &btpci->dw;
> +	u32 val;
> +	int ret;
> +
> +	/* First get out of the Power/Hot reset state */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PHY reset\n");
> +		return ret;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert hot reset\n");
> +		goto err_assert_pwr_rst;
> +	}
> +
> +	/* Wait for the PM-core to stop requesting the PHY reset */
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> +				       !(val & BT1_CCU_PCIE_REQ_PHY_RST), 1, 1000);
> +	if (ret) {
> +		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
> +		goto err_assert_hot_rst;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PHY reset\n");
> +		goto err_assert_hot_rst;
> +	}
> +
> +	/* Clocks can be now enabled, but the ref one is crucial at this stage */
> +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +	if (ret) {
> +		dev_err(dev, "Failed to enable app clocks\n");
> +		goto err_assert_phy_rst;
> +	}
> +
> +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +	if (ret) {
> +		dev_err(dev, "Failed to enable ref clocks\n");
> +		goto err_disable_app_clk;
> +	}
> +
> +	/* Wait for the PM to stop requesting the controller core reset */
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> +				       !(val & BT1_CCU_PCIE_REQ_CORE_RST), 1, 1000);
> +	if (ret) {
> +		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
> +		goto err_disable_core_clk;
> +	}
> +
> +	/* PCS-PIPE interface and controller core can be now activated */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PIPE reset\n");
> +		goto err_disable_core_clk;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert core reset\n");
> +		goto err_assert_pipe_rst;
> +	}
> +
> +	/* It's recommended to reset the core and application logic together */
> +	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
> +	if (ret) {
> +		dev_err(dev, "Failed to reset app domain\n");
> +		goto err_assert_core_rst;
> +	}
> +
> +	/* Sticky/Non-sticky CSR flags can be now unreset too */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert sticky reset\n");
> +		goto err_assert_core_rst;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert non-sticky reset\n");
> +		goto err_assert_sticky_rst;
> +	}
> +
> +	/* Activate the PCIe bus peripheral devices */
> +	gpiod_set_value_cansleep(pci->pe_rst, 0);
> +
> +	/* Make sure the state is settled (LTSSM is still disabled though) */
> +	usleep_range(1, 10);
> +
> +	return 0;
> +
> +err_assert_sticky_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> +
> +err_assert_core_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> +
> +err_assert_pipe_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> +
> +err_disable_core_clk:
> +	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +
> +err_disable_app_clk:
> +	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +
> +err_assert_phy_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> +
> +err_assert_hot_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> +
> +err_assert_pwr_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> +
> +	return ret;
> +}
> +
> +static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +	int ret;
> +
> +	ret = bt1_pcie_get_res(btpci);
> +	if (ret)
> +		return ret;
> +
> +	bt1_pcie_full_stop_bus(btpci, true);
> +
> +	return bt1_pcie_cold_start_bus(btpci);

Generally I think the get_res-type stuff happens elsewhere.  I'm not
an expert in that, but this doesn't look much like other
*_pcie_host_init() functions, which mainly deal with enabling clocks,
reset assertion/deassertion, PHY init, interrupt enable, etc.

Maybe this is connected with your new common clocks/resets properties.
I'm certainly in favor of making as much of that common as is
practical!  I hope we can take advantage of that and make more
consistency across the dwc-based drivers as well.

> +}
> +
> +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +
> +	bt1_pcie_full_stop_bus(btpci, false);
> +}
> +
> +struct dw_pcie_host_ops bt1_pcie_host_ops = {
> +	.host_init = bt1_pcie_host_init,
> +	.host_deinit = bt1_pcie_host_deinit,
> +};
> +
> +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> +{
> +	struct bt1_pcie *btpci;
> +
> +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> +	if (!btpci)
> +		return ERR_PTR(-ENOMEM);
> +
> +	btpci->pdev = pdev;
> +
> +	platform_set_drvdata(pdev, btpci);
> +
> +	return btpci;

I don't think it's worth splitting this into a separate function.  I
think it would be better to use the same structure as other dwc-based
drivers and keep this in bt1_pcie_probe().

> +}
> +
> +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)

All other dwc-based drivers call dw_pcie_host_init() from either
*_pcie_probe() or *_add_pcie_port().  Please use a similar convention.

> +{
> +	struct device *dev = &btpci->pdev->dev;
> +	int ret;
> +
> +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

Why do you need this when no other dwc-based drivers do?  Is Baikal-T1
different in this respect?

> +	if (ret)
> +		return ret;
> +
> +	btpci->dw.version = DW_PCIE_VER_460A;
> +	btpci->dw.dev = dev;
> +	btpci->dw.ops = &bt1_pcie_dw_ops;
> +
> +	btpci->dw.pp.num_vectors = MAX_MSI_IRQS;
> +	btpci->dw.pp.ops = &bt1_pcie_host_ops;
> +
> +	dw_pcie_cap_set(&btpci->dw, REQ_RES);
> +
> +	ret = dw_pcie_host_init(&btpci->dw.pp);
> +	if (ret)
> +		dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n");
> +
> +	return ret;
> +}
> +
> +static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)

Can you call dw_pcie_host_deinit() from the same place as other
drivers?

  $ git grep -p dw_pcie_host_deinit drivers/pci/controller/dwc

> +{
> +	dw_pcie_host_deinit(&btpci->dw.pp);
> +}
> +
> +static int bt1_pcie_probe(struct platform_device *pdev)
> +{
> +	struct bt1_pcie *btpci;
> +
> +	btpci = bt1_pcie_create_data(pdev);
> +	if (IS_ERR(btpci))
> +		return PTR_ERR(btpci);
> +
> +	return bt1_pcie_add_dw_port(btpci);
> +}
> +
> +static int bt1_pcie_remove(struct platform_device *pdev)
> +{
> +	struct bt1_pcie *btpci = platform_get_drvdata(pdev);
> +
> +	bt1_pcie_del_dw_port(btpci);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id bt1_pcie_of_match[] = {
> +	{ .compatible = "baikal,bt1-pcie" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, bt1_pcie_of_match);
> +
> +static struct platform_driver bt1_pcie_driver = {
> +	.probe = bt1_pcie_probe,
> +	.remove = bt1_pcie_remove,
> +	.driver = {
> +		.name	= "bt1-pcie",
> +		.of_match_table = bt1_pcie_of_match,
> +	},
> +};
> +module_platform_driver(bt1_pcie_driver);
> +
> +MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
> +MODULE_DESCRIPTION("Baikal-T1 PCIe driver");
> +MODULE_LICENSE("GPL");
> -- 
> 2.35.1
>
Rob Herring (Arm) June 15, 2022, 5:10 p.m. UTC | #2
On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> turn is connected to the DWC 10G PHY. The whole system is supposed to be
> fed up with four clock sources: DBI peripheral clock, AXI application
> clocks and external PHY/core reference clock generating the 100MHz signal.
> In addition to that the platform provide a way to reset each part of the
> controller: sticky/non-sticky bits, host controller core, PIPE interface,
> PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> handle the GPIO-based PERST# signal.
> 
> Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> interface accessors which make sure the IO operations are dword-aligned.
> 
> Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
> 
> ---
> 
> Changelog v2:
> - Rename 'syscon' property to 'baikal,bt1-syscon'.
> 
> Changelog v3:
> - Use the clocks/resets handlers defined in the DW PCIe core descriptor.
>   (@Rob)
> - Redefine PCI host bridge config space accessors with the generic
>   pci_generic_config_read32() and pci_generic_config_write32() methods.
>   (@Rob)
> ---
>  drivers/pci/controller/dwc/Kconfig    |   9 +
>  drivers/pci/controller/dwc/Makefile   |   1 +
>  drivers/pci/controller/dwc/pcie-bt1.c | 649 ++++++++++++++++++++++++++
>  3 files changed, 659 insertions(+)
>  create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c
> 
> diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> index 62ce3abf0f19..771b8b146623 100644
> --- a/drivers/pci/controller/dwc/Kconfig
> +++ b/drivers/pci/controller/dwc/Kconfig
> @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP
>  	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
>  	  endpoint mode. This uses the DesignWare core.
>  
> +config PCIE_BT1
> +	tristate "Baikal-T1 PCIe controller"
> +	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
> +	depends on PCI_MSI_IRQ_DOMAIN
> +	select PCIE_DW_HOST
> +	help
> +	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
> +	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
> +
>  config PCIE_ROCKCHIP_DW_HOST
>  	bool "Rockchip DesignWare PCIe controller"
>  	select PCIE_DW
> diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> index 8ba7b67f5e50..bf5c311875a1 100644
> --- a/drivers/pci/controller/dwc/Makefile
> +++ b/drivers/pci/controller/dwc/Makefile
> @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o
>  obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
>  obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
>  obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
> +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
>  obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
>  obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
>  obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
> diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
> new file mode 100644
> index 000000000000..03f035743b78
> --- /dev/null
> +++ b/drivers/pci/controller/dwc/pcie-bt1.c
> @@ -0,0 +1,649 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
> + *
> + * Authors:
> + *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
> + *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
> + *
> + * Baikal-T1 PCIe controller driver
> + */
> +
> +#include <linux/bitfield.h>
> +#include <linux/bits.h>
> +#include <linux/clk.h>
> +#include <linux/delay.h>
> +#include <linux/gpio/consumer.h>
> +#include <linux/kernel.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/platform_device.h>
> +#include <linux/regmap.h>
> +#include <linux/reset.h>
> +#include <linux/types.h>
> +
> +#include "pcie-designware.h"
> +
> +/* Baikal-T1 System CCU control registers */
> +#define BT1_CCU_PCIE_CLKC			0x140
> +#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
> +#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
> +#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
> +
> +#define BT1_CCU_PCIE_RSTC			0x144
> +#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
> +#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
> +#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
> +#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
> +#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
> +#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
> +
> +#define BT1_CCU_PCIE_PMSC			0x148
> +#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
> +#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
> +#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
> +#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
> +#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
> +#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
> +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
> +#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
> +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
> +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
> +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
> +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
> +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
> +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
> +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
> +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
> +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
> +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
> +#define BT1_CCU_PCIE_LTSSM_L0			0x11
> +#define BT1_CCU_PCIE_LTSSM_L0S			0x12
> +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
> +#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
> +#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
> +#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
> +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
> +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
> +#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
> +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
> +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
> +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
> +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
> +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
> +#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
> +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
> +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
> +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
> +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
> +#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
> +#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
> +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
> +#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
> +#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
> +#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
> +#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
> +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
> +#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
> +#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
> +#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
> +#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
> +#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
> +#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
> +#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
> +#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
> +
> +#define BT1_CCU_PCIE_GENC			0x14c
> +#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
> +#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
> +#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
> +#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
> +#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
> +#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
> +#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
> +#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
> +#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
> +
> +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
> +({ \
> +	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
> +	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
> +})
> +
> +/* Baikal-T1 PCIe specific control registers */
> +#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
> +#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
> +
> +#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
> +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
> +#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
> +#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
> +#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
> +
> +#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
> +#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
> +
> +#define BT1_PCIE_AXI2MGM_READDATA		0xd10
> +#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
> +
> +/* Generic Baikal-T1 PCIe interface resources */
> +#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
> +#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
> +#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
> +#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
> +
> +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
> +	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
> +};
> +
> +static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
> +	DW_PCIE_REF_CLK,
> +};
> +
> +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
> +	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
> +};
> +
> +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
> +	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
> +	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
> +};
> +
> +struct bt1_pcie {
> +	struct dw_pcie dw;
> +	struct platform_device *pdev;
> +	struct regmap *sys_regs;
> +};
> +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
> +
> +/*
> + * Baikal-T1 MMIO space must be read/written by the dword-aligned
> + * instructions. Note the methods are optimized to have the dword operations
> + * performed with minimum overhead as the most frequently used ones.
> + */
> +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
> +{
> +	unsigned int ofs = (uintptr_t)addr & 0x3;
> +
> +	if (!IS_ALIGNED((uintptr_t)addr, size))
> +		return PCIBIOS_BAD_REGISTER_NUMBER;

This isn't for PCI config space accessors, don't use PCIBIOS_*. We 
really want to get rid of those.

You are in control of all the accesses, so the error conditions should 
never happen, don't need to be checked, and don't need to be returned.

> +
> +	*val = readl(addr - ofs) >> ofs * BITS_PER_BYTE;
> +	if (size == 4) {
> +		return PCIBIOS_SUCCESSFUL;
> +	} else if (size == 2) {
> +		*val &= 0xffff;
> +		return PCIBIOS_SUCCESSFUL;
> +	} else if (size == 1) {
> +		*val &= 0xff;
> +		return PCIBIOS_SUCCESSFUL;
> +	}
> +
> +	return PCIBIOS_BAD_REGISTER_NUMBER;
> +}
> +
> +static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val)
> +{
> +	unsigned int ofs = (uintptr_t)addr & 0x3;
> +	u32 tmp, mask;
> +
> +	if (!IS_ALIGNED((uintptr_t)addr, size))
> +		return PCIBIOS_BAD_REGISTER_NUMBER;
> +
> +	if (size == 4) {
> +		writel(val, addr);
> +		return PCIBIOS_SUCCESSFUL;
> +	} else if (size == 2 || size == 1) {
> +		mask = GENMASK(size * BITS_PER_BYTE - 1, 0);
> +		tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE);
> +		tmp |= (val & mask) << ofs * BITS_PER_BYTE;
> +		writel(tmp, addr - ofs);
> +		return PCIBIOS_SUCCESSFUL;
> +	}
> +
> +	return PCIBIOS_BAD_REGISTER_NUMBER;
> +}
> +
> +static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
> +			     size_t size)
> +{
> +	int ret;
> +	u32 val;
> +
> +	ret = bt1_pcie_read_mmio(base + reg, size, &val);
> +	if (ret != PCIBIOS_SUCCESSFUL) {
> +		dev_err(pci->dev, "Read DBI address failed\n");
> +		return ~0U;
> +	}
> +
> +	return val;
> +}
> +
> +static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
> +			       size_t size, u32 val)
> +{
> +	int ret;
> +
> +	ret = bt1_pcie_write_mmio(base + reg, size, val);
> +	if (ret != PCIBIOS_SUCCESSFUL)
> +		dev_err(pci->dev, "Write DBI address failed\n");
> +}
> +
> +static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
> +				size_t size, u32 val)
> +{
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +	int ret;
> +
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE);
> +
> +	ret = bt1_pcie_write_mmio(base + reg, size, val);
> +	if (ret != PCIBIOS_SUCCESSFUL)
> +		dev_err(pci->dev, "Write DBI2 address failed\n");
> +
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_DBI2_MODE, 0);
> +}
> +
> +static int bt1_pcie_start_ltssm(struct dw_pcie *pci)
> +{
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +	u32 val;
> +	int ret;
> +
> +	/*
> +	 * Enable LTSSM and make sure it was able to establish both PHY and
> +	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
> +	 */
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
> +				       1000, 1000000);
> +	if (ret) {
> +		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
> +		return ret;
> +	}
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
> +				       1000, 1000000);
> +	if (ret) {
> +		dev_err(pci->dev, "LTSSM failed to set data link up\n");
> +		return ret;
> +	}
> +
> +	/*
> +	 * Activate direct speed change after the link is established in an
> +	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
> +	 * This is required at least to get 8.0 GT/s speed.
> +	 */
> +	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
> +	val |= PORT_LOGIC_SPEED_CHANGE;
> +	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> +
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> +				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
> +				       1000, 1000000);
> +	if (ret)
> +		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
> +
> +	return ret;
> +}
> +
> +static void bt1_pcie_stop_ltssm(struct dw_pcie *pci)
> +{
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> +}
> +
> +struct dw_pcie_ops bt1_pcie_dw_ops = {
> +	.read_dbi = bt1_pcie_read_dbi,
> +	.write_dbi = bt1_pcie_write_dbi,
> +	.write_dbi2 = bt1_pcie_write_dbi2,
> +	.start_link = bt1_pcie_start_ltssm,
> +	.stop_link = bt1_pcie_stop_ltssm,
> +};
> +
> +static struct pci_ops bt1_pcie_ops = {
> +	.map_bus = dw_pcie_own_conf_map_bus,
> +	.read = pci_generic_config_read32,
> +	.write = pci_generic_config_write32,
> +};
> +
> +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> +{
> +	struct device *dev = btpci->dw.dev;
> +	int i, ret;
> +
> +	/* DBI access is supposed to be performed by the dword-aligned IOs */
> +	btpci->dw.pp.bridge->ops = &bt1_pcie_ops;
> +
> +	/* AXI-interface is configured with 64-bit address bus width */
> +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> +					   DMA_BIT_MASK(64));
> +	if (ret) {
> +		ret = dma_set_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> +						DMA_BIT_MASK(32));
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* These CSRs are in MMIO so we won't check the regmap-methods status */
> +	btpci->sys_regs =
> +		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
> +	if (IS_ERR(btpci->sys_regs))
> +		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
> +				     "Failed to get syscon\n");
> +
> +	/* Make sure all the required resources have been specified */
> +	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
> +		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
> +			dev_err(dev, "App clocks set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
> +		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
> +			dev_err(dev, "Core clocks set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
> +		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
> +			dev_err(dev, "App resets set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
> +		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
> +			dev_err(dev, "Core resets set is incomplete\n");
> +			return -ENOENT;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> +{
> +	struct device *dev = btpci->dw.dev;
> +	struct dw_pcie *pci = &btpci->dw;
> +	int ret;
> +
> +	/* Disable LTSSM for sure */
> +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> +
> +	/*
> +	 * Application reset controls are trigger-based so de-assert the core
> +	 * resets only.
> +	 */
> +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> +	if (ret)
> +		dev_err(dev, "Failed to assert core resets\n");
> +
> +	/*
> +	 * Clocks are disabled by default at least in accordance with the clk
> +	 * enable counter value on init stage.
> +	 */
> +	if (!init) {
> +		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +
> +		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +	}
> +
> +	/* The peripheral devices are unavailable anyway so reset them too */
> +	gpiod_set_value_cansleep(pci->pe_rst, 1);
> +
> +	/* Make sure the reset is settled */
> +	usleep_range(1, 10);
> +}
> +
> +/*
> + * Implements the cold reset procedure in accordance with the reference manual
> + * and available PM signals.
> + */
> +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
> +{
> +	struct device *dev = btpci->dw.dev;
> +	struct dw_pcie *pci = &btpci->dw;
> +	u32 val;
> +	int ret;
> +
> +	/* First get out of the Power/Hot reset state */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PHY reset\n");
> +		return ret;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert hot reset\n");
> +		goto err_assert_pwr_rst;
> +	}
> +
> +	/* Wait for the PM-core to stop requesting the PHY reset */
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> +				       !(val & BT1_CCU_PCIE_REQ_PHY_RST), 1, 1000);
> +	if (ret) {
> +		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
> +		goto err_assert_hot_rst;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PHY reset\n");
> +		goto err_assert_hot_rst;
> +	}
> +
> +	/* Clocks can be now enabled, but the ref one is crucial at this stage */
> +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +	if (ret) {
> +		dev_err(dev, "Failed to enable app clocks\n");
> +		goto err_assert_phy_rst;
> +	}
> +
> +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +	if (ret) {
> +		dev_err(dev, "Failed to enable ref clocks\n");
> +		goto err_disable_app_clk;
> +	}
> +
> +	/* Wait for the PM to stop requesting the controller core reset */
> +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> +				       !(val & BT1_CCU_PCIE_REQ_CORE_RST), 1, 1000);
> +	if (ret) {
> +		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
> +		goto err_disable_core_clk;
> +	}
> +
> +	/* PCS-PIPE interface and controller core can be now activated */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert PIPE reset\n");
> +		goto err_disable_core_clk;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert core reset\n");
> +		goto err_assert_pipe_rst;
> +	}
> +
> +	/* It's recommended to reset the core and application logic together */
> +	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
> +	if (ret) {
> +		dev_err(dev, "Failed to reset app domain\n");
> +		goto err_assert_core_rst;
> +	}
> +
> +	/* Sticky/Non-sticky CSR flags can be now unreset too */
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert sticky reset\n");
> +		goto err_assert_core_rst;
> +	}
> +
> +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
> +	if (ret) {
> +		dev_err(dev, "Failed to deassert non-sticky reset\n");
> +		goto err_assert_sticky_rst;
> +	}
> +
> +	/* Activate the PCIe bus peripheral devices */
> +	gpiod_set_value_cansleep(pci->pe_rst, 0);
> +
> +	/* Make sure the state is settled (LTSSM is still disabled though) */
> +	usleep_range(1, 10);
> +
> +	return 0;
> +
> +err_assert_sticky_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> +
> +err_assert_core_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> +
> +err_assert_pipe_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> +
> +err_disable_core_clk:
> +	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> +
> +err_disable_app_clk:
> +	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> +
> +err_assert_phy_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> +
> +err_assert_hot_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> +
> +err_assert_pwr_rst:
> +	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> +
> +	return ret;
> +}
> +
> +static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +	int ret;
> +
> +	ret = bt1_pcie_get_res(btpci);
> +	if (ret)
> +		return ret;
> +
> +	bt1_pcie_full_stop_bus(btpci, true);
> +
> +	return bt1_pcie_cold_start_bus(btpci);
> +}
> +
> +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> +
> +	bt1_pcie_full_stop_bus(btpci, false);
> +}
> +
> +struct dw_pcie_host_ops bt1_pcie_host_ops = {
> +	.host_init = bt1_pcie_host_init,
> +	.host_deinit = bt1_pcie_host_deinit,
> +};
> +
> +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> +{
> +	struct bt1_pcie *btpci;
> +
> +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> +	if (!btpci)
> +		return ERR_PTR(-ENOMEM);
> +
> +	btpci->pdev = pdev;
> +
> +	platform_set_drvdata(pdev, btpci);
> +
> +	return btpci;
> +}
> +
> +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> +{
> +	struct device *dev = &btpci->pdev->dev;
> +	int ret;
> +
> +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

Didn't you do this elsewhere...

Rob
Serge Semin June 19, 2022, 8:39 p.m. UTC | #3
On Wed, Jun 15, 2022 at 11:10:45AM -0600, Rob Herring wrote:
> On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > fed up with four clock sources: DBI peripheral clock, AXI application
> > clocks and external PHY/core reference clock generating the 100MHz signal.
> > In addition to that the platform provide a way to reset each part of the
> > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > handle the GPIO-based PERST# signal.
> > 
> > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > interface accessors which make sure the IO operations are dword-aligned.
> > 
> > Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > 
> > ---
> > 
> > Changelog v2:
> > - Rename 'syscon' property to 'baikal,bt1-syscon'.
> > 
> > Changelog v3:
> > - Use the clocks/resets handlers defined in the DW PCIe core descriptor.
> >   (@Rob)
> > - Redefine PCI host bridge config space accessors with the generic
> >   pci_generic_config_read32() and pci_generic_config_write32() methods.
> >   (@Rob)
> > ---
> >  drivers/pci/controller/dwc/Kconfig    |   9 +
> >  drivers/pci/controller/dwc/Makefile   |   1 +
> >  drivers/pci/controller/dwc/pcie-bt1.c | 649 ++++++++++++++++++++++++++
> >  3 files changed, 659 insertions(+)
> >  create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c
> > 
> > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > index 62ce3abf0f19..771b8b146623 100644
> > --- a/drivers/pci/controller/dwc/Kconfig
> > +++ b/drivers/pci/controller/dwc/Kconfig
> > @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP
> >  	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
> >  	  endpoint mode. This uses the DesignWare core.
> >  
> > +config PCIE_BT1
> > +	tristate "Baikal-T1 PCIe controller"
> > +	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
> > +	depends on PCI_MSI_IRQ_DOMAIN
> > +	select PCIE_DW_HOST
> > +	help
> > +	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
> > +	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
> > +
> >  config PCIE_ROCKCHIP_DW_HOST
> >  	bool "Rockchip DesignWare PCIe controller"
> >  	select PCIE_DW
> > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > index 8ba7b67f5e50..bf5c311875a1 100644
> > --- a/drivers/pci/controller/dwc/Makefile
> > +++ b/drivers/pci/controller/dwc/Makefile
> > @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o
> >  obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
> >  obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
> >  obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
> > +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
> >  obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
> >  obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
> >  obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
> > diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
> > new file mode 100644
> > index 000000000000..03f035743b78
> > --- /dev/null
> > +++ b/drivers/pci/controller/dwc/pcie-bt1.c
> > @@ -0,0 +1,649 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
> > + *
> > + * Authors:
> > + *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
> > + *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > + *
> > + * Baikal-T1 PCIe controller driver
> > + */
> > +
> > +#include <linux/bitfield.h>
> > +#include <linux/bits.h>
> > +#include <linux/clk.h>
> > +#include <linux/delay.h>
> > +#include <linux/gpio/consumer.h>
> > +#include <linux/kernel.h>
> > +#include <linux/mfd/syscon.h>
> > +#include <linux/module.h>
> > +#include <linux/pci.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/regmap.h>
> > +#include <linux/reset.h>
> > +#include <linux/types.h>
> > +
> > +#include "pcie-designware.h"
> > +
> > +/* Baikal-T1 System CCU control registers */
> > +#define BT1_CCU_PCIE_CLKC			0x140
> > +#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
> > +#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
> > +#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
> > +
> > +#define BT1_CCU_PCIE_RSTC			0x144
> > +#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
> > +#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
> > +#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
> > +#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
> > +#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
> > +#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
> > +
> > +#define BT1_CCU_PCIE_PMSC			0x148
> > +#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
> > +#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
> > +#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
> > +#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
> > +#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
> > +#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
> > +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
> > +#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
> > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
> > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
> > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
> > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
> > +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
> > +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
> > +#define BT1_CCU_PCIE_LTSSM_L0			0x11
> > +#define BT1_CCU_PCIE_LTSSM_L0S			0x12
> > +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
> > +#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
> > +#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
> > +#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
> > +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
> > +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
> > +#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
> > +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
> > +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
> > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
> > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
> > +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
> > +#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
> > +#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
> > +#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
> > +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
> > +#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
> > +#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
> > +#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
> > +#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
> > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
> > +#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
> > +#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
> > +#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
> > +#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
> > +#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
> > +#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
> > +#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
> > +#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
> > +
> > +#define BT1_CCU_PCIE_GENC			0x14c
> > +#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
> > +#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
> > +#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
> > +#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
> > +#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
> > +#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
> > +#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
> > +#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
> > +#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
> > +
> > +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
> > +({ \
> > +	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
> > +	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
> > +})
> > +
> > +/* Baikal-T1 PCIe specific control registers */
> > +#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
> > +#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
> > +
> > +#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
> > +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
> > +#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
> > +#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
> > +#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
> > +
> > +#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
> > +#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
> > +
> > +#define BT1_PCIE_AXI2MGM_READDATA		0xd10
> > +#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
> > +
> > +/* Generic Baikal-T1 PCIe interface resources */
> > +#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
> > +#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
> > +#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
> > +#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
> > +
> > +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
> > +	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
> > +};
> > +
> > +static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
> > +	DW_PCIE_REF_CLK,
> > +};
> > +
> > +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
> > +	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
> > +};
> > +
> > +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
> > +	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
> > +	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
> > +};
> > +
> > +struct bt1_pcie {
> > +	struct dw_pcie dw;
> > +	struct platform_device *pdev;
> > +	struct regmap *sys_regs;
> > +};
> > +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
> > +
> > +/*
> > + * Baikal-T1 MMIO space must be read/written by the dword-aligned
> > + * instructions. Note the methods are optimized to have the dword operations
> > + * performed with minimum overhead as the most frequently used ones.
> > + */
> > +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
> > +{
> > +	unsigned int ofs = (uintptr_t)addr & 0x3;
> > +
> > +	if (!IS_ALIGNED((uintptr_t)addr, size))
> > +		return PCIBIOS_BAD_REGISTER_NUMBER;
> 

> This isn't for PCI config space accessors, don't use PCIBIOS_*. We 
> really want to get rid of those.

Ok. I'll drop the PCIBIOS_* macros usage from here.

> 
> You are in control of all the accesses, so the error conditions should 
> never happen, don't need to be checked, and don't need to be returned.

not entirely. I'd prefer to keep the error-condition check here because the
methods are the core of the callbacks called from the generic part of the
DW PCIe driver which if get to regress in making the unsupported IO
accesses will be easier to debug should the sanity check is performed.

> 
> > +
> > +	*val = readl(addr - ofs) >> ofs * BITS_PER_BYTE;
> > +	if (size == 4) {
> > +		return PCIBIOS_SUCCESSFUL;
> > +	} else if (size == 2) {
> > +		*val &= 0xffff;
> > +		return PCIBIOS_SUCCESSFUL;
> > +	} else if (size == 1) {
> > +		*val &= 0xff;
> > +		return PCIBIOS_SUCCESSFUL;
> > +	}
> > +
> > +	return PCIBIOS_BAD_REGISTER_NUMBER;
> > +}
> > +
> > +static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val)
> > +{
> > +	unsigned int ofs = (uintptr_t)addr & 0x3;
> > +	u32 tmp, mask;
> > +
> > +	if (!IS_ALIGNED((uintptr_t)addr, size))
> > +		return PCIBIOS_BAD_REGISTER_NUMBER;
> > +
> > +	if (size == 4) {
> > +		writel(val, addr);
> > +		return PCIBIOS_SUCCESSFUL;
> > +	} else if (size == 2 || size == 1) {
> > +		mask = GENMASK(size * BITS_PER_BYTE - 1, 0);
> > +		tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE);
> > +		tmp |= (val & mask) << ofs * BITS_PER_BYTE;
> > +		writel(tmp, addr - ofs);
> > +		return PCIBIOS_SUCCESSFUL;
> > +	}
> > +
> > +	return PCIBIOS_BAD_REGISTER_NUMBER;
> > +}
> > +
> > +static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
> > +			     size_t size)
> > +{
> > +	int ret;
> > +	u32 val;
> > +
> > +	ret = bt1_pcie_read_mmio(base + reg, size, &val);
> > +	if (ret != PCIBIOS_SUCCESSFUL) {
> > +		dev_err(pci->dev, "Read DBI address failed\n");
> > +		return ~0U;
> > +	}
> > +
> > +	return val;
> > +}
> > +
> > +static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
> > +			       size_t size, u32 val)
> > +{
> > +	int ret;
> > +
> > +	ret = bt1_pcie_write_mmio(base + reg, size, val);
> > +	if (ret != PCIBIOS_SUCCESSFUL)
> > +		dev_err(pci->dev, "Write DBI address failed\n");
> > +}
> > +
> > +static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
> > +				size_t size, u32 val)
> > +{
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +	int ret;
> > +
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE);
> > +
> > +	ret = bt1_pcie_write_mmio(base + reg, size, val);
> > +	if (ret != PCIBIOS_SUCCESSFUL)
> > +		dev_err(pci->dev, "Write DBI2 address failed\n");
> > +
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_DBI2_MODE, 0);
> > +}
> > +
> > +static int bt1_pcie_start_ltssm(struct dw_pcie *pci)
> > +{
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +	u32 val;
> > +	int ret;
> > +
> > +	/*
> > +	 * Enable LTSSM and make sure it was able to establish both PHY and
> > +	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
> > +	 */
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
> > +				       1000, 1000000);
> > +	if (ret) {
> > +		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
> > +				       1000, 1000000);
> > +	if (ret) {
> > +		dev_err(pci->dev, "LTSSM failed to set data link up\n");
> > +		return ret;
> > +	}
> > +
> > +	/*
> > +	 * Activate direct speed change after the link is established in an
> > +	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
> > +	 * This is required at least to get 8.0 GT/s speed.
> > +	 */
> > +	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > +	val |= PORT_LOGIC_SPEED_CHANGE;
> > +	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
> > +				       1000, 1000000);
> > +	if (ret)
> > +		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
> > +
> > +	return ret;
> > +}
> > +
> > +static void bt1_pcie_stop_ltssm(struct dw_pcie *pci)
> > +{
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> > +}
> > +
> > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > +	.read_dbi = bt1_pcie_read_dbi,
> > +	.write_dbi = bt1_pcie_write_dbi,
> > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > +	.start_link = bt1_pcie_start_ltssm,
> > +	.stop_link = bt1_pcie_stop_ltssm,
> > +};
> > +
> > +static struct pci_ops bt1_pcie_ops = {
> > +	.map_bus = dw_pcie_own_conf_map_bus,
> > +	.read = pci_generic_config_read32,
> > +	.write = pci_generic_config_write32,
> > +};
> > +
> > +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	int i, ret;
> > +
> > +	/* DBI access is supposed to be performed by the dword-aligned IOs */
> > +	btpci->dw.pp.bridge->ops = &bt1_pcie_ops;
> > +
> > +	/* AXI-interface is configured with 64-bit address bus width */
> > +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > +					   DMA_BIT_MASK(64));
> > +	if (ret) {
> > +		ret = dma_set_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > +						DMA_BIT_MASK(32));
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	/* These CSRs are in MMIO so we won't check the regmap-methods status */
> > +	btpci->sys_regs =
> > +		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
> > +	if (IS_ERR(btpci->sys_regs))
> > +		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
> > +				     "Failed to get syscon\n");
> > +
> > +	/* Make sure all the required resources have been specified */
> > +	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
> > +		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
> > +			dev_err(dev, "App clocks set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
> > +		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
> > +			dev_err(dev, "Core clocks set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
> > +		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
> > +			dev_err(dev, "App resets set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
> > +		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
> > +			dev_err(dev, "Core resets set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	struct dw_pcie *pci = &btpci->dw;
> > +	int ret;
> > +
> > +	/* Disable LTSSM for sure */
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> > +
> > +	/*
> > +	 * Application reset controls are trigger-based so de-assert the core
> > +	 * resets only.
> > +	 */
> > +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> > +	if (ret)
> > +		dev_err(dev, "Failed to assert core resets\n");
> > +
> > +	/*
> > +	 * Clocks are disabled by default at least in accordance with the clk
> > +	 * enable counter value on init stage.
> > +	 */
> > +	if (!init) {
> > +		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +
> > +		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +	}
> > +
> > +	/* The peripheral devices are unavailable anyway so reset them too */
> > +	gpiod_set_value_cansleep(pci->pe_rst, 1);
> > +
> > +	/* Make sure the reset is settled */
> > +	usleep_range(1, 10);
> > +}
> > +
> > +/*
> > + * Implements the cold reset procedure in accordance with the reference manual
> > + * and available PM signals.
> > + */
> > +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	struct dw_pcie *pci = &btpci->dw;
> > +	u32 val;
> > +	int ret;
> > +
> > +	/* First get out of the Power/Hot reset state */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PHY reset\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert hot reset\n");
> > +		goto err_assert_pwr_rst;
> > +	}
> > +
> > +	/* Wait for the PM-core to stop requesting the PHY reset */
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> > +				       !(val & BT1_CCU_PCIE_REQ_PHY_RST), 1, 1000);
> > +	if (ret) {
> > +		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
> > +		goto err_assert_hot_rst;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PHY reset\n");
> > +		goto err_assert_hot_rst;
> > +	}
> > +
> > +	/* Clocks can be now enabled, but the ref one is crucial at this stage */
> > +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to enable app clocks\n");
> > +		goto err_assert_phy_rst;
> > +	}
> > +
> > +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to enable ref clocks\n");
> > +		goto err_disable_app_clk;
> > +	}
> > +
> > +	/* Wait for the PM to stop requesting the controller core reset */
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> > +				       !(val & BT1_CCU_PCIE_REQ_CORE_RST), 1, 1000);
> > +	if (ret) {
> > +		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
> > +		goto err_disable_core_clk;
> > +	}
> > +
> > +	/* PCS-PIPE interface and controller core can be now activated */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PIPE reset\n");
> > +		goto err_disable_core_clk;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert core reset\n");
> > +		goto err_assert_pipe_rst;
> > +	}
> > +
> > +	/* It's recommended to reset the core and application logic together */
> > +	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to reset app domain\n");
> > +		goto err_assert_core_rst;
> > +	}
> > +
> > +	/* Sticky/Non-sticky CSR flags can be now unreset too */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert sticky reset\n");
> > +		goto err_assert_core_rst;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert non-sticky reset\n");
> > +		goto err_assert_sticky_rst;
> > +	}
> > +
> > +	/* Activate the PCIe bus peripheral devices */
> > +	gpiod_set_value_cansleep(pci->pe_rst, 0);
> > +
> > +	/* Make sure the state is settled (LTSSM is still disabled though) */
> > +	usleep_range(1, 10);
> > +
> > +	return 0;
> > +
> > +err_assert_sticky_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> > +
> > +err_assert_core_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> > +
> > +err_assert_pipe_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> > +
> > +err_disable_core_clk:
> > +	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +
> > +err_disable_app_clk:
> > +	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +
> > +err_assert_phy_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> > +
> > +err_assert_hot_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> > +
> > +err_assert_pwr_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> > +
> > +	return ret;
> > +}
> > +
> > +static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +	int ret;
> > +
> > +	ret = bt1_pcie_get_res(btpci);
> > +	if (ret)
> > +		return ret;
> > +
> > +	bt1_pcie_full_stop_bus(btpci, true);
> > +
> > +	return bt1_pcie_cold_start_bus(btpci);
> > +}
> > +
> > +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +
> > +	bt1_pcie_full_stop_bus(btpci, false);
> > +}
> > +
> > +struct dw_pcie_host_ops bt1_pcie_host_ops = {
> > +	.host_init = bt1_pcie_host_init,
> > +	.host_deinit = bt1_pcie_host_deinit,
> > +};
> > +
> > +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> > +{
> > +	struct bt1_pcie *btpci;
> > +
> > +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> > +	if (!btpci)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	btpci->pdev = pdev;
> > +
> > +	platform_set_drvdata(pdev, btpci);
> > +
> > +	return btpci;
> > +}
> > +
> > +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> > +{
> > +	struct device *dev = &btpci->pdev->dev;
> > +	int ret;
> > +
> > +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> 

> Didn't you do this elsewhere...

No. The core DW PCIe driver doesn't do that.

-Sergey

> 
> Rob
Serge Semin June 20, 2022, 5:13 p.m. UTC | #4
On Wed, Jun 15, 2022 at 11:48:48AM -0500, Bjorn Helgaas wrote:
> On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > fed up with four clock sources: DBI peripheral clock, AXI application
> > clocks and external PHY/core reference clock generating the 100MHz signal.
> > In addition to that the platform provide a way to reset each part of the
> > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > handle the GPIO-based PERST# signal.
> > 
> > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > interface accessors which make sure the IO operations are dword-aligned.
> > 
> > Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
> 
> > +static int bt1_pcie_start_ltssm(struct dw_pcie *pci)
> > +{
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +	u32 val;
> > +	int ret;
> > +
> > +	/*
> > +	 * Enable LTSSM and make sure it was able to establish both PHY and
> > +	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
> > +	 */
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
> > +				       1000, 1000000);
> > +	if (ret) {
> > +		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
> > +				       1000, 1000000);
> > +	if (ret) {
> > +		dev_err(pci->dev, "LTSSM failed to set data link up\n");
> > +		return ret;
> > +	}
> > +
> > +	/*
> > +	 * Activate direct speed change after the link is established in an
> > +	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
> > +	 * This is required at least to get 8.0 GT/s speed.
> > +	 */
> > +	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
> > +	val |= PORT_LOGIC_SPEED_CHANGE;
> > +	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
> > +
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
> > +				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
> > +				       1000, 1000000);
> > +	if (ret)
> > +		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
> > +
> > +	return ret;
> > +}
> > +
> > +static void bt1_pcie_stop_ltssm(struct dw_pcie *pci)
> > +{
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> > +}
> > +
> > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > +	.read_dbi = bt1_pcie_read_dbi,
> > +	.write_dbi = bt1_pcie_write_dbi,
> > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > +	.start_link = bt1_pcie_start_ltssm,
> > +	.stop_link = bt1_pcie_stop_ltssm,
> > +};
> 

> Should be static and const. 

Right. No idea how come this has slipped in through my hands.

> Please rename to "dw_pcie_ops" as most
> drivers use. 

IMO matching the structure and its instance names is not a good idea.
Other than confusing objects nature, at the very least it forces you to
violate the local namespace convention. Thus in the line of the
dw_pcie->ops initialization it looks like you use some generic
operations while in fact you just refer to the locally defined
DW PCIe ops instance with the generic variable name. Moreover AFAICS
the latest platform drivers mainly use the vendor-specific prefix in
the dw_pcie_ops structure instance including the ones acked by you,
Lorenzo and Gustavo. What makes my code any different from them?

> Please rename bt1_pcie_start_ltssm() and
> bt1_pcie_stop_ltssm() to bt1_pcie_start_link() and
> bt1_pcie_stop_link() for consistency with other drivers to make
> maintenance easier.

I believe there were no such requirement to use the particular suffix
in these callbacks, but it turned to be a nice coincident that almost
all the drivers have used the same naming convention.) Anyway let's
not brake the naturally evolved harmony and use the same suffixes in
my driver too. Thanks for noticing this.

> 
> > +static struct pci_ops bt1_pcie_ops = {
> > +	.map_bus = dw_pcie_own_conf_map_bus,
> > +	.read = pci_generic_config_read32,
> > +	.write = pci_generic_config_write32,
> > +};
> > +
> > +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> 

> Can you name this something similar to what other drivers use?  There
> are a couple *_pcie_get_resources() functions (normally called from
> *_pcie_probe()), but no *_get_res() yet.

Earlier in this patchset I've introduced a new method to get
the CSRs ranges, PCIe speed, NoF lanes, etc resources. See the patch:
[PATCH v3 14/17] PCI: dwc: Introduce generic resources getter
The method has been named as "dw_pcie_get_res()". So the locally
defined function has been named to refer to that method. If you think
that using the "_resources" suffix is better (IMO there is no
significant difference) then we'll need to change the name there too.
Do you?

> 
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	int i, ret;
> > +
> > +	/* DBI access is supposed to be performed by the dword-aligned IOs */
> > +	btpci->dw.pp.bridge->ops = &bt1_pcie_ops;
> > +
> > +	/* AXI-interface is configured with 64-bit address bus width */
> > +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > +					   DMA_BIT_MASK(64));
> 

> Just to double-check since this is the first instance of
> dma_coerce_mask_and_coherent() in drivers/pci -- I guess Baikal-T1 is
> unique in needing this?

To be honest I've set it here just in case, seeing the dma_mask and
coherent_dma_mask are left uninitialized in the Host bridge device
instance, while it's still participate in the PCI devices hierarchy:

1. platform_device.dev;
                   | (<= devm_pci_alloc_host_bridge(dev))
                   +---+
                      &v
2. pci_host_bridge.dev.parent
                   | (<= pci_register_host_bridge(bridge) or)
                   | (<= pci_alloc_child_bus()              )
                  &v
           pci_bus.bridge
                   +-------------------+
                   |                   | (<= pci_setup_device())
                   v                   v
3.     pci_bus.dev.parent  pci_dev.dev.parent
                           pci_dev.dma_mask = 0xffffffff;
                                   | (<= pci_device_add())
                                   +----+
                                       &v
                           pci_dev->dev.dma_mask
                           pci_dev->dev.coherent_dma_mask = 0xffffffffull;

So each device detected on the very first PCIe bus gets to have the
PCI host bridge device as a parent. But AFAICS the PCI subsystem core
code doesn't use the PCI host bridge DMA-mask and by default the
dma_mask/coherent_dma_mask fields of each PCIe peripheral device are
unconditionally initialized with DMA_BIT_MASK(32) (they are supposed
to be overridden by the device-driver anyway). So to speak we can
freely drop the dma_coerce_mask_and_coherent() method invocation from
my driver if you say it is required and the PCI host bridge DMA parameter
will never be used. What do you think?

As a side note regarding the way the DMA-capability is initialized in
the kernel PCI subsystem. The real magic happens in the
bus->dma_configure() callback, which in case of the OF-based platform
parses the dma-ranges property of the parental OF-nodes in order to
get the dev->bus_dma_limit and dev->dma_range_map maps - the maps of
the CPU memory as seen by the PCIe devices. So in case of the direct
DMA mapping both dev->dma_range_map and dev->dma_mask get to be
important since the smallest non-zero value is taken as the upper
limit of the device DMA-capability (see the dma_capable() inliner
implementation). So to speak my worries were related with the
dev->dma_mask usage. Should some driver/PCI subsystem core
dereferences it taken from the PCI host bridge, the kernel will crash.
That's why I've added the dma_coerce_mask_and_coherent() method
invocation here in the first place.

> 
> > +	if (ret) {
> > +		ret = dma_set_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > +						DMA_BIT_MASK(32));
> 

> Also the first instance of dma_set_mask_and_coherent() in dwc-based
> drivers, so double-checking here, too.

I can drop it if you insist so. (Please see my comment above.)

> 
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	/* These CSRs are in MMIO so we won't check the regmap-methods status */
> > +	btpci->sys_regs =
> > +		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
> > +	if (IS_ERR(btpci->sys_regs))
> > +		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
> > +				     "Failed to get syscon\n");
> > +
> > +	/* Make sure all the required resources have been specified */
> > +	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
> > +		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
> > +			dev_err(dev, "App clocks set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
> > +		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
> > +			dev_err(dev, "Core clocks set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
> > +		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
> > +			dev_err(dev, "App resets set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
> > +		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
> > +			dev_err(dev, "Core resets set is incomplete\n");
> > +			return -ENOENT;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +

> > +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> 
> Can you name this something similar to what other drivers use?

For instance? (Please note, the link_stop/link_start callbacks are
defined as separate methods above.) The current names correctly describe
the methods logic. So I wouldn't want to fully change their names.

> 
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	struct dw_pcie *pci = &btpci->dw;
> > +	int ret;
> > +
> > +	/* Disable LTSSM for sure */
> > +	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
> > +			   BT1_CCU_PCIE_LTSSM_EN, 0);
> > +
> > +	/*
> > +	 * Application reset controls are trigger-based so de-assert the core
> > +	 * resets only.
> > +	 */
> > +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> > +	if (ret)
> > +		dev_err(dev, "Failed to assert core resets\n");
> > +
> > +	/*
> > +	 * Clocks are disabled by default at least in accordance with the clk
> > +	 * enable counter value on init stage.
> > +	 */
> > +	if (!init) {
> > +		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +
> > +		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +	}
> > +
> > +	/* The peripheral devices are unavailable anyway so reset them too */
> > +	gpiod_set_value_cansleep(pci->pe_rst, 1);
> > +
> > +	/* Make sure the reset is settled */
> > +	usleep_range(1, 10);
> 

> Is this duration related to something in the PCIe spec?  Or the DWC
> spec? 

No. These durations are the chip-specific. Partly due to them being
specific for each SoC we can't implement a generic bus reset
procedure.

> I'd really like to use named constants when possible, although
> we have a ton of bare magic numbers currently.
> 
> Similar for the poll timeouts and the "state settled" sleep below.

I don't really see much need in this parametrization since these
numbers are used only once in the platform driver and their
application is easily inferable from the code context.

> 
> > +}
> > +
> > +/*
> > + * Implements the cold reset procedure in accordance with the reference manual
> > + * and available PM signals.
> > + */
> > +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
> > +{
> > +	struct device *dev = btpci->dw.dev;
> > +	struct dw_pcie *pci = &btpci->dw;
> > +	u32 val;
> > +	int ret;
> > +
> > +	/* First get out of the Power/Hot reset state */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PHY reset\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert hot reset\n");
> > +		goto err_assert_pwr_rst;
> > +	}
> > +
> > +	/* Wait for the PM-core to stop requesting the PHY reset */
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> > +				       !(val & BT1_CCU_PCIE_REQ_PHY_RST), 1, 1000);
> > +	if (ret) {
> > +		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
> > +		goto err_assert_hot_rst;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PHY reset\n");
> > +		goto err_assert_hot_rst;
> > +	}
> > +
> > +	/* Clocks can be now enabled, but the ref one is crucial at this stage */
> > +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to enable app clocks\n");
> > +		goto err_assert_phy_rst;
> > +	}
> > +
> > +	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to enable ref clocks\n");
> > +		goto err_disable_app_clk;
> > +	}
> > +
> > +	/* Wait for the PM to stop requesting the controller core reset */
> > +	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
> > +				       !(val & BT1_CCU_PCIE_REQ_CORE_RST), 1, 1000);
> > +	if (ret) {
> > +		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
> > +		goto err_disable_core_clk;
> > +	}
> > +
> > +	/* PCS-PIPE interface and controller core can be now activated */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert PIPE reset\n");
> > +		goto err_disable_core_clk;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert core reset\n");
> > +		goto err_assert_pipe_rst;
> > +	}
> > +
> > +	/* It's recommended to reset the core and application logic together */
> > +	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to reset app domain\n");
> > +		goto err_assert_core_rst;
> > +	}
> > +
> > +	/* Sticky/Non-sticky CSR flags can be now unreset too */
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert sticky reset\n");
> > +		goto err_assert_core_rst;
> > +	}
> > +
> > +	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to deassert non-sticky reset\n");
> > +		goto err_assert_sticky_rst;
> > +	}
> > +
> > +	/* Activate the PCIe bus peripheral devices */
> > +	gpiod_set_value_cansleep(pci->pe_rst, 0);
> > +
> > +	/* Make sure the state is settled (LTSSM is still disabled though) */
> > +	usleep_range(1, 10);
> > +
> > +	return 0;
> > +
> > +err_assert_sticky_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
> > +
> > +err_assert_core_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
> > +
> > +err_assert_pipe_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
> > +
> > +err_disable_core_clk:
> > +	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
> > +
> > +err_disable_app_clk:
> > +	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
> > +
> > +err_assert_phy_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
> > +
> > +err_assert_hot_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
> > +
> > +err_assert_pwr_rst:
> > +	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
> > +
> > +	return ret;
> > +}
> > +
> > +static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +	int ret;
> > +
> > +	ret = bt1_pcie_get_res(btpci);
> > +	if (ret)
> > +		return ret;
> > +
> > +	bt1_pcie_full_stop_bus(btpci, true);
> > +
> > +	return bt1_pcie_cold_start_bus(btpci);
> 

> Generally I think the get_res-type stuff happens elsewhere.  I'm not
> an expert in that, but this doesn't look much like other
> *_pcie_host_init() functions, which mainly deal with enabling clocks,
> reset assertion/deassertion, PHY init, interrupt enable, etc.
> 
> Maybe this is connected with your new common clocks/resets properties.
> I'm certainly in favor of making as much of that common as is
> practical!  I hope we can take advantage of that and make more
> consistency across the dwc-based drivers as well.

Right, the bt1_pcie_get_res() method is now makes sure that all the
clocks and resets have been requested since the generic resource
getter uses the optional-version of the clocks/resets request methods.

> 
> > +}
> > +
> > +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > +	struct bt1_pcie *btpci = to_bt1_pcie(pci);
> > +
> > +	bt1_pcie_full_stop_bus(btpci, false);
> > +}
> > +
> > +struct dw_pcie_host_ops bt1_pcie_host_ops = {
> > +	.host_init = bt1_pcie_host_init,
> > +	.host_deinit = bt1_pcie_host_deinit,
> > +};
> > +
> > +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> > +{
> > +	struct bt1_pcie *btpci;
> > +
> > +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> > +	if (!btpci)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	btpci->pdev = pdev;
> > +
> > +	platform_set_drvdata(pdev, btpci);
> > +
> > +	return btpci;
> 

> I don't think it's worth splitting this into a separate function.  I
> think it would be better to use the same structure as other dwc-based
> drivers and keep this in bt1_pcie_probe().

Sorry, I disagree in this matter. Generally I don't like the most of
the probe methods designed in the kernel well because after evolving
in time they get to be a mess if incoherent initializations,
allocations, requests, etc. Splitting it up into a set of smaller
coherent methods makes the code much clearer.

> 
> > +}
> > +
> > +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> 

> All other dwc-based drivers call dw_pcie_host_init() from either
> *_pcie_probe() or *_add_pcie_port().  Please use a similar convention.

Not entirely. Tegra is an exception. So as before I don't think there
is a real convention. Most likely it's a result of a lucky coincident.
Moreover I don't really like such naming. Something like
VENDOR_pcie_add_root_port() would be much better.

Anyway since changing it to a more suitable naming would be too tiresome,
I'll change the name as you request.

> 
> > +{
> > +	struct device *dev = &btpci->pdev->dev;
> > +	int ret;
> > +

> > +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> 
> Why do you need this when no other dwc-based drivers do?  Is Baikal-T1
> different in this respect?

It's because eDMA engine embedded into the DW PCIe root port. Please
see my patchset:
Link: https://lore.kernel.org/linux-pci/20220610091459.17612-1-Sergey.Semin@baikalelectronics.ru/
and the next particular patch of that series:
Link: https://lore.kernel.org/linux-pci/20220610091459.17612-23-Sergey.Semin@baikalelectronics.ru/

The PCIe peripheral device drivers may wish to use eDMA embedded into
the Root Port. In that case they can use the
dmaengine_get_dma_device() method to get the core device of the
DMA-engine for the memory mapping. That device must have the
DMA-parameters properly configured in order for the DMA mapping
effectively working. In case of the Local eDMA platform setup (eDMA
embedded into the Root Port/Endpoint and accessible from the
CPU/Application side) the DMA-parameters are copied (see the patch
I've listed above) from the platform device. That's why I need the
dma_set_mask_and_coherent() method invocation here. Should I omit it
dma_mask will be left of 32-bits wide and SWIOTLB will be used in
order to reach the memory above 4G.

Please note we can't use dma_set_mask_and_coherent() in the generic
part of the DW PCIe driver since the DMA capability and the
eDMA-accessible memory region is the platform specific settings. So by
default all the eDMA-capable DW PCIe drivers will be left with the
eDMA engine working with the 4GB memory only.

> 
> > +	if (ret)
> > +		return ret;
> > +
> > +	btpci->dw.version = DW_PCIE_VER_460A;
> > +	btpci->dw.dev = dev;
> > +	btpci->dw.ops = &bt1_pcie_dw_ops;
> > +
> > +	btpci->dw.pp.num_vectors = MAX_MSI_IRQS;
> > +	btpci->dw.pp.ops = &bt1_pcie_host_ops;
> > +
> > +	dw_pcie_cap_set(&btpci->dw, REQ_RES);
> > +
> > +	ret = dw_pcie_host_init(&btpci->dw.pp);
> > +	if (ret)
> > +		dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n");
> > +
> > +	return ret;
> > +}
> > +
> > +static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)
> 

> Can you call dw_pcie_host_deinit() from the same place as other
> drivers?
> 
>   $ git grep -p dw_pcie_host_deinit drivers/pci/controller/dwc

Sorry I'd rather leave it as is. There are only four drivers using
it and one of them don't follow what seems like a convention. I'd
rather have my driver code coherent:
bt1_pcie_add_pcie_port() is used to add the DW PCIe Root Port.
and
bt1_pcie_del_pcie_port() is used to remove the DW PCIe Root Port

-Sergey.

> 
> > +{
> > +	dw_pcie_host_deinit(&btpci->dw.pp);
> > +}
> > +
> > +static int bt1_pcie_probe(struct platform_device *pdev)
> > +{
> > +	struct bt1_pcie *btpci;
> > +
> > +	btpci = bt1_pcie_create_data(pdev);
> > +	if (IS_ERR(btpci))
> > +		return PTR_ERR(btpci);
> > +
> > +	return bt1_pcie_add_dw_port(btpci);
> > +}
> > +
> > +static int bt1_pcie_remove(struct platform_device *pdev)
> > +{
> > +	struct bt1_pcie *btpci = platform_get_drvdata(pdev);
> > +
> > +	bt1_pcie_del_dw_port(btpci);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct of_device_id bt1_pcie_of_match[] = {
> > +	{ .compatible = "baikal,bt1-pcie" },
> > +	{},
> > +};
> > +MODULE_DEVICE_TABLE(of, bt1_pcie_of_match);
> > +
> > +static struct platform_driver bt1_pcie_driver = {
> > +	.probe = bt1_pcie_probe,
> > +	.remove = bt1_pcie_remove,
> > +	.driver = {
> > +		.name	= "bt1-pcie",
> > +		.of_match_table = bt1_pcie_of_match,
> > +	},
> > +};
> > +module_platform_driver(bt1_pcie_driver);
> > +
> > +MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
> > +MODULE_DESCRIPTION("Baikal-T1 PCIe driver");
> > +MODULE_LICENSE("GPL");
> > -- 
> > 2.35.1
> >
Bjorn Helgaas June 21, 2022, 6:29 p.m. UTC | #5
On Mon, Jun 20, 2022 at 08:13:47PM +0300, Serge Semin wrote:
> On Wed, Jun 15, 2022 at 11:48:48AM -0500, Bjorn Helgaas wrote:
> > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > > fed up with four clock sources: DBI peripheral clock, AXI application
> > > clocks and external PHY/core reference clock generating the 100MHz signal.
> > > In addition to that the platform provide a way to reset each part of the
> > > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > > handle the GPIO-based PERST# signal.
> > > 
> > > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > > interface accessors which make sure the IO operations are dword-aligned.

> > > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > > +	.read_dbi = bt1_pcie_read_dbi,
> > > +	.write_dbi = bt1_pcie_write_dbi,
> > > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > > +	.start_link = bt1_pcie_start_ltssm,
> > > +	.stop_link = bt1_pcie_stop_ltssm,
> > > +};

> > Please rename to "dw_pcie_ops" as most
> > drivers use. 
> 
> IMO matching the structure and its instance names is not a good idea.
> Other than confusing objects nature, at the very least it forces you to
> violate the local namespace convention. Thus in the line of the
> dw_pcie->ops initialization it looks like you use some generic
> operations while in fact you just refer to the locally defined
> DW PCIe ops instance with the generic variable name. Moreover AFAICS
> the latest platform drivers mainly use the vendor-specific prefix in
> the dw_pcie_ops structure instance including the ones acked by you,
> Lorenzo and Gustavo. What makes my code any different from them?

That's fair.  I suggest "bt1_pcie_ops" or "bt1_dw_pcie_ops" to match
the other drivers that include the driver name:

  intel_pcie_ops
  keembay_pcie_ops
  kirin_dw_pcie_ops
  tegra_dw_pcie_ops

They're not 100% consistent, but hopefully we can at least not make
things *less* consistent.

> > > +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> 
> > Can you name this something similar to what other drivers use?  There
> > are a couple *_pcie_get_resources() functions (normally called from
> > *_pcie_probe()), but no *_get_res() yet.
> 
> Earlier in this patchset I've introduced a new method to get
> the CSRs ranges, PCIe speed, NoF lanes, etc resources. See the patch:
> [PATCH v3 14/17] PCI: dwc: Introduce generic resources getter
> The method has been named as "dw_pcie_get_res()". So the locally
> defined function has been named to refer to that method. If you think
> that using the "_resources" suffix is better (IMO there is no
> significant difference) then we'll need to change the name there too.
> Do you?

Yes.  I don't think there's value in names being gratuitously
different.

> > > +	/* AXI-interface is configured with 64-bit address bus width */
> > > +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > > +					   DMA_BIT_MASK(64));
> 
> > Just to double-check since this is the first instance of
> > dma_coerce_mask_and_coherent() in drivers/pci -- I guess Baikal-T1 is
> > unique in needing this?
> 
> To be honest I've set it here just in case, seeing the dma_mask and
> coherent_dma_mask are left uninitialized in the Host bridge device
> instance, while it's still participate in the PCI devices hierarchy:
> 
> 1. platform_device.dev;
>                    | (<= devm_pci_alloc_host_bridge(dev))
>                    +---+
>                       &v
> 2. pci_host_bridge.dev.parent
>                    | (<= pci_register_host_bridge(bridge) or)
>                    | (<= pci_alloc_child_bus()              )
>                   &v
>            pci_bus.bridge
>                    +-------------------+
>                    |                   | (<= pci_setup_device())
>                    v                   v
> 3.     pci_bus.dev.parent  pci_dev.dev.parent
>                            pci_dev.dma_mask = 0xffffffff;
>                                    | (<= pci_device_add())
>                                    +----+
>                                        &v
>                            pci_dev->dev.dma_mask
>                            pci_dev->dev.coherent_dma_mask = 0xffffffffull;
> 
> So each device detected on the very first PCIe bus gets to have the
> PCI host bridge device as a parent. But AFAICS the PCI subsystem core
> code doesn't use the PCI host bridge DMA-mask and by default the
> dma_mask/coherent_dma_mask fields of each PCIe peripheral device are
> unconditionally initialized with DMA_BIT_MASK(32) (they are supposed
> to be overridden by the device-driver anyway). So to speak we can
> freely drop the dma_coerce_mask_and_coherent() method invocation from
> my driver if you say it is required and the PCI host bridge DMA parameter
> will never be used. What do you think?

I'd like the usage across drivers to be consistent unless there's a
hardware difference that requires something different.  So if you can
point to something different in bt1, great.  If not, do it the same as
the other drivers.

> > > +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> > 
> > Can you name this something similar to what other drivers use?
> 
> For instance? (Please note, the link_stop/link_start callbacks are
> defined as separate methods above.) The current names correctly describe
> the methods logic. So I wouldn't want to fully change their names.

Do any other drivers contain similar logic?  If so, please use a
similar name.

> > > +	 * Application reset controls are trigger-based so de-assert the core
> > > +	 * resets only.
> > > +	 */
> > > +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);

BTW, the comment says "de-assert" but the code looks like "assert".

> > > +	/* Make sure the reset is settled */
> > > +	usleep_range(1, 10);
> 
> > Is this duration related to something in the PCIe spec?  Or the DWC
> > spec? 
> 
> No. These durations are the chip-specific. Partly due to them being
> specific for each SoC we can't implement a generic bus reset
> procedure.
> 
> > I'd really like to use named constants when possible, although
> > we have a ton of bare magic numbers currently.
> > 
> > Similar for the poll timeouts and the "state settled" sleep below.
> 
> I don't really see much need in this parametrization since these
> numbers are used only once in the platform driver and their
> application is easily inferable from the code context.

Even if they are used only once, it's helpful when constants like this
can be connected to the spec or other justification for the specific
values.

> > > +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> > > +{
> > > +	struct bt1_pcie *btpci;
> > > +
> > > +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> > > +	if (!btpci)
> > > +		return ERR_PTR(-ENOMEM);
> > > +
> > > +	btpci->pdev = pdev;
> > > +
> > > +	platform_set_drvdata(pdev, btpci);
> > > +
> > > +	return btpci;
> 
> > I don't think it's worth splitting this into a separate function.  I
> > think it would be better to use the same structure as other dwc-based
> > drivers and keep this in bt1_pcie_probe().
> 
> Sorry, I disagree in this matter. Generally I don't like the most of
> the probe methods designed in the kernel well because after evolving
> in time they get to be a mess if incoherent initializations,
> allocations, requests, etc. Splitting it up into a set of smaller
> coherent methods makes the code much clearer.

There's definitely some tension between making one driver better and
making it different from all the others.  I'm all in favor of making
all the drivers better and more consistent.  I'm less in favor of
one-off improvements because consistency is extremely important for
maintenance.

> > > +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> 
> > All other dwc-based drivers call dw_pcie_host_init() from either
> > *_pcie_probe() or *_add_pcie_port().  Please use a similar convention.
> 
> Not entirely. Tegra is an exception. So as before I don't think there
> is a real convention. Most likely it's a result of a lucky coincident.
> Moreover I don't really like such naming. Something like
> VENDOR_pcie_add_root_port() would be much better.

I stand corrected.  Of the 21 dw_pcie_host_init() calls, 13 are from
*_pcie_probe(), 7 are from *_add_pcie_port(), and tegra is from
tegra_pcie_init_controller().  I think the *_add_pcie_port() structure
is better because it makes room to support multiple root ports.

> > > +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> > 
> > Why do you need this when no other dwc-based drivers do?  Is Baikal-T1
> > different in this respect?
> 
> It's because eDMA engine embedded into the DW PCIe root port. 

Let's add a comment about the fact that this is needed because of the
eDMA engine.

> > > +static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)
> 
> > Can you call dw_pcie_host_deinit() from the same place as other
> > drivers?
> > 
> >   $ git grep -p dw_pcie_host_deinit drivers/pci/controller/dwc
> 
> Sorry I'd rather leave it as is. There are only four drivers using
> it and one of them don't follow what seems like a convention. I'd
> rather have my driver code coherent:
> bt1_pcie_add_pcie_port() is used to add the DW PCIe Root Port.
> and
> bt1_pcie_del_pcie_port() is used to remove the DW PCIe Root Port

I agree with your rationale.  Intel and kirin do:

  *_pcie_probe
    dw_pcie_host_init

  *_pcie_remove
    dw_pcie_host_deinit

and tegra is similar but from tegra_pcie_init_controller() and
tegra_pcie_deinit_controller().  Exynos is the odd one out and calls
dw_pcie_host_init() from exynos_add_pcie_port() but
dw_pcie_host_deinit() from exynos_pcie_remove().

Your model is better since it removes the "single root port"
assumption.  I would like the "bt1_pcie_add_port()" and
"bt1_pcie_del_port()" (or "bt1_pcie_remove_port()", which would be
slightly more parallel with "add") names to align with other drivers.

Bjorn
Serge Semin June 22, 2022, 5:04 p.m. UTC | #6
On Tue, Jun 21, 2022 at 01:29:41PM -0500, Bjorn Helgaas wrote:
> On Mon, Jun 20, 2022 at 08:13:47PM +0300, Serge Semin wrote:
> > On Wed, Jun 15, 2022 at 11:48:48AM -0500, Bjorn Helgaas wrote:
> > > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > > > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > > > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > > > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > > > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > > > fed up with four clock sources: DBI peripheral clock, AXI application
> > > > clocks and external PHY/core reference clock generating the 100MHz signal.
> > > > In addition to that the platform provide a way to reset each part of the
> > > > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > > > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > > > handle the GPIO-based PERST# signal.
> > > > 
> > > > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > > > interface accessors which make sure the IO operations are dword-aligned.
> 
> > > > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > > > +	.read_dbi = bt1_pcie_read_dbi,
> > > > +	.write_dbi = bt1_pcie_write_dbi,
> > > > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > > > +	.start_link = bt1_pcie_start_ltssm,
> > > > +	.stop_link = bt1_pcie_stop_ltssm,
> > > > +};
> 
> > > Please rename to "dw_pcie_ops" as most
> > > drivers use. 
> > 
> > IMO matching the structure and its instance names is not a good idea.
> > Other than confusing objects nature, at the very least it forces you to
> > violate the local namespace convention. Thus in the line of the
> > dw_pcie->ops initialization it looks like you use some generic
> > operations while in fact you just refer to the locally defined
> > DW PCIe ops instance with the generic variable name. Moreover AFAICS
> > the latest platform drivers mainly use the vendor-specific prefix in
> > the dw_pcie_ops structure instance including the ones acked by you,
> > Lorenzo and Gustavo. What makes my code any different from them?
> 

> That's fair.  I suggest "bt1_pcie_ops" or "bt1_dw_pcie_ops" to match
> the other drivers that include the driver name:
> 
>   intel_pcie_ops
>   keembay_pcie_ops
>   kirin_dw_pcie_ops
>   tegra_dw_pcie_ops

+   ks_pcie_dw_pcie_ops

which is even further from the suggested names.)

> 
> They're not 100% consistent, but hopefully we can at least not make
> things *less* consistent.

I don't think we can make something less consistent if there is no real
consistency.) There are at most five ops descriptors can be defined in
the DW PCIe platform drivers:

1. struct dw_pcie_ops - DW PCIe DBI interface accessors,
+-> dw_pcie_ops
+-> <vendor>_pcie_ops
+-> <vendor>_dw_pcie_ops

2. struct pci_ops     - own or child PCIe config space accessors,
+-> dw_pcie_ops !!! in the driver core.
+-> <vendor>_pci_ops
+-> <vendor>_pcie_ops
+-> dw_child_pcie_ops
+-> <vendor>_child_pcie_ops
+-> <vendor>_child_pci_ops

3. struct dw_pcie_host_ops - DW PCIe Root Port init/de-init operations
+-> <vendor>_pcie_host_ops
+-> <vendor>_pcie_dw_host_ops

4. struct dw_pcie_ep_ops   - DW PCIe Endpoint init/de-init operations
+-> pcie_ep_ops
+-> pci_ep_ops
+-> <vendor>_pcie_ep_ops

As you can see each can have different naming approaches used in the
DW PCIe platform drivers here and there. Some of them have been utilized
more frequently, some of them - less. As for me what is really consistent
across all the DW PCIe platform drivers is the local namespace prefix
of the form "<vendor>_pcie". It is used in all the locally defined
functions names and more-or-less mainly in the local instances of the
operation descriptors. So if you want we can pick some approach and
make sure it is used in all the driver from now on. For instance,

struct dw_pcie_ops <vendor>_pcie_ops
struct dw_pcie_host_ops <vendor>_pcie_host_ops
struct dw_pcie_ep_ops <vendor>_pcie_ep_ops
struct pci_ops <vendor>_pci_ops // Can be confused with the struct
                                // dw_pcie_ops instance, but this what
                                // is mainly used in the available drivers.
struct pci_ops <vendor>_child_pci_ops // less frequent naming
                                      // approach, but it looks more
                                      // like the own CFG-space IOs.

Note the later two cases will violate the local namespace naming
convention of having "<vendor>_pcie" prefix.

In my case the names would look like:
struct dw_pcie_ops bt1_pcie_ops // What you suggest in the comment above
struct dw_pcie_host_ops bt1_pcie_host_ops
struct pci_ops bt1_pci_ops // It may look ambiguous with bt1_pcie_ops.

What do you think?

> 
> > > > +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> > 
> > > Can you name this something similar to what other drivers use?  There
> > > are a couple *_pcie_get_resources() functions (normally called from
> > > *_pcie_probe()), but no *_get_res() yet.
> > 
> > Earlier in this patchset I've introduced a new method to get
> > the CSRs ranges, PCIe speed, NoF lanes, etc resources. See the patch:
> > [PATCH v3 14/17] PCI: dwc: Introduce generic resources getter
> > The method has been named as "dw_pcie_get_res()". So the locally
> > defined function has been named to refer to that method. If you think
> > that using the "_resources" suffix is better (IMO there is no
> > significant difference) then we'll need to change the name there too.
> > Do you?
> 

> Yes.  I don't think there's value in names being gratuitously
> different.

Ok.

> 
> > > > +	/* AXI-interface is configured with 64-bit address bus width */
> > > > +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > > > +					   DMA_BIT_MASK(64));
> > 
> > > Just to double-check since this is the first instance of
> > > dma_coerce_mask_and_coherent() in drivers/pci -- I guess Baikal-T1 is
> > > unique in needing this?
> > 
> > To be honest I've set it here just in case, seeing the dma_mask and
> > coherent_dma_mask are left uninitialized in the Host bridge device
> > instance, while it's still participate in the PCI devices hierarchy:
> > 
> > 1. platform_device.dev;
> >                    | (<= devm_pci_alloc_host_bridge(dev))
> >                    +---+
> >                       &v
> > 2. pci_host_bridge.dev.parent
> >                    | (<= pci_register_host_bridge(bridge) or)
> >                    | (<= pci_alloc_child_bus()              )
> >                   &v
> >            pci_bus.bridge
> >                    +-------------------+
> >                    |                   | (<= pci_setup_device())
> >                    v                   v
> > 3.     pci_bus.dev.parent  pci_dev.dev.parent
> >                            pci_dev.dma_mask = 0xffffffff;
> >                                    | (<= pci_device_add())
> >                                    +----+
> >                                        &v
> >                            pci_dev->dev.dma_mask
> >                            pci_dev->dev.coherent_dma_mask = 0xffffffffull;
> > 
> > So each device detected on the very first PCIe bus gets to have the
> > PCI host bridge device as a parent. But AFAICS the PCI subsystem core
> > code doesn't use the PCI host bridge DMA-mask and by default the
> > dma_mask/coherent_dma_mask fields of each PCIe peripheral device are
> > unconditionally initialized with DMA_BIT_MASK(32) (they are supposed
> > to be overridden by the device-driver anyway). So to speak we can
> > freely drop the dma_coerce_mask_and_coherent() method invocation from
> > my driver if you say it is required and the PCI host bridge DMA parameter
> > will never be used. What do you think?
> 

> I'd like the usage across drivers to be consistent unless there's a
> hardware difference that requires something different.  So if you can
> point to something different in bt1, great.  If not, do it the same as
> the other drivers.

Ok. I'll drop it from the driver then.

> 
> > > > +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> > > 
> > > Can you name this something similar to what other drivers use?
> > 
> > For instance? (Please note, the link_stop/link_start callbacks are
> > defined as separate methods above.) The current names correctly describe
> > the methods logic. So I wouldn't want to fully change their names.
> 

> Do any other drivers contain similar logic?  If so, please use a
> similar name.

host_init content is very platform-specific. So each driver has its own
callback implementation and logical sub-methods split up. My case
isn't an exception.

> 
> > > > +	 * Application reset controls are trigger-based so de-assert the core
> > > > +	 * resets only.
> > > > +	 */
> > > > +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> 

> BTW, the comment says "de-assert" but the code looks like "assert".

Right. It is supposed to be "assert" in accordance with what is
actually done.

> 
> > > > +	/* Make sure the reset is settled */
> > > > +	usleep_range(1, 10);
> > 
> > > Is this duration related to something in the PCIe spec?  Or the DWC
> > > spec? 
> > 
> > No. These durations are the chip-specific. Partly due to them being
> > specific for each SoC we can't implement a generic bus reset
> > procedure.
> > 
> > > I'd really like to use named constants when possible, although
> > > we have a ton of bare magic numbers currently.
> > > 
> > > Similar for the poll timeouts and the "state settled" sleep below.
> > 
> > I don't really see much need in this parametrization since these
> > numbers are used only once in the platform driver and their
> > application is easily inferable from the code context.
> 

> Even if they are used only once, it's helpful when constants like this
> can be connected to the spec or other justification for the specific
> values.

Ok. I'll replace the literals with the macros.

> 
> > > > +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> > > > +{
> > > > +	struct bt1_pcie *btpci;
> > > > +
> > > > +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> > > > +	if (!btpci)
> > > > +		return ERR_PTR(-ENOMEM);
> > > > +
> > > > +	btpci->pdev = pdev;
> > > > +
> > > > +	platform_set_drvdata(pdev, btpci);
> > > > +
> > > > +	return btpci;
> > 
> > > I don't think it's worth splitting this into a separate function.  I
> > > think it would be better to use the same structure as other dwc-based
> > > drivers and keep this in bt1_pcie_probe().
> > 
> > Sorry, I disagree in this matter. Generally I don't like the most of
> > the probe methods designed in the kernel well because after evolving
> > in time they get to be a mess if incoherent initializations,
> > allocations, requests, etc. Splitting it up into a set of smaller
> > coherent methods makes the code much clearer.
> 

> There's definitely some tension between making one driver better and
> making it different from all the others.  I'm all in favor of making
> all the drivers better and more consistent.  I'm less in favor of
> one-off improvements because consistency is extremely important for
> maintenance.

Well, if there were a consistency in the probe method design it would
have been another story, but in case of the DW PCIe there is none.
Some DW PCIe platform drivers perform all the probe actions right in
the probe method forming a long list of the weakly coherent things,
some of them have a few sub-functions called but still look the same,
some of them are mainly split up in the sub-methods, but still perform
some initialization right in the probe method. So in general there is
no unification and well defined convention in that regard.

My approach on the contrary makes the probe method code well unified.
Should any new additional step is required, the new method can be
added together with the cleanup antagonist. Similarly the
sub-methods update patches is easier to review than reading the
all-in-one probe code update. Moreover such design approach I've
been using in all the drivers submitted by me to the kernel and no
questions have been raised so far. Finally the driver is supposed to
be maintained by its author at the very least. So I definitely won't
have any problem with it especially after using the same design
pattern in all my drivers.

> 
> > > > +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> > 
> > > All other dwc-based drivers call dw_pcie_host_init() from either
> > > *_pcie_probe() or *_add_pcie_port().  Please use a similar convention.
> > 
> > Not entirely. Tegra is an exception. So as before I don't think there
> > is a real convention. Most likely it's a result of a lucky coincident.
> > Moreover I don't really like such naming. Something like
> > VENDOR_pcie_add_root_port() would be much better.
> 

> I stand corrected.  Of the 21 dw_pcie_host_init() calls, 13 are from
> *_pcie_probe(), 7 are from *_add_pcie_port(), and tegra is from
> tegra_pcie_init_controller().  I think the *_add_pcie_port() structure
> is better because it makes room to support multiple root ports.

See the last comment. It concerns the same methods, but you suggest to
use the names "*_add_port()" there.

> 
> > > > +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> > > 
> > > Why do you need this when no other dwc-based drivers do?  Is Baikal-T1
> > > different in this respect?
> > 
> > It's because eDMA engine embedded into the DW PCIe root port. 
> 

> Let's add a comment about the fact that this is needed because of the
> eDMA engine.

Ok.

> 
> > > > +static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)
> > 
> > > Can you call dw_pcie_host_deinit() from the same place as other
> > > drivers?
> > > 
> > >   $ git grep -p dw_pcie_host_deinit drivers/pci/controller/dwc
> > 
> > Sorry I'd rather leave it as is. There are only four drivers using
> > it and one of them don't follow what seems like a convention. I'd
> > rather have my driver code coherent:
> > bt1_pcie_add_pcie_port() is used to add the DW PCIe Root Port.
> > and
> > bt1_pcie_del_pcie_port() is used to remove the DW PCIe Root Port
> 
> I agree with your rationale.  Intel and kirin do:
> 
>   *_pcie_probe
>     dw_pcie_host_init
> 
>   *_pcie_remove
>     dw_pcie_host_deinit
> 
> and tegra is similar but from tegra_pcie_init_controller() and
> tegra_pcie_deinit_controller().  Exynos is the odd one out and calls
> dw_pcie_host_init() from exynos_add_pcie_port() but
> dw_pcie_host_deinit() from exynos_pcie_remove().
> 
> Your model is better since it removes the "single root port"
> assumption.  I would like the "bt1_pcie_add_port()" and
> "bt1_pcie_del_port()" (or "bt1_pcie_remove_port()", which would be
> slightly more parallel with "add") names to align with other drivers.

Ok. I'll use bt1_pcie_add_port() and bt1_pcie_del_port() names then.
* Note the DW PCIe platform drivers mainly use the _pcie_port() suffix
* in the add-method.

-Sergey

> 
> Bjorn
Serge Semin June 28, 2022, 12:23 p.m. UTC | #7
Bjorn,
Do you have anything to say based on the notes below?

-Sergey

On Wed, Jun 22, 2022 at 08:04:37PM +0300, Serge Semin wrote:
> On Tue, Jun 21, 2022 at 01:29:41PM -0500, Bjorn Helgaas wrote:
> > On Mon, Jun 20, 2022 at 08:13:47PM +0300, Serge Semin wrote:
> > > On Wed, Jun 15, 2022 at 11:48:48AM -0500, Bjorn Helgaas wrote:
> > > > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > > > > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > > > > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > > > > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > > > > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > > > > fed up with four clock sources: DBI peripheral clock, AXI application
> > > > > clocks and external PHY/core reference clock generating the 100MHz signal.
> > > > > In addition to that the platform provide a way to reset each part of the
> > > > > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > > > > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > > > > handle the GPIO-based PERST# signal.
> > > > > 
> > > > > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > > > > interface accessors which make sure the IO operations are dword-aligned.
> > 
> > > > > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > > > > +	.read_dbi = bt1_pcie_read_dbi,
> > > > > +	.write_dbi = bt1_pcie_write_dbi,
> > > > > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > > > > +	.start_link = bt1_pcie_start_ltssm,
> > > > > +	.stop_link = bt1_pcie_stop_ltssm,
> > > > > +};
> > 
> > > > Please rename to "dw_pcie_ops" as most
> > > > drivers use. 
> > > 
> > > IMO matching the structure and its instance names is not a good idea.
> > > Other than confusing objects nature, at the very least it forces you to
> > > violate the local namespace convention. Thus in the line of the
> > > dw_pcie->ops initialization it looks like you use some generic
> > > operations while in fact you just refer to the locally defined
> > > DW PCIe ops instance with the generic variable name. Moreover AFAICS
> > > the latest platform drivers mainly use the vendor-specific prefix in
> > > the dw_pcie_ops structure instance including the ones acked by you,
> > > Lorenzo and Gustavo. What makes my code any different from them?
> > 
> 
> > That's fair.  I suggest "bt1_pcie_ops" or "bt1_dw_pcie_ops" to match
> > the other drivers that include the driver name:
> > 
> >   intel_pcie_ops
> >   keembay_pcie_ops
> >   kirin_dw_pcie_ops
> >   tegra_dw_pcie_ops
> 
> +   ks_pcie_dw_pcie_ops
> 
> which is even further from the suggested names.)
> 
> > 
> > They're not 100% consistent, but hopefully we can at least not make
> > things *less* consistent.
> 
> I don't think we can make something less consistent if there is no real
> consistency.) There are at most five ops descriptors can be defined in
> the DW PCIe platform drivers:
> 
> 1. struct dw_pcie_ops - DW PCIe DBI interface accessors,
> +-> dw_pcie_ops
> +-> <vendor>_pcie_ops
> +-> <vendor>_dw_pcie_ops
> 
> 2. struct pci_ops     - own or child PCIe config space accessors,
> +-> dw_pcie_ops !!! in the driver core.
> +-> <vendor>_pci_ops
> +-> <vendor>_pcie_ops
> +-> dw_child_pcie_ops
> +-> <vendor>_child_pcie_ops
> +-> <vendor>_child_pci_ops
> 
> 3. struct dw_pcie_host_ops - DW PCIe Root Port init/de-init operations
> +-> <vendor>_pcie_host_ops
> +-> <vendor>_pcie_dw_host_ops
> 
> 4. struct dw_pcie_ep_ops   - DW PCIe Endpoint init/de-init operations
> +-> pcie_ep_ops
> +-> pci_ep_ops
> +-> <vendor>_pcie_ep_ops
> 
> As you can see each can have different naming approaches used in the
> DW PCIe platform drivers here and there. Some of them have been utilized
> more frequently, some of them - less. As for me what is really consistent
> across all the DW PCIe platform drivers is the local namespace prefix
> of the form "<vendor>_pcie". It is used in all the locally defined
> functions names and more-or-less mainly in the local instances of the
> operation descriptors. So if you want we can pick some approach and
> make sure it is used in all the driver from now on. For instance,
> 
> struct dw_pcie_ops <vendor>_pcie_ops
> struct dw_pcie_host_ops <vendor>_pcie_host_ops
> struct dw_pcie_ep_ops <vendor>_pcie_ep_ops
> struct pci_ops <vendor>_pci_ops // Can be confused with the struct
>                                 // dw_pcie_ops instance, but this what
>                                 // is mainly used in the available drivers.
> struct pci_ops <vendor>_child_pci_ops // less frequent naming
>                                       // approach, but it looks more
>                                       // like the own CFG-space IOs.
> 
> Note the later two cases will violate the local namespace naming
> convention of having "<vendor>_pcie" prefix.
> 
> In my case the names would look like:
> struct dw_pcie_ops bt1_pcie_ops // What you suggest in the comment above
> struct dw_pcie_host_ops bt1_pcie_host_ops
> struct pci_ops bt1_pci_ops // It may look ambiguous with bt1_pcie_ops.
> 
> What do you think?
> 
> > 
> > > > > +static int bt1_pcie_get_res(struct bt1_pcie *btpci)
> > > 
> > > > Can you name this something similar to what other drivers use?  There
> > > > are a couple *_pcie_get_resources() functions (normally called from
> > > > *_pcie_probe()), but no *_get_res() yet.
> > > 
> > > Earlier in this patchset I've introduced a new method to get
> > > the CSRs ranges, PCIe speed, NoF lanes, etc resources. See the patch:
> > > [PATCH v3 14/17] PCI: dwc: Introduce generic resources getter
> > > The method has been named as "dw_pcie_get_res()". So the locally
> > > defined function has been named to refer to that method. If you think
> > > that using the "_resources" suffix is better (IMO there is no
> > > significant difference) then we'll need to change the name there too.
> > > Do you?
> > 
> 
> > Yes.  I don't think there's value in names being gratuitously
> > different.
> 
> Ok.
> 
> > 
> > > > > +	/* AXI-interface is configured with 64-bit address bus width */
> > > > > +	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
> > > > > +					   DMA_BIT_MASK(64));
> > > 
> > > > Just to double-check since this is the first instance of
> > > > dma_coerce_mask_and_coherent() in drivers/pci -- I guess Baikal-T1 is
> > > > unique in needing this?
> > > 
> > > To be honest I've set it here just in case, seeing the dma_mask and
> > > coherent_dma_mask are left uninitialized in the Host bridge device
> > > instance, while it's still participate in the PCI devices hierarchy:
> > > 
> > > 1. platform_device.dev;
> > >                    | (<= devm_pci_alloc_host_bridge(dev))
> > >                    +---+
> > >                       &v
> > > 2. pci_host_bridge.dev.parent
> > >                    | (<= pci_register_host_bridge(bridge) or)
> > >                    | (<= pci_alloc_child_bus()              )
> > >                   &v
> > >            pci_bus.bridge
> > >                    +-------------------+
> > >                    |                   | (<= pci_setup_device())
> > >                    v                   v
> > > 3.     pci_bus.dev.parent  pci_dev.dev.parent
> > >                            pci_dev.dma_mask = 0xffffffff;
> > >                                    | (<= pci_device_add())
> > >                                    +----+
> > >                                        &v
> > >                            pci_dev->dev.dma_mask
> > >                            pci_dev->dev.coherent_dma_mask = 0xffffffffull;
> > > 
> > > So each device detected on the very first PCIe bus gets to have the
> > > PCI host bridge device as a parent. But AFAICS the PCI subsystem core
> > > code doesn't use the PCI host bridge DMA-mask and by default the
> > > dma_mask/coherent_dma_mask fields of each PCIe peripheral device are
> > > unconditionally initialized with DMA_BIT_MASK(32) (they are supposed
> > > to be overridden by the device-driver anyway). So to speak we can
> > > freely drop the dma_coerce_mask_and_coherent() method invocation from
> > > my driver if you say it is required and the PCI host bridge DMA parameter
> > > will never be used. What do you think?
> > 
> 
> > I'd like the usage across drivers to be consistent unless there's a
> > hardware difference that requires something different.  So if you can
> > point to something different in bt1, great.  If not, do it the same as
> > the other drivers.
> 
> Ok. I'll drop it from the driver then.
> 
> > 
> > > > > +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
> > > > 
> > > > Can you name this something similar to what other drivers use?
> > > 
> > > For instance? (Please note, the link_stop/link_start callbacks are
> > > defined as separate methods above.) The current names correctly describe
> > > the methods logic. So I wouldn't want to fully change their names.
> > 
> 
> > Do any other drivers contain similar logic?  If so, please use a
> > similar name.
> 
> host_init content is very platform-specific. So each driver has its own
> callback implementation and logical sub-methods split up. My case
> isn't an exception.
> 
> > 
> > > > > +	 * Application reset controls are trigger-based so de-assert the core
> > > > > +	 * resets only.
> > > > > +	 */
> > > > > +	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
> > 
> 
> > BTW, the comment says "de-assert" but the code looks like "assert".
> 
> Right. It is supposed to be "assert" in accordance with what is
> actually done.
> 
> > 
> > > > > +	/* Make sure the reset is settled */
> > > > > +	usleep_range(1, 10);
> > > 
> > > > Is this duration related to something in the PCIe spec?  Or the DWC
> > > > spec? 
> > > 
> > > No. These durations are the chip-specific. Partly due to them being
> > > specific for each SoC we can't implement a generic bus reset
> > > procedure.
> > > 
> > > > I'd really like to use named constants when possible, although
> > > > we have a ton of bare magic numbers currently.
> > > > 
> > > > Similar for the poll timeouts and the "state settled" sleep below.
> > > 
> > > I don't really see much need in this parametrization since these
> > > numbers are used only once in the platform driver and their
> > > application is easily inferable from the code context.
> > 
> 
> > Even if they are used only once, it's helpful when constants like this
> > can be connected to the spec or other justification for the specific
> > values.
> 
> Ok. I'll replace the literals with the macros.
> 
> > 
> > > > > +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
> > > > > +{
> > > > > +	struct bt1_pcie *btpci;
> > > > > +
> > > > > +	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
> > > > > +	if (!btpci)
> > > > > +		return ERR_PTR(-ENOMEM);
> > > > > +
> > > > > +	btpci->pdev = pdev;
> > > > > +
> > > > > +	platform_set_drvdata(pdev, btpci);
> > > > > +
> > > > > +	return btpci;
> > > 
> > > > I don't think it's worth splitting this into a separate function.  I
> > > > think it would be better to use the same structure as other dwc-based
> > > > drivers and keep this in bt1_pcie_probe().
> > > 
> > > Sorry, I disagree in this matter. Generally I don't like the most of
> > > the probe methods designed in the kernel well because after evolving
> > > in time they get to be a mess if incoherent initializations,
> > > allocations, requests, etc. Splitting it up into a set of smaller
> > > coherent methods makes the code much clearer.
> > 
> 
> > There's definitely some tension between making one driver better and
> > making it different from all the others.  I'm all in favor of making
> > all the drivers better and more consistent.  I'm less in favor of
> > one-off improvements because consistency is extremely important for
> > maintenance.
> 
> Well, if there were a consistency in the probe method design it would
> have been another story, but in case of the DW PCIe there is none.
> Some DW PCIe platform drivers perform all the probe actions right in
> the probe method forming a long list of the weakly coherent things,
> some of them have a few sub-functions called but still look the same,
> some of them are mainly split up in the sub-methods, but still perform
> some initialization right in the probe method. So in general there is
> no unification and well defined convention in that regard.
> 
> My approach on the contrary makes the probe method code well unified.
> Should any new additional step is required, the new method can be
> added together with the cleanup antagonist. Similarly the
> sub-methods update patches is easier to review than reading the
> all-in-one probe code update. Moreover such design approach I've
> been using in all the drivers submitted by me to the kernel and no
> questions have been raised so far. Finally the driver is supposed to
> be maintained by its author at the very least. So I definitely won't
> have any problem with it especially after using the same design
> pattern in all my drivers.
> 
> > 
> > > > > +static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
> > > 
> > > > All other dwc-based drivers call dw_pcie_host_init() from either
> > > > *_pcie_probe() or *_add_pcie_port().  Please use a similar convention.
> > > 
> > > Not entirely. Tegra is an exception. So as before I don't think there
> > > is a real convention. Most likely it's a result of a lucky coincident.
> > > Moreover I don't really like such naming. Something like
> > > VENDOR_pcie_add_root_port() would be much better.
> > 
> 
> > I stand corrected.  Of the 21 dw_pcie_host_init() calls, 13 are from
> > *_pcie_probe(), 7 are from *_add_pcie_port(), and tegra is from
> > tegra_pcie_init_controller().  I think the *_add_pcie_port() structure
> > is better because it makes room to support multiple root ports.
> 
> See the last comment. It concerns the same methods, but you suggest to
> use the names "*_add_port()" there.
> 
> > 
> > > > > +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> > > > 
> > > > Why do you need this when no other dwc-based drivers do?  Is Baikal-T1
> > > > different in this respect?
> > > 
> > > It's because eDMA engine embedded into the DW PCIe root port. 
> > 
> 
> > Let's add a comment about the fact that this is needed because of the
> > eDMA engine.
> 
> Ok.
> 
> > 
> > > > > +static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)
> > > 
> > > > Can you call dw_pcie_host_deinit() from the same place as other
> > > > drivers?
> > > > 
> > > >   $ git grep -p dw_pcie_host_deinit drivers/pci/controller/dwc
> > > 
> > > Sorry I'd rather leave it as is. There are only four drivers using
> > > it and one of them don't follow what seems like a convention. I'd
> > > rather have my driver code coherent:
> > > bt1_pcie_add_pcie_port() is used to add the DW PCIe Root Port.
> > > and
> > > bt1_pcie_del_pcie_port() is used to remove the DW PCIe Root Port
> > 
> > I agree with your rationale.  Intel and kirin do:
> > 
> >   *_pcie_probe
> >     dw_pcie_host_init
> > 
> >   *_pcie_remove
> >     dw_pcie_host_deinit
> > 
> > and tegra is similar but from tegra_pcie_init_controller() and
> > tegra_pcie_deinit_controller().  Exynos is the odd one out and calls
> > dw_pcie_host_init() from exynos_add_pcie_port() but
> > dw_pcie_host_deinit() from exynos_pcie_remove().
> > 
> > Your model is better since it removes the "single root port"
> > assumption.  I would like the "bt1_pcie_add_port()" and
> > "bt1_pcie_del_port()" (or "bt1_pcie_remove_port()", which would be
> > slightly more parallel with "add") names to align with other drivers.
> 
> Ok. I'll use bt1_pcie_add_port() and bt1_pcie_del_port() names then.
> * Note the DW PCIe platform drivers mainly use the _pcie_port() suffix
> * in the add-method.
> 
> -Sergey
> 
> > 
> > Bjorn
Bjorn Helgaas June 28, 2022, 3:17 p.m. UTC | #8
On Tue, Jun 28, 2022 at 03:23:56PM +0300, Serge Semin wrote:
> Bjorn,
> Do you have anything to say based on the notes below?

I'm focused on the first series for now.

> On Wed, Jun 22, 2022 at 08:04:37PM +0300, Serge Semin wrote:
> > On Tue, Jun 21, 2022 at 01:29:41PM -0500, Bjorn Helgaas wrote:
> > > On Mon, Jun 20, 2022 at 08:13:47PM +0300, Serge Semin wrote:
> > > > On Wed, Jun 15, 2022 at 11:48:48AM -0500, Bjorn Helgaas wrote:
> > > > > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:

> > > > > > +struct dw_pcie_ops bt1_pcie_dw_ops = {
> > > > > > +	.read_dbi = bt1_pcie_read_dbi,
> > > > > > +	.write_dbi = bt1_pcie_write_dbi,
> > > > > > +	.write_dbi2 = bt1_pcie_write_dbi2,
> > > > > > +	.start_link = bt1_pcie_start_ltssm,
> > > > > > +	.stop_link = bt1_pcie_stop_ltssm,
> > > > > > +};
> > > 
> > > > > Please rename to "dw_pcie_ops" as most drivers use. 
> > > > 
> > > > IMO matching the structure and its instance names is not a good idea.
> > > > Other than confusing objects nature, at the very least it forces you to
> > > > violate the local namespace convention. Thus in the line of the
> > > > dw_pcie->ops initialization it looks like you use some generic
> > > > operations while in fact you just refer to the locally defined
> > > > DW PCIe ops instance with the generic variable name. Moreover AFAICS
> > > > the latest platform drivers mainly use the vendor-specific prefix in
> > > > the dw_pcie_ops structure instance including the ones acked by you,
> > > > Lorenzo and Gustavo. What makes my code any different from them?
> > 
> > > That's fair.  I suggest "bt1_pcie_ops" or "bt1_dw_pcie_ops" to match
> > > the other drivers that include the driver name:
> > > 
> > >   intel_pcie_ops
> > >   keembay_pcie_ops
> > >   kirin_dw_pcie_ops
> > >   tegra_dw_pcie_ops
> > 
> > +   ks_pcie_dw_pcie_ops
> > 
> > which is even further from the suggested names.)

As I said, they're not 100% consistent, but they almost all end in
"pcie_ops".  Yours ends in "pcie_dw_ops", which is why I suggested
renaming to be similar to the others.

I don't want to make a huge deal about this, but I do want as much
similarity across drivers as possible.  I get that it doesn't benefit
*you*, but it's a huge benefit to everybody else.

> > > They're not 100% consistent, but hopefully we can at least not make
> > > things *less* consistent.
> > 
> > I don't think we can make something less consistent if there is no real
> > consistency.) There are at most five ops descriptors can be defined in
> > the DW PCIe platform drivers:
Rob Herring (Arm) July 12, 2022, 8:29 p.m. UTC | #9
On Sun, Jun 19, 2022 at 11:39:04PM +0300, Serge Semin wrote:
> On Wed, Jun 15, 2022 at 11:10:45AM -0600, Rob Herring wrote:
> > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > > fed up with four clock sources: DBI peripheral clock, AXI application
> > > clocks and external PHY/core reference clock generating the 100MHz signal.
> > > In addition to that the platform provide a way to reset each part of the
> > > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > > handle the GPIO-based PERST# signal.
> > > 
> > > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > > interface accessors which make sure the IO operations are dword-aligned.
> > > 
> > > Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > > 
> > > ---
> > > 
> > > Changelog v2:
> > > - Rename 'syscon' property to 'baikal,bt1-syscon'.
> > > 
> > > Changelog v3:
> > > - Use the clocks/resets handlers defined in the DW PCIe core descriptor.
> > >   (@Rob)
> > > - Redefine PCI host bridge config space accessors with the generic
> > >   pci_generic_config_read32() and pci_generic_config_write32() methods.
> > >   (@Rob)
> > > ---
> > >  drivers/pci/controller/dwc/Kconfig    |   9 +
> > >  drivers/pci/controller/dwc/Makefile   |   1 +
> > >  drivers/pci/controller/dwc/pcie-bt1.c | 649 ++++++++++++++++++++++++++
> > >  3 files changed, 659 insertions(+)
> > >  create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c
> > > 
> > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > index 62ce3abf0f19..771b8b146623 100644
> > > --- a/drivers/pci/controller/dwc/Kconfig
> > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP
> > >  	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
> > >  	  endpoint mode. This uses the DesignWare core.
> > >  
> > > +config PCIE_BT1
> > > +	tristate "Baikal-T1 PCIe controller"
> > > +	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
> > > +	depends on PCI_MSI_IRQ_DOMAIN
> > > +	select PCIE_DW_HOST
> > > +	help
> > > +	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
> > > +	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
> > > +
> > >  config PCIE_ROCKCHIP_DW_HOST
> > >  	bool "Rockchip DesignWare PCIe controller"
> > >  	select PCIE_DW
> > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > index 8ba7b67f5e50..bf5c311875a1 100644
> > > --- a/drivers/pci/controller/dwc/Makefile
> > > +++ b/drivers/pci/controller/dwc/Makefile
> > > @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o
> > >  obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
> > >  obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
> > >  obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
> > > +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
> > >  obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
> > >  obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
> > >  obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
> > > diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
> > > new file mode 100644
> > > index 000000000000..03f035743b78
> > > --- /dev/null
> > > +++ b/drivers/pci/controller/dwc/pcie-bt1.c
> > > @@ -0,0 +1,649 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
> > > + *
> > > + * Authors:
> > > + *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
> > > + *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > > + *
> > > + * Baikal-T1 PCIe controller driver
> > > + */
> > > +
> > > +#include <linux/bitfield.h>
> > > +#include <linux/bits.h>
> > > +#include <linux/clk.h>
> > > +#include <linux/delay.h>
> > > +#include <linux/gpio/consumer.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/mfd/syscon.h>
> > > +#include <linux/module.h>
> > > +#include <linux/pci.h>
> > > +#include <linux/platform_device.h>
> > > +#include <linux/regmap.h>
> > > +#include <linux/reset.h>
> > > +#include <linux/types.h>
> > > +
> > > +#include "pcie-designware.h"
> > > +
> > > +/* Baikal-T1 System CCU control registers */
> > > +#define BT1_CCU_PCIE_CLKC			0x140
> > > +#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
> > > +#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
> > > +#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
> > > +
> > > +#define BT1_CCU_PCIE_RSTC			0x144
> > > +#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
> > > +#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
> > > +#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
> > > +#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
> > > +#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
> > > +#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
> > > +
> > > +#define BT1_CCU_PCIE_PMSC			0x148
> > > +#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
> > > +#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
> > > +#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
> > > +#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
> > > +#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
> > > +#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
> > > +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
> > > +#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
> > > +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
> > > +#define BT1_CCU_PCIE_LTSSM_L0			0x11
> > > +#define BT1_CCU_PCIE_LTSSM_L0S			0x12
> > > +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
> > > +#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
> > > +#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
> > > +#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
> > > +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
> > > +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
> > > +#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
> > > +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
> > > +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
> > > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
> > > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
> > > +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
> > > +#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
> > > +#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
> > > +#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
> > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
> > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
> > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
> > > +#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
> > > +#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
> > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
> > > +#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
> > > +#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
> > > +#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
> > > +#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
> > > +#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
> > > +#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
> > > +#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
> > > +#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
> > > +
> > > +#define BT1_CCU_PCIE_GENC			0x14c
> > > +#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
> > > +#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
> > > +#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
> > > +#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
> > > +#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
> > > +#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
> > > +#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
> > > +#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
> > > +#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
> > > +
> > > +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
> > > +({ \
> > > +	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
> > > +	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
> > > +})
> > > +
> > > +/* Baikal-T1 PCIe specific control registers */
> > > +#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
> > > +#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
> > > +
> > > +#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
> > > +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
> > > +#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
> > > +#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
> > > +#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
> > > +
> > > +#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
> > > +#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
> > > +
> > > +#define BT1_PCIE_AXI2MGM_READDATA		0xd10
> > > +#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
> > > +
> > > +/* Generic Baikal-T1 PCIe interface resources */
> > > +#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
> > > +#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
> > > +#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
> > > +#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
> > > +
> > > +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
> > > +	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
> > > +};
> > > +
> > > +static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
> > > +	DW_PCIE_REF_CLK,
> > > +};
> > > +
> > > +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
> > > +	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
> > > +};
> > > +
> > > +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
> > > +	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
> > > +	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
> > > +};
> > > +
> > > +struct bt1_pcie {
> > > +	struct dw_pcie dw;
> > > +	struct platform_device *pdev;
> > > +	struct regmap *sys_regs;
> > > +};
> > > +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
> > > +
> > > +/*
> > > + * Baikal-T1 MMIO space must be read/written by the dword-aligned
> > > + * instructions. Note the methods are optimized to have the dword operations
> > > + * performed with minimum overhead as the most frequently used ones.
> > > + */
> > > +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
> > > +{
> > > +	unsigned int ofs = (uintptr_t)addr & 0x3;
> > > +
> > > +	if (!IS_ALIGNED((uintptr_t)addr, size))
> > > +		return PCIBIOS_BAD_REGISTER_NUMBER;
> > 
> 
> > This isn't for PCI config space accessors, don't use PCIBIOS_*. We 
> > really want to get rid of those.
> 
> Ok. I'll drop the PCIBIOS_* macros usage from here.
> 
> > 
> > You are in control of all the accesses, so the error conditions should 
> > never happen, don't need to be checked, and don't need to be returned.
> 
> not entirely. I'd prefer to keep the error-condition check here because the
> methods are the core of the callbacks called from the generic part of the
> DW PCIe driver which if get to regress in making the unsupported IO
> accesses will be easier to debug should the sanity check is performed.

Actually, this will get called for the config accessor path, so you need 
to keep it as-is. We do want to push the PCIBIOS_* error codes out of 
the drivers though...

Rob
Serge Semin July 12, 2022, 8:58 p.m. UTC | #10
On Tue, Jul 12, 2022 at 02:29:07PM -0600, Rob Herring wrote:
> On Sun, Jun 19, 2022 at 11:39:04PM +0300, Serge Semin wrote:
> > On Wed, Jun 15, 2022 at 11:10:45AM -0600, Rob Herring wrote:
> > > On Fri, Jun 10, 2022 at 11:57:05AM +0300, Serge Semin wrote:
> > > > Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
> > > > trained to work up to Gen.3 speed over up to x4 lanes. The host controller
> > > > is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
> > > > turn is connected to the DWC 10G PHY. The whole system is supposed to be
> > > > fed up with four clock sources: DBI peripheral clock, AXI application
> > > > clocks and external PHY/core reference clock generating the 100MHz signal.
> > > > In addition to that the platform provide a way to reset each part of the
> > > > controller: sticky/non-sticky bits, host controller core, PIPE interface,
> > > > PCS/PHY and Hot/Power reset signal. The driver also provides a way to
> > > > handle the GPIO-based PERST# signal.
> > > > 
> > > > Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
> > > > interface accessors which make sure the IO operations are dword-aligned.
> > > > 
> > > > Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > > > 
> > > > ---
> > > > 
> > > > Changelog v2:
> > > > - Rename 'syscon' property to 'baikal,bt1-syscon'.
> > > > 
> > > > Changelog v3:
> > > > - Use the clocks/resets handlers defined in the DW PCIe core descriptor.
> > > >   (@Rob)
> > > > - Redefine PCI host bridge config space accessors with the generic
> > > >   pci_generic_config_read32() and pci_generic_config_write32() methods.
> > > >   (@Rob)
> > > > ---
> > > >  drivers/pci/controller/dwc/Kconfig    |   9 +
> > > >  drivers/pci/controller/dwc/Makefile   |   1 +
> > > >  drivers/pci/controller/dwc/pcie-bt1.c | 649 ++++++++++++++++++++++++++
> > > >  3 files changed, 659 insertions(+)
> > > >  create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
> > > > index 62ce3abf0f19..771b8b146623 100644
> > > > --- a/drivers/pci/controller/dwc/Kconfig
> > > > +++ b/drivers/pci/controller/dwc/Kconfig
> > > > @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP
> > > >  	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
> > > >  	  endpoint mode. This uses the DesignWare core.
> > > >  
> > > > +config PCIE_BT1
> > > > +	tristate "Baikal-T1 PCIe controller"
> > > > +	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
> > > > +	depends on PCI_MSI_IRQ_DOMAIN
> > > > +	select PCIE_DW_HOST
> > > > +	help
> > > > +	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
> > > > +	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
> > > > +
> > > >  config PCIE_ROCKCHIP_DW_HOST
> > > >  	bool "Rockchip DesignWare PCIe controller"
> > > >  	select PCIE_DW
> > > > diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
> > > > index 8ba7b67f5e50..bf5c311875a1 100644
> > > > --- a/drivers/pci/controller/dwc/Makefile
> > > > +++ b/drivers/pci/controller/dwc/Makefile
> > > > @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o
> > > >  obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
> > > >  obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
> > > >  obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
> > > > +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
> > > >  obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
> > > >  obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
> > > >  obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
> > > > diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
> > > > new file mode 100644
> > > > index 000000000000..03f035743b78
> > > > --- /dev/null
> > > > +++ b/drivers/pci/controller/dwc/pcie-bt1.c
> > > > @@ -0,0 +1,649 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
> > > > + *
> > > > + * Authors:
> > > > + *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
> > > > + *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
> > > > + *
> > > > + * Baikal-T1 PCIe controller driver
> > > > + */
> > > > +
> > > > +#include <linux/bitfield.h>
> > > > +#include <linux/bits.h>
> > > > +#include <linux/clk.h>
> > > > +#include <linux/delay.h>
> > > > +#include <linux/gpio/consumer.h>
> > > > +#include <linux/kernel.h>
> > > > +#include <linux/mfd/syscon.h>
> > > > +#include <linux/module.h>
> > > > +#include <linux/pci.h>
> > > > +#include <linux/platform_device.h>
> > > > +#include <linux/regmap.h>
> > > > +#include <linux/reset.h>
> > > > +#include <linux/types.h>
> > > > +
> > > > +#include "pcie-designware.h"
> > > > +
> > > > +/* Baikal-T1 System CCU control registers */
> > > > +#define BT1_CCU_PCIE_CLKC			0x140
> > > > +#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
> > > > +#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
> > > > +#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
> > > > +
> > > > +#define BT1_CCU_PCIE_RSTC			0x144
> > > > +#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
> > > > +#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
> > > > +#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
> > > > +#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
> > > > +#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
> > > > +#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
> > > > +
> > > > +#define BT1_CCU_PCIE_PMSC			0x148
> > > > +#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
> > > > +#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
> > > > +#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
> > > > +#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
> > > > +#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
> > > > +#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
> > > > +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
> > > > +#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
> > > > +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
> > > > +#define BT1_CCU_PCIE_LTSSM_L0			0x11
> > > > +#define BT1_CCU_PCIE_LTSSM_L0S			0x12
> > > > +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
> > > > +#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
> > > > +#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
> > > > +#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
> > > > +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
> > > > +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
> > > > +#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
> > > > +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
> > > > +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
> > > > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
> > > > +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
> > > > +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
> > > > +#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
> > > > +#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
> > > > +#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
> > > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
> > > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
> > > > +#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
> > > > +#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
> > > > +#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
> > > > +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
> > > > +#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
> > > > +#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
> > > > +#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
> > > > +#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
> > > > +#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
> > > > +#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
> > > > +#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
> > > > +#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
> > > > +
> > > > +#define BT1_CCU_PCIE_GENC			0x14c
> > > > +#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
> > > > +#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
> > > > +#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
> > > > +#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
> > > > +#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
> > > > +#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
> > > > +#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
> > > > +#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
> > > > +#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
> > > > +
> > > > +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
> > > > +({ \
> > > > +	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
> > > > +	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
> > > > +})
> > > > +
> > > > +/* Baikal-T1 PCIe specific control registers */
> > > > +#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
> > > > +#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
> > > > +
> > > > +#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
> > > > +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
> > > > +#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
> > > > +#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
> > > > +#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
> > > > +
> > > > +#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
> > > > +#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
> > > > +
> > > > +#define BT1_PCIE_AXI2MGM_READDATA		0xd10
> > > > +#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
> > > > +
> > > > +/* Generic Baikal-T1 PCIe interface resources */
> > > > +#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
> > > > +#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
> > > > +#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
> > > > +#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
> > > > +
> > > > +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
> > > > +	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
> > > > +};
> > > > +
> > > > +static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
> > > > +	DW_PCIE_REF_CLK,
> > > > +};
> > > > +
> > > > +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
> > > > +	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
> > > > +};
> > > > +
> > > > +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
> > > > +	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
> > > > +	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
> > > > +};
> > > > +
> > > > +struct bt1_pcie {
> > > > +	struct dw_pcie dw;
> > > > +	struct platform_device *pdev;
> > > > +	struct regmap *sys_regs;
> > > > +};
> > > > +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
> > > > +
> > > > +/*
> > > > + * Baikal-T1 MMIO space must be read/written by the dword-aligned
> > > > + * instructions. Note the methods are optimized to have the dword operations
> > > > + * performed with minimum overhead as the most frequently used ones.
> > > > + */
> > > > +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
> > > > +{
> > > > +	unsigned int ofs = (uintptr_t)addr & 0x3;
> > > > +
> > > > +	if (!IS_ALIGNED((uintptr_t)addr, size))
> > > > +		return PCIBIOS_BAD_REGISTER_NUMBER;
> > > 
> > 
> > > This isn't for PCI config space accessors, don't use PCIBIOS_*. We 
> > > really want to get rid of those.
> > 
> > Ok. I'll drop the PCIBIOS_* macros usage from here.
> > 
> > > 
> > > You are in control of all the accesses, so the error conditions should 
> > > never happen, don't need to be checked, and don't need to be returned.
> > 
> > not entirely. I'd prefer to keep the error-condition check here because the
> > methods are the core of the callbacks called from the generic part of the
> > DW PCIe driver which if get to regress in making the unsupported IO
> > accesses will be easier to debug should the sanity check is performed.
> 

> Actually, this will get called for the config accessor path, so you need 
> to keep it as-is. We do want to push the PCIBIOS_* error codes out of 
> the drivers though...

Please clarify. By saying "keep it as-is" did you mean to keep the
PCIBIOS_* macros usage here?

Note bt1_pcie_read_mmio() and bt1_pcie_write_mmio() methods we are
talking about are used to access the DW PCIe DBI interface only in the
framework of the internal DW PCIe controller driver code. At least
AFAICS from the current DW PCIe driver implementation they won't be
called by the PCIe subsystem core neither explicitly nor implicitly.
So you were right using the PCIBIOS_* macros here wasn't required.

-Sergey

> 
> Rob
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 62ce3abf0f19..771b8b146623 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -222,6 +222,15 @@  config PCIE_ARTPEC6_EP
 	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
 	  endpoint mode. This uses the DesignWare core.
 
+config PCIE_BT1
+	tristate "Baikal-T1 PCIe controller"
+	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+	depends on PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	help
+	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
+	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
+
 config PCIE_ROCKCHIP_DW_HOST
 	bool "Rockchip DesignWare PCIe controller"
 	select PCIE_DW
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index 8ba7b67f5e50..bf5c311875a1 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -3,6 +3,7 @@  obj-$(CONFIG_PCIE_DW) += pcie-designware.o
 obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
 obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
 obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
+obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
 obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
 obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
new file mode 100644
index 000000000000..03f035743b78
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-bt1.c
@@ -0,0 +1,649 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
+ *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ * Baikal-T1 PCIe controller driver
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+/* Baikal-T1 System CCU control registers */
+#define BT1_CCU_PCIE_CLKC			0x140
+#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
+#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
+#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
+
+#define BT1_CCU_PCIE_RSTC			0x144
+#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
+#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
+#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
+#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
+#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
+#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
+
+#define BT1_CCU_PCIE_PMSC			0x148
+#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
+#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
+#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
+#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
+#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
+#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
+#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
+#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
+#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
+#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
+#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
+#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
+#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
+#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
+#define BT1_CCU_PCIE_LTSSM_L0			0x11
+#define BT1_CCU_PCIE_LTSSM_L0S			0x12
+#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
+#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
+#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
+#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
+#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
+#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
+#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
+#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
+#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
+#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
+#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
+#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
+#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
+#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
+#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
+#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
+#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
+#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
+#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
+#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
+#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
+#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
+#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
+
+#define BT1_CCU_PCIE_GENC			0x14c
+#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
+#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
+#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
+#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
+#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
+#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
+#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
+#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
+#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
+
+#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
+({ \
+	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
+	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
+})
+
+/* Baikal-T1 PCIe specific control registers */
+#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
+#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
+
+#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
+#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
+#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
+#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
+#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
+
+#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
+#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
+
+#define BT1_PCIE_AXI2MGM_READDATA		0xd10
+#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
+
+/* Generic Baikal-T1 PCIe interface resources */
+#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
+#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
+#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
+#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
+
+static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
+	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
+};
+
+static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
+	DW_PCIE_REF_CLK,
+};
+
+static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
+	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
+};
+
+static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
+	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
+	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
+};
+
+struct bt1_pcie {
+	struct dw_pcie dw;
+	struct platform_device *pdev;
+	struct regmap *sys_regs;
+};
+#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
+
+/*
+ * Baikal-T1 MMIO space must be read/written by the dword-aligned
+ * instructions. Note the methods are optimized to have the dword operations
+ * performed with minimum overhead as the most frequently used ones.
+ */
+static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	*val = readl(addr - ofs) >> ofs * BITS_PER_BYTE;
+	if (size == 4) {
+		return PCIBIOS_SUCCESSFUL;
+	} else if (size == 2) {
+		*val &= 0xffff;
+		return PCIBIOS_SUCCESSFUL;
+	} else if (size == 1) {
+		*val &= 0xff;
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	return PCIBIOS_BAD_REGISTER_NUMBER;
+}
+
+static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+	u32 tmp, mask;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (size == 4) {
+		writel(val, addr);
+		return PCIBIOS_SUCCESSFUL;
+	} else if (size == 2 || size == 1) {
+		mask = GENMASK(size * BITS_PER_BYTE - 1, 0);
+		tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE);
+		tmp |= (val & mask) << ofs * BITS_PER_BYTE;
+		writel(tmp, addr - ofs);
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	return PCIBIOS_BAD_REGISTER_NUMBER;
+}
+
+static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			     size_t size)
+{
+	int ret;
+	u32 val;
+
+	ret = bt1_pcie_read_mmio(base + reg, size, &val);
+	if (ret != PCIBIOS_SUCCESSFUL) {
+		dev_err(pci->dev, "Read DBI address failed\n");
+		return ~0U;
+	}
+
+	return val;
+}
+
+static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			       size_t size, u32 val)
+{
+	int ret;
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		dev_err(pci->dev, "Write DBI address failed\n");
+}
+
+static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
+				size_t size, u32 val)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE);
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		dev_err(pci->dev, "Write DBI2 address failed\n");
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, 0);
+}
+
+static int bt1_pcie_start_ltssm(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	u32 val;
+	int ret;
+
+	/*
+	 * Enable LTSSM and make sure it was able to establish both PHY and
+	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
+	 */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
+				       1000, 1000000);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
+		return ret;
+	}
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
+				       1000, 1000000);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set data link up\n");
+		return ret;
+	}
+
+	/*
+	 * Activate direct speed change after the link is established in an
+	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
+	 * This is required at least to get 8.0 GT/s speed.
+	 */
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
+				       1000, 1000000);
+	if (ret)
+		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
+
+	return ret;
+}
+
+static void bt1_pcie_stop_ltssm(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+}
+
+struct dw_pcie_ops bt1_pcie_dw_ops = {
+	.read_dbi = bt1_pcie_read_dbi,
+	.write_dbi = bt1_pcie_write_dbi,
+	.write_dbi2 = bt1_pcie_write_dbi2,
+	.start_link = bt1_pcie_start_ltssm,
+	.stop_link = bt1_pcie_stop_ltssm,
+};
+
+static struct pci_ops bt1_pcie_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = pci_generic_config_read32,
+	.write = pci_generic_config_write32,
+};
+
+static int bt1_pcie_get_res(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	int i, ret;
+
+	/* DBI access is supposed to be performed by the dword-aligned IOs */
+	btpci->dw.pp.bridge->ops = &bt1_pcie_ops;
+
+	/* AXI-interface is configured with 64-bit address bus width */
+	ret = dma_coerce_mask_and_coherent(&btpci->dw.pp.bridge->dev,
+					   DMA_BIT_MASK(64));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(&btpci->dw.pp.bridge->dev,
+						DMA_BIT_MASK(32));
+		if (ret)
+			return ret;
+	}
+
+	/* These CSRs are in MMIO so we won't check the regmap-methods status */
+	btpci->sys_regs =
+		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
+	if (IS_ERR(btpci->sys_regs))
+		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
+				     "Failed to get syscon\n");
+
+	/* Make sure all the required resources have been specified */
+	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
+		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
+			dev_err(dev, "App clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
+		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
+			dev_err(dev, "Core clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
+		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
+			dev_err(dev, "App resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
+		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
+			dev_err(dev, "Core resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	int ret;
+
+	/* Disable LTSSM for sure */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+
+	/*
+	 * Application reset controls are trigger-based so de-assert the core
+	 * resets only.
+	 */
+	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
+	if (ret)
+		dev_err(dev, "Failed to assert core resets\n");
+
+	/*
+	 * Clocks are disabled by default at least in accordance with the clk
+	 * enable counter value on init stage.
+	 */
+	if (!init) {
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	}
+
+	/* The peripheral devices are unavailable anyway so reset them too */
+	gpiod_set_value_cansleep(pci->pe_rst, 1);
+
+	/* Make sure the reset is settled */
+	usleep_range(1, 10);
+}
+
+/*
+ * Implements the cold reset procedure in accordance with the reference manual
+ * and available PM signals.
+ */
+static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	u32 val;
+	int ret;
+
+	/* First get out of the Power/Hot reset state */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		return ret;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert hot reset\n");
+		goto err_assert_pwr_rst;
+	}
+
+	/* Wait for the PM-core to stop requesting the PHY reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_PHY_RST), 1, 1000);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
+		goto err_assert_hot_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		goto err_assert_hot_rst;
+	}
+
+	/* Clocks can be now enabled, but the ref one is crucial at this stage */
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable app clocks\n");
+		goto err_assert_phy_rst;
+	}
+
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable ref clocks\n");
+		goto err_disable_app_clk;
+	}
+
+	/* Wait for the PM to stop requesting the controller core reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_CORE_RST), 1, 1000);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
+		goto err_disable_core_clk;
+	}
+
+	/* PCS-PIPE interface and controller core can be now activated */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PIPE reset\n");
+		goto err_disable_core_clk;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert core reset\n");
+		goto err_assert_pipe_rst;
+	}
+
+	/* It's recommended to reset the core and application logic together */
+	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
+	if (ret) {
+		dev_err(dev, "Failed to reset app domain\n");
+		goto err_assert_core_rst;
+	}
+
+	/* Sticky/Non-sticky CSR flags can be now unreset too */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert sticky reset\n");
+		goto err_assert_core_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert non-sticky reset\n");
+		goto err_assert_sticky_rst;
+	}
+
+	/* Activate the PCIe bus peripheral devices */
+	gpiod_set_value_cansleep(pci->pe_rst, 0);
+
+	/* Make sure the state is settled (LTSSM is still disabled though) */
+	usleep_range(1, 10);
+
+	return 0;
+
+err_assert_sticky_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+
+err_assert_core_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+
+err_assert_pipe_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+
+err_disable_core_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+err_disable_app_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+
+err_assert_phy_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+
+err_assert_hot_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+
+err_assert_pwr_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+
+	return ret;
+}
+
+static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	ret = bt1_pcie_get_res(btpci);
+	if (ret)
+		return ret;
+
+	bt1_pcie_full_stop_bus(btpci, true);
+
+	return bt1_pcie_cold_start_bus(btpci);
+}
+
+static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	bt1_pcie_full_stop_bus(btpci, false);
+}
+
+struct dw_pcie_host_ops bt1_pcie_host_ops = {
+	.host_init = bt1_pcie_host_init,
+	.host_deinit = bt1_pcie_host_deinit,
+};
+
+static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
+	if (!btpci)
+		return ERR_PTR(-ENOMEM);
+
+	btpci->pdev = pdev;
+
+	platform_set_drvdata(pdev, btpci);
+
+	return btpci;
+}
+
+static int bt1_pcie_add_dw_port(struct bt1_pcie *btpci)
+{
+	struct device *dev = &btpci->pdev->dev;
+	int ret;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	btpci->dw.version = DW_PCIE_VER_460A;
+	btpci->dw.dev = dev;
+	btpci->dw.ops = &bt1_pcie_dw_ops;
+
+	btpci->dw.pp.num_vectors = MAX_MSI_IRQS;
+	btpci->dw.pp.ops = &bt1_pcie_host_ops;
+
+	dw_pcie_cap_set(&btpci->dw, REQ_RES);
+
+	ret = dw_pcie_host_init(&btpci->dw.pp);
+	if (ret)
+		dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n");
+
+	return ret;
+}
+
+static void bt1_pcie_del_dw_port(struct bt1_pcie *btpci)
+{
+	dw_pcie_host_deinit(&btpci->dw.pp);
+}
+
+static int bt1_pcie_probe(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = bt1_pcie_create_data(pdev);
+	if (IS_ERR(btpci))
+		return PTR_ERR(btpci);
+
+	return bt1_pcie_add_dw_port(btpci);
+}
+
+static int bt1_pcie_remove(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci = platform_get_drvdata(pdev);
+
+	bt1_pcie_del_dw_port(btpci);
+
+	return 0;
+}
+
+static const struct of_device_id bt1_pcie_of_match[] = {
+	{ .compatible = "baikal,bt1-pcie" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bt1_pcie_of_match);
+
+static struct platform_driver bt1_pcie_driver = {
+	.probe = bt1_pcie_probe,
+	.remove = bt1_pcie_remove,
+	.driver = {
+		.name	= "bt1-pcie",
+		.of_match_table = bt1_pcie_of_match,
+	},
+};
+module_platform_driver(bt1_pcie_driver);
+
+MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 PCIe driver");
+MODULE_LICENSE("GPL");