diff mbox

[v2,3/5] PCI: rockchip: add remove() support

Message ID 20170310024617.67303-3-briannorris@chromium.org
State Accepted
Headers show

Commit Message

Brian Norris March 10, 2017, 2:46 a.m. UTC
Currently, if we try to unbind the platform device, the remove will
succeed, but the removal won't undo most of the registration, leaving
partially-configured PCI devices in the system.

This allows, for example, a simple 'lspci' to crash the system, as it
will try to touch the freed (via devm_*) driver structures.

So let's implement device remove().

Signed-off-by: Brian Norris <briannorris@chromium.org>
---
v2:
* unmap IO space with pci_unmap_iospace()
* remove IRQ domain
---
 drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

Comments

Shawn Lin March 10, 2017, 3:22 a.m. UTC | #1
On 2017/3/10 10:46, Brian Norris wrote:
> Currently, if we try to unbind the platform device, the remove will
> succeed, but the removal won't undo most of the registration, leaving
> partially-configured PCI devices in the system.
>
> This allows, for example, a simple 'lspci' to crash the system, as it
> will try to touch the freed (via devm_*) driver structures.
>
> So let's implement device remove().
>

As this patchset seems to be merged together so I think the following
warning will be ok? if my git-am robot only pick your patch 1->compile->
patch 2->compile->patch 3 then

drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of 
function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
   pci_unmap_iospace(rockchip->io);

but I guess you may need to move your patch 4 ahead of patch 3?


> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
> v2:
> * unmap IO space with pci_unmap_iospace()
> * remove IRQ domain
> ---
>  drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index 5d7b27b1e941..d2e5078ae331 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -223,9 +223,11 @@ struct rockchip_pcie {
>  	int	link_gen;
>  	struct	device *dev;
>  	struct	irq_domain *irq_domain;
> -	u32     io_size;
>  	int     offset;
> +	struct pci_bus *root_bus;
> +	struct resource *io;
>  	phys_addr_t io_bus_addr;
> +	u32     io_size;
>  	void    __iomem *msg_region;
>  	u32     mem_size;
>  	phys_addr_t msg_bus_addr;
> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  					 err, io);
>  				continue;
>  			}
> +			rockchip->io = io;
>  			break;
>  		case IORESOURCE_MEM:
>  			mem = win->res;
> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  		err = -ENOMEM;
>  		goto err_free_res;
>  	}
> +	rockchip->root_bus = bus;
>
>  	pci_bus_size_bridges(bus);
>  	pci_bus_assign_resources(bus);
> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  	return err;
>  }
>
> +static int rockchip_pcie_remove(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
> +
> +	pci_stop_root_bus(rockchip->root_bus);
> +	pci_remove_root_bus(rockchip->root_bus);
> +	pci_unmap_iospace(rockchip->io);
> +	irq_domain_remove(rockchip->irq_domain);
> +
> +	phy_power_off(rockchip->phy);
> +	phy_exit(rockchip->phy);
> +
> +	clk_disable_unprepare(rockchip->clk_pcie_pm);
> +	clk_disable_unprepare(rockchip->hclk_pcie);
> +	clk_disable_unprepare(rockchip->aclk_perf_pcie);
> +	clk_disable_unprepare(rockchip->aclk_pcie);
> +
> +	if (!IS_ERR(rockchip->vpcie3v3))
> +		regulator_disable(rockchip->vpcie3v3);
> +	if (!IS_ERR(rockchip->vpcie1v8))
> +		regulator_disable(rockchip->vpcie1v8);
> +	if (!IS_ERR(rockchip->vpcie0v9))
> +		regulator_disable(rockchip->vpcie0v9);
> +
> +	return 0;
> +}
> +
>  static const struct dev_pm_ops rockchip_pcie_pm_ops = {
>  	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
>  				      rockchip_pcie_resume_noirq)
> @@ -1438,6 +1470,6 @@ static struct platform_driver rockchip_pcie_driver = {
>  		.pm = &rockchip_pcie_pm_ops,
>  	},
>  	.probe = rockchip_pcie_probe,
> -
> +	.remove = rockchip_pcie_remove,
>  };
>  builtin_platform_driver(rockchip_pcie_driver);
>
Shawn Lin March 10, 2017, 4:20 a.m. UTC | #2
On 2017/3/10 11:22, Shawn Lin wrote:
> On 2017/3/10 10:46, Brian Norris wrote:
>> Currently, if we try to unbind the platform device, the remove will
>> succeed, but the removal won't undo most of the registration, leaving
>> partially-configured PCI devices in the system.
>>
>> This allows, for example, a simple 'lspci' to crash the system, as it
>> will try to touch the freed (via devm_*) driver structures.
>>
>> So let's implement device remove().
>>
>
> As this patchset seems to be merged together so I think the following
> warning will be ok? if my git-am robot only pick your patch 1->compile->
> patch 2->compile->patch 3 then
>
> drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
> drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
> function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
>   pci_unmap_iospace(rockchip->io);
>
> but I guess you may need to move your patch 4 ahead of patch 3?
>

Well, I am not sure if something is wrong here.

But when booting up the system for the first time, we got
[    0.527263] PCI host bridge /pcie@f8000000 ranges:
[    0.527293]   MEM 0xfa000000..0xfa5fffff -> 0xfa000000
[    0.527308]    IO 0xfa600000..0xfa6fffff -> 0xfa600000
[    0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0

so the hierarchy(lspci -t) looks like:
lspci -t
-[0000:00]---00.0-[01]----00.0

and lspci
0000:00:00.0 Class 0604: Device 1d87:0100
0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)

but if I did unbind and bind, the bus number is different.

lspci
0001:00:00.0 Class 0604: Device 1d87:0100
0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)

lspci -t
-+-[0001:00]---00.0-[01]----00.0
  \-[0000:00]-

This hierarchy looks wrong to me.

>
>> Signed-off-by: Brian Norris <briannorris@chromium.org>
>> ---
>> v2:
>> * unmap IO space with pci_unmap_iospace()
>> * remove IRQ domain
>> ---
>>  drivers/pci/host/pcie-rockchip.c | 36
>> ++++++++++++++++++++++++++++++++++--
>>  1 file changed, 34 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/pci/host/pcie-rockchip.c
>> b/drivers/pci/host/pcie-rockchip.c
>> index 5d7b27b1e941..d2e5078ae331 100644
>> --- a/drivers/pci/host/pcie-rockchip.c
>> +++ b/drivers/pci/host/pcie-rockchip.c
>> @@ -223,9 +223,11 @@ struct rockchip_pcie {
>>      int    link_gen;
>>      struct    device *dev;
>>      struct    irq_domain *irq_domain;
>> -    u32     io_size;
>>      int     offset;
>> +    struct pci_bus *root_bus;
>> +    struct resource *io;
>>      phys_addr_t io_bus_addr;
>> +    u32     io_size;
>>      void    __iomem *msg_region;
>>      u32     mem_size;
>>      phys_addr_t msg_bus_addr;
>> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>>                       err, io);
>>                  continue;
>>              }
>> +            rockchip->io = io;
>>              break;
>>          case IORESOURCE_MEM:
>>              mem = win->res;
>> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>>          err = -ENOMEM;
>>          goto err_free_res;
>>      }
>> +    rockchip->root_bus = bus;
>>
>>      pci_bus_size_bridges(bus);
>>      pci_bus_assign_resources(bus);
>> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>>      return err;
>>  }
>>
>> +static int rockchip_pcie_remove(struct platform_device *pdev)
>> +{
>> +    struct device *dev = &pdev->dev;
>> +    struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
>> +
>> +    pci_stop_root_bus(rockchip->root_bus);
>> +    pci_remove_root_bus(rockchip->root_bus);
>> +    pci_unmap_iospace(rockchip->io);
>> +    irq_domain_remove(rockchip->irq_domain);
>> +
>> +    phy_power_off(rockchip->phy);
>> +    phy_exit(rockchip->phy);
>> +
>> +    clk_disable_unprepare(rockchip->clk_pcie_pm);
>> +    clk_disable_unprepare(rockchip->hclk_pcie);
>> +    clk_disable_unprepare(rockchip->aclk_perf_pcie);
>> +    clk_disable_unprepare(rockchip->aclk_pcie);
>> +
>> +    if (!IS_ERR(rockchip->vpcie3v3))
>> +        regulator_disable(rockchip->vpcie3v3);
>> +    if (!IS_ERR(rockchip->vpcie1v8))
>> +        regulator_disable(rockchip->vpcie1v8);
>> +    if (!IS_ERR(rockchip->vpcie0v9))
>> +        regulator_disable(rockchip->vpcie0v9);
>> +
>> +    return 0;
>> +}
>> +
>>  static const struct dev_pm_ops rockchip_pcie_pm_ops = {
>>      SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
>>                        rockchip_pcie_resume_noirq)
>> @@ -1438,6 +1470,6 @@ static struct platform_driver
>> rockchip_pcie_driver = {
>>          .pm = &rockchip_pcie_pm_ops,
>>      },
>>      .probe = rockchip_pcie_probe,
>> -
>> +    .remove = rockchip_pcie_remove,
>>  };
>>  builtin_platform_driver(rockchip_pcie_driver);
>>
>
>
Brian Norris March 10, 2017, 7:40 p.m. UTC | #3
On Fri, Mar 10, 2017 at 12:20:54PM +0800, Shawn Lin wrote:
> On 2017/3/10 11:22, Shawn Lin wrote:
> >On 2017/3/10 10:46, Brian Norris wrote:
> >>Currently, if we try to unbind the platform device, the remove will
> >>succeed, but the removal won't undo most of the registration, leaving
> >>partially-configured PCI devices in the system.
> >>
> >>This allows, for example, a simple 'lspci' to crash the system, as it
> >>will try to touch the freed (via devm_*) driver structures.
> >>
> >>So let's implement device remove().
> >>
> >
> >As this patchset seems to be merged together so I think the following
> >warning will be ok? if my git-am robot only pick your patch 1->compile->
> >patch 2->compile->patch 3 then
> >
> >drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
> >drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
> >function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
> >  pci_unmap_iospace(rockchip->io);

I'm not sure what you're doing here, but when I test patches 1-3, this
all compiles fine for me. Maybe you're testing an old kernel that does
not have pci_unmap_iospace()?

> >but I guess you may need to move your patch 4 ahead of patch 3?

No. Patch 4 is only necessary for building modules that can use those
functions; your PCIe driver doesn't build as a module until patch 5.

I'm going to guess that you're testing your hacky vendor tree, and not
pure upstream.

> Well, I am not sure if something is wrong here.
> 
> But when booting up the system for the first time, we got
> [    0.527263] PCI host bridge /pcie@f8000000 ranges:
> [    0.527293]   MEM 0xfa000000..0xfa5fffff -> 0xfa000000
> [    0.527308]    IO 0xfa600000..0xfa6fffff -> 0xfa600000
> [    0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0
> 
> so the hierarchy(lspci -t) looks like:
> lspci -t
> -[0000:00]---00.0-[01]----00.0
> 
> and lspci
> 0000:00:00.0 Class 0604: Device 1d87:0100
> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
> 
> but if I did unbind and bind, the bus number is different.
> 
> lspci
> 0001:00:00.0 Class 0604: Device 1d87:0100
> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
> 
> lspci -t
> -+-[0001:00]---00.0-[01]----00.0
>  \-[0000:00]-
> 
> This hierarchy looks wrong to me.

That looks like it's somewhat an artifact of lspci's tree view, which
manufactures the 0000:00 root.

I might comment on your "RFT" patch too but... it does touch on the
problem (that the domain numbers don't get reused), but I don't think
it's a good idea. What if we add another domain after the Rockchip
PCIe domain? You'll clobber all the domain allocations the first time
you remove the Rockchip one. Instead, if we want to actually stabilize
this indexing across hotplug, we need the core PCI code to take care of
this for us. e.g., maybe use one of the existing indexing ID mechanisms
in the kernel, like IDR?

Anyway, other than the bad lspci -t output, is there any actual bug
here? I didn't think the domain numbers were actually so special.

Brian
Shawn Lin March 13, 2017, 2:26 a.m. UTC | #4
On 2017/3/11 3:40, Brian Norris wrote:
> On Fri, Mar 10, 2017 at 12:20:54PM +0800, Shawn Lin wrote:
>> On 2017/3/10 11:22, Shawn Lin wrote:
>>> On 2017/3/10 10:46, Brian Norris wrote:
>>>> Currently, if we try to unbind the platform device, the remove will
>>>> succeed, but the removal won't undo most of the registration, leaving
>>>> partially-configured PCI devices in the system.
>>>>
>>>> This allows, for example, a simple 'lspci' to crash the system, as it
>>>> will try to touch the freed (via devm_*) driver structures.
>>>>
>>>> So let's implement device remove().
>>>>
>>>
>>> As this patchset seems to be merged together so I think the following
>>> warning will be ok? if my git-am robot only pick your patch 1->compile->
>>> patch 2->compile->patch 3 then
>>>
>>> drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
>>> drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
>>> function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
>>>  pci_unmap_iospace(rockchip->io);
>
> I'm not sure what you're doing here, but when I test patches 1-3, this
> all compiles fine for me. Maybe you're testing an old kernel that does
> not have pci_unmap_iospace()?
>
>>> but I guess you may need to move your patch 4 ahead of patch 3?
>
> No. Patch 4 is only necessary for building modules that can use those
> functions; your PCIe driver doesn't build as a module until patch 5.
>
> I'm going to guess that you're testing your hacky vendor tree, and not
> pure upstream.
>
>> Well, I am not sure if something is wrong here.
>>
>> But when booting up the system for the first time, we got
>> [    0.527263] PCI host bridge /pcie@f8000000 ranges:
>> [    0.527293]   MEM 0xfa000000..0xfa5fffff -> 0xfa000000
>> [    0.527308]    IO 0xfa600000..0xfa6fffff -> 0xfa600000
>> [    0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0
>>
>> so the hierarchy(lspci -t) looks like:
>> lspci -t
>> -[0000:00]---00.0-[01]----00.0
>>
>> and lspci
>> 0000:00:00.0 Class 0604: Device 1d87:0100
>> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>>
>> but if I did unbind and bind, the bus number is different.
>>
>> lspci
>> 0001:00:00.0 Class 0604: Device 1d87:0100
>> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>>
>> lspci -t
>> -+-[0001:00]---00.0-[01]----00.0
>>  \-[0000:00]-
>>
>> This hierarchy looks wrong to me.
>
> That looks like it's somewhat an artifact of lspci's tree view, which
> manufactures the 0000:00 root.
>
> I might comment on your "RFT" patch too but... it does touch on the
> problem (that the domain numbers don't get reused), but I don't think
> it's a good idea. What if we add another domain after the Rockchip
> PCIe domain? You'll clobber all the domain allocations the first time
> you remove the Rockchip one. Instead, if we want to actually stabilize
> this indexing across hotplug, we need the core PCI code to take care of
> this for us. e.g., maybe use one of the existing indexing ID mechanisms
> in the kernel, like IDR?
>
> Anyway, other than the bad lspci -t output, is there any actual bug
> here? I didn't think the domain numbers were actually so special.
>

No, it works fine. But I am going to guess
(1) is there any code or user-space binary care about the domain
numbers, so it will complain about this?
(2) if there are two doamins for PCI hierarchy, so when I unbind
and bind one of them continuously, is it possible that finally they
will share the same domain number as the domain number would be
overflow ? But actually they didn't share the same domain. :)

I just thought we should fix the domain number here by adding
"linux,pci-domain = <0>" for rk3399.dtsi, which seems more wise
to me now. Does it make sense to you?


> Brian
>
>
>
Brian Norris March 20, 2017, 10:29 p.m. UTC | #5
On Mon, Mar 13, 2017 at 10:26:12AM +0800, Shawn Lin wrote:
> I just thought we should fix the domain number here by adding
> "linux,pci-domain = <0>" for rk3399.dtsi, which seems more wise
> to me now. Does it make sense to you?

I think that's fine (as noted in response to your patch). That doesn't
really prevent this from being a core PCI domain bug though...

BTW, I've been using this patch set to do some repeated remove/probe
testing, and aside from the domain renumbering question and a small
memory leak noticed by kmemleak (an existing bug; sending a patch
shortly), this has been working well for me.

Brian
Bjorn Helgaas March 24, 2017, 2:25 p.m. UTC | #6
On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> Currently, if we try to unbind the platform device, the remove will
> succeed, but the removal won't undo most of the registration, leaving
> partially-configured PCI devices in the system.
> 
> This allows, for example, a simple 'lspci' to crash the system, as it
> will try to touch the freed (via devm_*) driver structures.
> 
> So let's implement device remove().

How exactly do you reproduce this problem?

There are several other drivers that are superficially similar, e.g.,
they define a struct platform_driver without a .remove method.  Do
they all have this problem?  Some of them do set .suppress_bind_attrs
= true; is that relevant to this scenario?

In fact, the only other callers of pci_remove_root_bus() are
iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().

These don't have .remove:

  imx6_pcie_driver
  ls_pcie_driver
  armada8k_pcie_driver
  artpec6_pcie_driver
  dw_plat_pcie_driver
  hisi_pcie_driver
  hisi_pcie_almost_ecam_driver
  spear13xx_pcie_driver
  gen_pci_driver

These don't have .remove but do set .suppress_bind_attrs = true:

  dra7xx_pcie_driver
  qcom_pcie_driver
  advk_pcie_driver
  mvebu_pcie_driver
  rcar_pci_driver
  rcar_pcie_driver
  tegra_pcie_driver
  altera_pcie_driver
  nwl_pcie_driver
  xilinx_pcie_driver


> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
> v2:
> * unmap IO space with pci_unmap_iospace()
> * remove IRQ domain
> ---
>  drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index 5d7b27b1e941..d2e5078ae331 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -223,9 +223,11 @@ struct rockchip_pcie {
>  	int	link_gen;
>  	struct	device *dev;
>  	struct	irq_domain *irq_domain;
> -	u32     io_size;
>  	int     offset;
> +	struct pci_bus *root_bus;
> +	struct resource *io;
>  	phys_addr_t io_bus_addr;
> +	u32     io_size;
>  	void    __iomem *msg_region;
>  	u32     mem_size;
>  	phys_addr_t msg_bus_addr;
> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  					 err, io);
>  				continue;
>  			}
> +			rockchip->io = io;
>  			break;
>  		case IORESOURCE_MEM:
>  			mem = win->res;
> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  		err = -ENOMEM;
>  		goto err_free_res;
>  	}
> +	rockchip->root_bus = bus;
>  
>  	pci_bus_size_bridges(bus);
>  	pci_bus_assign_resources(bus);
> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
>  	return err;
>  }
>  
> +static int rockchip_pcie_remove(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
> +
> +	pci_stop_root_bus(rockchip->root_bus);
> +	pci_remove_root_bus(rockchip->root_bus);
> +	pci_unmap_iospace(rockchip->io);
> +	irq_domain_remove(rockchip->irq_domain);
> +
> +	phy_power_off(rockchip->phy);
> +	phy_exit(rockchip->phy);
> +
> +	clk_disable_unprepare(rockchip->clk_pcie_pm);
> +	clk_disable_unprepare(rockchip->hclk_pcie);
> +	clk_disable_unprepare(rockchip->aclk_perf_pcie);
> +	clk_disable_unprepare(rockchip->aclk_pcie);
> +
> +	if (!IS_ERR(rockchip->vpcie3v3))
> +		regulator_disable(rockchip->vpcie3v3);
> +	if (!IS_ERR(rockchip->vpcie1v8))
> +		regulator_disable(rockchip->vpcie1v8);
> +	if (!IS_ERR(rockchip->vpcie0v9))
> +		regulator_disable(rockchip->vpcie0v9);
> +
> +	return 0;
> +}
> +
>  static const struct dev_pm_ops rockchip_pcie_pm_ops = {
>  	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
>  				      rockchip_pcie_resume_noirq)
> @@ -1438,6 +1470,6 @@ static struct platform_driver rockchip_pcie_driver = {
>  		.pm = &rockchip_pcie_pm_ops,
>  	},
>  	.probe = rockchip_pcie_probe,
> -
> +	.remove = rockchip_pcie_remove,
>  };
>  builtin_platform_driver(rockchip_pcie_driver);
> -- 
> 2.12.0.246.ga2ecc84866-goog
>
Brian Norris March 24, 2017, 5:22 p.m. UTC | #7
Hi Bjorn,

On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> > Currently, if we try to unbind the platform device, the remove will
> > succeed, but the removal won't undo most of the registration, leaving
> > partially-configured PCI devices in the system.
> > 
> > This allows, for example, a simple 'lspci' to crash the system, as it
> > will try to touch the freed (via devm_*) driver structures.
> > 
> > So let's implement device remove().
> 
> How exactly do you reproduce this problem?

On RK3399:

  # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
  # lspci

> There are several other drivers that are superficially similar, e.g.,
> they define a struct platform_driver without a .remove method.  Do
> they all have this problem?  Some of them do set .suppress_bind_attrs
> = true; is that relevant to this scenario?

Yes, I think .suppress_bind_attrs would be enough to prevent this,
according to my reading of the code and comments:

 * @suppress_bind_attrs: Disables bind/unbind via sysfs.

> In fact, the only other callers of pci_remove_root_bus() are
> iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().

Then iProc would suffer from the same memory leak in
of_pci_get_host_bridge_resources() [1]. It *would* suffer from the same
domain allocation issues in of_pci_bus_find_domain_nr() ->
pci_get_new_domain_nr() [2], except that all iProc device trees (in
mainline at least) use the 'linux,pci-domain' property to avoid it.

HyperV and VMD drivers use ACPI, which uses neither
pci_get_new_domain_nr() nor of_pci_get_host_bridge_resources().

> These don't have .remove:
> 
>   imx6_pcie_driver
>   ls_pcie_driver
>   armada8k_pcie_driver
>   artpec6_pcie_driver
>   dw_plat_pcie_driver
>   hisi_pcie_driver
>   hisi_pcie_almost_ecam_driver
>   spear13xx_pcie_driver
>   gen_pci_driver

I think these are all technically broken.

> These don't have .remove but do set .suppress_bind_attrs = true:
> 
>   dra7xx_pcie_driver
>   qcom_pcie_driver
>   advk_pcie_driver
>   mvebu_pcie_driver
>   rcar_pci_driver
>   rcar_pcie_driver
>   tegra_pcie_driver
>   altera_pcie_driver
>   nwl_pcie_driver
>   xilinx_pcie_driver

Those are fine then, I suppose.

Brian

[1] PCI: return resource_entry in pci_add_resource helpers
    https://patchwork.kernel.org/patch/9642229/
    of/pci: Fix memory leak in of_pci_get_host_bridge_resources
    https://patchwork.kernel.org/patch/9642231/

[2] PCI: use IDA to manage domain number if not getting it from DT
    https://patchwork.kernel.org/patch/9638353/
Bjorn Helgaas March 30, 2017, 11:28 p.m. UTC | #8
On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> Hi Bjorn,
> 
> On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> > > Currently, if we try to unbind the platform device, the remove will
> > > succeed, but the removal won't undo most of the registration, leaving
> > > partially-configured PCI devices in the system.
> > > 
> > > This allows, for example, a simple 'lspci' to crash the system, as it
> > > will try to touch the freed (via devm_*) driver structures.
> > > 
> > > So let's implement device remove().
> > 
> > How exactly do you reproduce this problem?
> 
> On RK3399:
> 
>   # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
>   # lspci
> 
> > There are several other drivers that are superficially similar, e.g.,
> > they define a struct platform_driver without a .remove method.  Do
> > they all have this problem?  Some of them do set .suppress_bind_attrs
> > = true; is that relevant to this scenario?
> 
> Yes, I think .suppress_bind_attrs would be enough to prevent this,
> according to my reading of the code and comments:
> 
>  * @suppress_bind_attrs: Disables bind/unbind via sysfs.
> 
> > In fact, the only other callers of pci_remove_root_bus() are
> > iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().
> 
> Then iProc would suffer from the same memory leak in
> of_pci_get_host_bridge_resources() [1]. It *would* suffer from the same
> domain allocation issues in of_pci_bus_find_domain_nr() ->
> pci_get_new_domain_nr() [2], except that all iProc device trees (in
> mainline at least) use the 'linux,pci-domain' property to avoid it.
> 
> HyperV and VMD drivers use ACPI, which uses neither
> pci_get_new_domain_nr() nor of_pci_get_host_bridge_resources().
> 
> > These don't have .remove:
> > 
> >   imx6_pcie_driver
> >   ls_pcie_driver
> >   armada8k_pcie_driver
> >   artpec6_pcie_driver
> >   dw_plat_pcie_driver
> >   hisi_pcie_driver
> >   hisi_pcie_almost_ecam_driver
> >   spear13xx_pcie_driver
> >   gen_pci_driver
> 
> I think these are all technically broken.

Can we fix them all at the same time as you fix Rockchip?  Maybe we
should have a series that adds ".suppress_bind_attrs = true" to all
these drivers, including Rockchip.  Then you could have this current 
series to make Rockchip modular on top, if there's still value in it.

If we find a common problem, I'd like to fix it everywhere we know
about so it doesn't get forgotten or copied to even more places.

> > These don't have .remove but do set .suppress_bind_attrs = true:
> > 
> >   dra7xx_pcie_driver
> >   qcom_pcie_driver
> >   advk_pcie_driver
> >   mvebu_pcie_driver
> >   rcar_pci_driver
> >   rcar_pcie_driver
> >   tegra_pcie_driver
> >   altera_pcie_driver
> >   nwl_pcie_driver
> >   xilinx_pcie_driver
> 
> Those are fine then, I suppose.
> 
> Brian
> 
> [1] PCI: return resource_entry in pci_add_resource helpers
>     https://patchwork.kernel.org/patch/9642229/
>     of/pci: Fix memory leak in of_pci_get_host_bridge_resources
>     https://patchwork.kernel.org/patch/9642231/
> 
> [2] PCI: use IDA to manage domain number if not getting it from DT
>     https://patchwork.kernel.org/patch/9638353/
Brian Norris March 31, 2017, 12:26 a.m. UTC | #9
Hi Bjorn,

On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > These don't have .remove:
> > > 
> > >   imx6_pcie_driver
> > >   ls_pcie_driver
> > >   armada8k_pcie_driver
> > >   artpec6_pcie_driver
> > >   dw_plat_pcie_driver
> > >   hisi_pcie_driver
> > >   hisi_pcie_almost_ecam_driver
> > >   spear13xx_pcie_driver
> > >   gen_pci_driver
> > 
> > I think these are all technically broken.
> 
> Can we fix them all at the same time as you fix Rockchip?  Maybe we
> should have a series that adds ".suppress_bind_attrs = true" to all
> these drivers,

Sure, I can do that.

> including Rockchip.

Huh? Why? So I can revert that in the next patch?

> Then you could have this current 
> series to make Rockchip modular on top, if there's still value in it.

I do see value in it. That's the whole reason I wrote this patchset.
It's useful for stressing out certain behaviors that will happen all the
time (i.e., boot-time initialization, from platform probe, to bus init,
to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
much faster than reboot testing.

Personally, I'd rather just patch the other drivers, and you can wait
until I follow through on that promise before applying my existing work
for the Rockchip driver, if that's what you'd prefer.

> If we find a common problem, I'd like to fix it everywhere we know
> about so it doesn't get forgotten or copied to even more places.

Sure. But you only just pointed out how broken several drivers were; I
didn't really notice :)

Brian
Bjorn Helgaas March 31, 2017, 5:17 a.m. UTC | #10
On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> Hi Bjorn,
> 
> On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > > These don't have .remove:
> > > > 
> > > >   imx6_pcie_driver
> > > >   ls_pcie_driver
> > > >   armada8k_pcie_driver
> > > >   artpec6_pcie_driver
> > > >   dw_plat_pcie_driver
> > > >   hisi_pcie_driver
> > > >   hisi_pcie_almost_ecam_driver
> > > >   spear13xx_pcie_driver
> > > >   gen_pci_driver
> > > 
> > > I think these are all technically broken.
> > 
> > Can we fix them all at the same time as you fix Rockchip?  Maybe we
> > should have a series that adds ".suppress_bind_attrs = true" to all
> > these drivers,
> 
> Sure, I can do that.
> 
> > including Rockchip.
> 
> Huh? Why? So I can revert that in the next patch?
> 
> > Then you could have this current 
> > series to make Rockchip modular on top, if there's still value in it.
> 
> I do see value in it. That's the whole reason I wrote this patchset.
> It's useful for stressing out certain behaviors that will happen all the
> time (i.e., boot-time initialization, from platform probe, to bus init,
> to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> much faster than reboot testing.

I didn't phrase that very well.  There's certainly value in stressing
the bind/unbind paths, but I thought the primary reason you wrote this
was to fix the fact that you could crash the system like this:

  # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
  # lspci

From my point of view, that's the issue that *has* to be fixed.
Better test coverage is icing.

It sounds like several drivers have that same issue, and the simplest
possible fix is to set .suppress_bind_attrs, so I suggested doing that 
so it's easy to analyze the tree as a whole and say "these drivers
all have the same problem, and all the fixes look the same."

I guess if you'd rather skip that for Rockchip and apply a more
complicated fix there, I could go along with that.  But I don't think
it would hurt anything to set .suppress_bind_attrs, then remove it
when you add module support.  The concepts of .suppress_bind_attrs and
modularity are related, and doing this in a separate patch would make
it a nice example to follow if somebody wants to make other drivers
modular as well.

> Personally, I'd rather just patch the other drivers, and you can wait
> until I follow through on that promise before applying my existing work
> for the Rockchip driver, if that's what you'd prefer.

It's not so much a question of using the Rockchip change as a stick.
I'm just thinking that it makes a more logical progression to fix the
more important issue globally first.

> > If we find a common problem, I'd like to fix it everywhere we know
> > about so it doesn't get forgotten or copied to even more places.
> 
> Sure. But you only just pointed out how broken several drivers were; I
> didn't really notice :)

Yeah, you're right, I had in my head the idea that if we've identified
the same problem in several drivers, we should fix them all, but I
neglected to turn that into words.

Bjorn
Brian Norris March 31, 2017, 4:40 p.m. UTC | #11
Hi Bjorn,

On Fri, Mar 31, 2017 at 12:17:02AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> > On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > > Can we fix them all at the same time as you fix Rockchip?  Maybe we
> > > should have a series that adds ".suppress_bind_attrs = true" to all
> > > these drivers,
> > 
> > Sure, I can do that.
> > 
> > > including Rockchip.
> > 
> > Huh? Why? So I can revert that in the next patch?
> > 
> > > Then you could have this current 
> > > series to make Rockchip modular on top, if there's still value in it.
> > 
> > I do see value in it. That's the whole reason I wrote this patchset.
> > It's useful for stressing out certain behaviors that will happen all the
> > time (i.e., boot-time initialization, from platform probe, to bus init,
> > to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> > much faster than reboot testing.
> 
> I didn't phrase that very well.  There's certainly value in stressing
> the bind/unbind paths, but I thought the primary reason you wrote this
> was to fix the fact that you could crash the system like this:
> 
>   # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
>   # lspci

Well, they're kinda two sides of the same coin; I was wanting to test
the bind path, and when I tried this, I noticed that I could trivially
crash the system. The crash seemed like a more important thing to
document (because otherwise, it just looks like I'm adding a feature).

> From my point of view, that's the issue that *has* to be fixed.
> Better test coverage is icing.

I didn't really view messing with /sys/.../unbind as a big issue,
outside of development and testing (there's a lot of damage a malicious
actor can do with unconstrained access to /sys/), so I guess I didn't
put that aspect as super-high priority. If you'd like to prioritize
that, then I'm OK with that.

> It sounds like several drivers have that same issue, and the simplest
> possible fix is to set .suppress_bind_attrs, so I suggested doing that 
> so it's easy to analyze the tree as a whole and say "these drivers
> all have the same problem, and all the fixes look the same."

Sure, that is the simplest approach.

> I guess if you'd rather skip that for Rockchip and apply a more
> complicated fix there, I could go along with that.  But I don't think
> it would hurt anything to set .suppress_bind_attrs, then remove it
> when you add module support.  The concepts of .suppress_bind_attrs and
> modularity are related, and doing this in a separate patch would make
> it a nice example to follow if somebody wants to make other drivers
> modular as well.

I'll leave that up to you, and I can resubmit things if desired. As you
have since noticed, I already sent a patch to add .suppress_bind_attrs
to all the other drivers. If you'd like, feel free to add
pcie-rockchip.c into that mix, it's not hard -- or I can redo it myself.
Then I can modify and resend (or you can do the trivial modification
required to) the current patch set.

Just let me know.

> > Personally, I'd rather just patch the other drivers, and you can wait
> > until I follow through on that promise before applying my existing work
> > for the Rockchip driver, if that's what you'd prefer.
> 
> It's not so much a question of using the Rockchip change as a stick.
> I'm just thinking that it makes a more logical progression to fix the
> more important issue globally first.

Sure, I can grok that. Just let me know if you want any more action from
me.

Brian
Brian Norris April 11, 2017, 6:18 p.m. UTC | #12
Hi Bjorn,

On Fri, Mar 31, 2017 at 09:40:15AM -0700, Brian Norris wrote:
> Sure, I can grok that. Just let me know if you want any more action from
> me.

Any thoughts here? Would you like me to prepare my patches any
differently?

Brian
diff mbox

Patch

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 5d7b27b1e941..d2e5078ae331 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -223,9 +223,11 @@  struct rockchip_pcie {
 	int	link_gen;
 	struct	device *dev;
 	struct	irq_domain *irq_domain;
-	u32     io_size;
 	int     offset;
+	struct pci_bus *root_bus;
+	struct resource *io;
 	phys_addr_t io_bus_addr;
+	u32     io_size;
 	void    __iomem *msg_region;
 	u32     mem_size;
 	phys_addr_t msg_bus_addr;
@@ -1360,6 +1362,7 @@  static int rockchip_pcie_probe(struct platform_device *pdev)
 					 err, io);
 				continue;
 			}
+			rockchip->io = io;
 			break;
 		case IORESOURCE_MEM:
 			mem = win->res;
@@ -1391,6 +1394,7 @@  static int rockchip_pcie_probe(struct platform_device *pdev)
 		err = -ENOMEM;
 		goto err_free_res;
 	}
+	rockchip->root_bus = bus;
 
 	pci_bus_size_bridges(bus);
 	pci_bus_assign_resources(bus);
@@ -1421,6 +1425,34 @@  static int rockchip_pcie_probe(struct platform_device *pdev)
 	return err;
 }
 
+static int rockchip_pcie_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
+
+	pci_stop_root_bus(rockchip->root_bus);
+	pci_remove_root_bus(rockchip->root_bus);
+	pci_unmap_iospace(rockchip->io);
+	irq_domain_remove(rockchip->irq_domain);
+
+	phy_power_off(rockchip->phy);
+	phy_exit(rockchip->phy);
+
+	clk_disable_unprepare(rockchip->clk_pcie_pm);
+	clk_disable_unprepare(rockchip->hclk_pcie);
+	clk_disable_unprepare(rockchip->aclk_perf_pcie);
+	clk_disable_unprepare(rockchip->aclk_pcie);
+
+	if (!IS_ERR(rockchip->vpcie3v3))
+		regulator_disable(rockchip->vpcie3v3);
+	if (!IS_ERR(rockchip->vpcie1v8))
+		regulator_disable(rockchip->vpcie1v8);
+	if (!IS_ERR(rockchip->vpcie0v9))
+		regulator_disable(rockchip->vpcie0v9);
+
+	return 0;
+}
+
 static const struct dev_pm_ops rockchip_pcie_pm_ops = {
 	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
 				      rockchip_pcie_resume_noirq)
@@ -1438,6 +1470,6 @@  static struct platform_driver rockchip_pcie_driver = {
 		.pm = &rockchip_pcie_pm_ops,
 	},
 	.probe = rockchip_pcie_probe,
-
+	.remove = rockchip_pcie_remove,
 };
 builtin_platform_driver(rockchip_pcie_driver);