diff mbox

[RFC,3/3] PCI: tegra: Support driver unbinding

Message ID 1376392346-14127-4-git-send-email-treding@nvidia.com
State Not Applicable
Headers show

Commit Message

Thierry Reding Aug. 13, 2013, 11:12 a.m. UTC
Implement the platform driver's .remove() callback to free all resources
allocated during driver setup and call pci_common_exit() to clean up
ARM-specific data structures. Unmap the fixed PCI I/O mapping by calling
the new pci_iounmap_io() function in the new .teardown() callback.

Finally, no longer set the .suppress_bind_attrs field to true to allow
the driver to unbind from a device.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/pci/host/pci-tegra.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

Comments

Stephen Warren Aug. 14, 2013, 9:43 p.m. UTC | #1
On 08/13/2013 05:12 AM, Thierry Reding wrote:
> Implement the platform driver's .remove() callback to free all resources
> allocated during driver setup and call pci_common_exit() to cleanup ARM
> specific datastructures. Unmap the fixed PCI I/O mapping by calling the
> new pci_iounmap_io() function in the new .teardown() callback.
> 
> Finally, no longer set the .suppress_bind_attrs field to true to allow
> the driver to unbind from a device.

> +static int tegra_pcie_remove(struct platform_device *pdev)
> +{
> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
> +	struct tegra_pcie_bus *bus, *tmp;
> +	int err;
> +
> +	pci_common_exit(&pcie->sys);
> +
> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
> +		vunmap(bus->area->addr);
> +		kfree(bus);
> +	}
> +
> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> +		err = tegra_pcie_disable_msi(pcie);
> +		if (err < 0)
> +			return err;
> +	}

Wouldn't it make sense to do that as early as possible in the function,
to make sure that no MSI accidentally fires after some of the cleanup
has already happened?

> +
> +	err = tegra_pcie_put_resources(pcie);
> +	if (err < 0)
> +		return err;
> +
> +	return 0;
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 15, 2013, 10:34 a.m. UTC | #2
On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren wrote:
> On 08/13/2013 05:12 AM, Thierry Reding wrote:
> > Implement the platform driver's .remove() callback to free all resources
> > allocated during driver setup and call pci_common_exit() to cleanup ARM
> > specific datastructures. Unmap the fixed PCI I/O mapping by calling the
> > new pci_iounmap_io() function in the new .teardown() callback.
> > 
> > Finally, no longer set the .suppress_bind_attrs field to true to allow
> > the driver to unbind from a device.
> 
> > +static int tegra_pcie_remove(struct platform_device *pdev)
> > +{
> > +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
> > +	struct tegra_pcie_bus *bus, *tmp;
> > +	int err;
> > +
> > +	pci_common_exit(&pcie->sys);
> > +
> > +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
> > +		vunmap(bus->area->addr);
> > +		kfree(bus);
> > +	}
> > +
> > +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> > +		err = tegra_pcie_disable_msi(pcie);
> > +		if (err < 0)
> > +			return err;
> > +	}
> 
> Wouldn't it make sense to do that as early as possible in the function,
> to make sure that no MSI accidentally fires after some of the cleanup
> has already happened?

I don't think that's strictly necessary in this case. After the call to
pci_common_exit() there are no PCI devices left, there's not even a bus
left. All MSI users should have cleaned up after themselves.

Given that, I thought it more useful to mirror the setup done in .probe()
to make it clearer what's being undone (and potentially what's missing).

Thierry
Stephen Warren Aug. 15, 2013, 3:21 p.m. UTC | #3
On 08/15/2013 04:34 AM, Thierry Reding wrote:
> On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren wrote:
>> On 08/13/2013 05:12 AM, Thierry Reding wrote:
>>> Implement the platform driver's .remove() callback to free all
>>> resources allocated during driver setup and call
>>> pci_common_exit() to cleanup ARM specific datastructures. Unmap
>>> the fixed PCI I/O mapping by calling the new pci_iounmap_io()
>>> function in the new .teardown() callback.
>>> 
>>> Finally, no longer set the .suppress_bind_attrs field to true
>>> to allow the driver to unbind from a device.
>> 
>>> +static int tegra_pcie_remove(struct platform_device *pdev)
>>> +{
>>> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
>>> +	struct tegra_pcie_bus *bus, *tmp;
>>> +	int err;
>>> +
>>> +	pci_common_exit(&pcie->sys);
>>> +
>>> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
>>> +		vunmap(bus->area->addr);
>>> +		kfree(bus);
>>> +	}
>>> +
>>> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
>>> +		err = tegra_pcie_disable_msi(pcie);
>>> +		if (err < 0)
>>> +			return err;
>>> +	}
>> 
>> Wouldn't it make sense to do that as early as possible in the
>> function, to make sure that no MSI accidentally fires after some
>> of the cleanup has already happened?
> 
> I don't think that's strictly necessary in this case. After the
> call to pci_common_exit() there are no PCI devices left, there's
> not even a bus left. All MSI users should have cleaned up after
> themselves.
> 
> Given that I thought it more useful to mirror the setup done in
> .probe() to make it clearer what's being undone (and potentially
> what's missing).

That makes sense SW-wise, but what about mis-behaving HW that triggers
an MSI even when it's been told not to? I assume that
tegra_pcie_disable_msi() unrequests the IRQ, hence solves that
problem, if done early enough.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 19, 2013, 8:16 p.m. UTC | #4
On Thu, Aug 15, 2013 at 09:21:53AM -0600, Stephen Warren wrote:
> On 08/15/2013 04:34 AM, Thierry Reding wrote:
> > On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren wrote:
> >> On 08/13/2013 05:12 AM, Thierry Reding wrote:
> >>> Implement the platform driver's .remove() callback to free all
> >>> resources allocated during driver setup and call
> >>> pci_common_exit() to cleanup ARM specific datastructures. Unmap
> >>> the fixed PCI I/O mapping by calling the new pci_iounmap_io()
> >>> function in the new .teardown() callback.
> >>> 
> >>> Finally, no longer set the .suppress_bind_attrs field to true
> >>> to allow the driver to unbind from a device.
> >> 
> >>> +static int tegra_pcie_remove(struct platform_device *pdev)
> >>> +{
> >>> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
> >>> +	struct tegra_pcie_bus *bus, *tmp;
> >>> +	int err;
> >>> +
> >>> +	pci_common_exit(&pcie->sys);
> >>> +
> >>> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
> >>> +		vunmap(bus->area->addr);
> >>> +		kfree(bus);
> >>> +	}
> >>> +
> >>> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> >>> +		err = tegra_pcie_disable_msi(pcie);
> >>> +		if (err < 0)
> >>> +			return err;
> >>> +	}
> >> 
> >> Wouldn't it make sense to do that as early as possible in the
> >> function, to make sure that no MSI accidentally fires after some
> >> of the cleanup has already happened?
> > 
> > I don't think that's strictly necessary in this case. After the
> > call to pci_common_exit() there are no PCI devices left, there's
> > not even a bus left. All MSI users should have cleaned up after
> > themselves.
> > 
> > Given that I thought it more useful to mirror the setup done in
> > .probe() to make it clearer what's being undone (and potentially
> > what's missing).
> 
> That makes sense SW-wise, but what about mis-behaving HW that triggers
> an MSI even when it's been told not to? I assume that
> tegra_pcie_disable_msi() unrequests the IRQ, hence solves that
> problem, if done early enough.

To be honest, I'm not sure about the side-effects that this will have.
tegra_pcie_disable_msi() does quite a bit more than just masking the
interrupts. It also completely removes the IRQ domain that provides the
MSI interrupts. While I haven't tried it yet, I can imagine that it will
cause crashes at a later point when drivers want to disable MSI on a
device and the IRQ domain has vanished from underneath them.

Thierry
Stephen Warren Aug. 19, 2013, 8:55 p.m. UTC | #5
On 08/19/2013 02:16 PM, Thierry Reding wrote:
> On Thu, Aug 15, 2013 at 09:21:53AM -0600, Stephen Warren wrote:
>> On 08/15/2013 04:34 AM, Thierry Reding wrote:
>>> On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren
>>> wrote:
>>>> On 08/13/2013 05:12 AM, Thierry Reding wrote:
>>>>> Implement the platform driver's .remove() callback to free
>>>>> all resources allocated during driver setup and call 
>>>>> pci_common_exit() to cleanup ARM specific datastructures.
>>>>> Unmap the fixed PCI I/O mapping by calling the new
>>>>> pci_iounmap_io() function in the new .teardown() callback.
>>>>> 
>>>>> Finally, no longer set the .suppress_bind_attrs field to
>>>>> true to allow the driver to unbind from a device.
>>>> 
>>>>> +static int tegra_pcie_remove(struct platform_device *pdev)
>>>>> +{
>>>>> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
>>>>> +	struct tegra_pcie_bus *bus, *tmp;
>>>>> +	int err;
>>>>> +
>>>>> +	pci_common_exit(&pcie->sys);
>>>>> +
>>>>> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
>>>>> +		vunmap(bus->area->addr);
>>>>> +		kfree(bus);
>>>>> +	}
>>>>> +
>>>>> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
>>>>> +		err = tegra_pcie_disable_msi(pcie);
>>>>> +		if (err < 0)
>>>>> +			return err;
>>>>> +	}
>>>> 
>>>> Wouldn't it make sense to do that as early as possible in
>>>> the function, to make sure that no MSI accidentally fires
>>>> after some of the cleanup has already happened?
>>> 
>>> I don't think that's strictly necessary in this case. After
>>> the call to pci_common_exit() there are no PCI devices left,
>>> there's not even a bus left. All MSI users should have cleaned
>>> up after themselves.
>>> 
>>> Given that I thought it more useful to mirror the setup done
>>> in .probe() to make it clearer what's being undone (and
>>> potentially what's missing).
>> 
>> That makes sense SW-wise, but what about mis-behaving HW that
>> triggers an MSI even when it's been told not to? I assume that 
>> tegra_pcie_disable_msi() unrequests the IRQ, hence solves that 
>> problem, if done early enough.
> 
> To be honest, I'm not sure about the side-effects that this will
> have. tegra_pcie_disable_msi() does quite a bit more than just
> masking the interrupts. It also completely removes the IRQ domain
> that provides the MSI interrupts. While I haven't tried it yet I
> can imagine that it will cause crashes at a later point when
> drivers want to disable MSI on a device and the IRQ domain having
> vanished from underneath.

Surely by the time the PCIe controller device has been remove()d then
all devices for PCIe "client" devices have also been removed.

But I guess the problem is if the controller is added back, yet the
IRQ resources aren't re-parsed under the new IRQ domain? Still, that
seems like an unrelated issue to exactly where the MSI IRQ domain gets
cleaned up in the host controller's remove().
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thierry Reding Aug. 19, 2013, 9:52 p.m. UTC | #6
On Mon, Aug 19, 2013 at 02:55:44PM -0600, Stephen Warren wrote:
> On 08/19/2013 02:16 PM, Thierry Reding wrote:
> > On Thu, Aug 15, 2013 at 09:21:53AM -0600, Stephen Warren wrote:
> >> On 08/15/2013 04:34 AM, Thierry Reding wrote:
> >>> On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren
> >>> wrote:
> >>>> On 08/13/2013 05:12 AM, Thierry Reding wrote:
> >>>>> Implement the platform driver's .remove() callback to free
> >>>>> all resources allocated during driver setup and call 
> >>>>> pci_common_exit() to cleanup ARM specific datastructures.
> >>>>> Unmap the fixed PCI I/O mapping by calling the new
> >>>>> pci_iounmap_io() function in the new .teardown() callback.
> >>>>> 
> >>>>> Finally, no longer set the .suppress_bind_attrs field to
> >>>>> true to allow the driver to unbind from a device.
> >>>> 
> >>>>> +static int tegra_pcie_remove(struct platform_device *pdev)
> >>>>> +{
> >>>>> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
> >>>>> +	struct tegra_pcie_bus *bus, *tmp;
> >>>>> +	int err;
> >>>>> +
> >>>>> +	pci_common_exit(&pcie->sys);
> >>>>> +
> >>>>> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
> >>>>> +		vunmap(bus->area->addr);
> >>>>> +		kfree(bus);
> >>>>> +	}
> >>>>> +
> >>>>> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
> >>>>> +		err = tegra_pcie_disable_msi(pcie);
> >>>>> +		if (err < 0)
> >>>>> +			return err;
> >>>>> +	}
> >>>> 
> >>>> Wouldn't it make sense to do that as early as possible in
> >>>> the function, to make sure that no MSI accidentally fires
> >>>> after some of the cleanup has already happened?
> >>> 
> >>> I don't think that's strictly necessary in this case. After
> >>> the call to pci_common_exit() there are no PCI devices left,
> >>> there's not even a bus left. All MSI users should have cleaned
> >>> up after themselves.
> >>> 
> >>> Given that I thought it more useful to mirror the setup done
> >>> in .probe() to make it clearer what's being undone (and
> >>> potentially what's missing).
> >> 
> >> That makes sense SW-wise, but what about mis-behaving HW that
> >> triggers an MSI even when it's been told not to? I assume that 
> >> tegra_pcie_disable_msi() unrequests the IRQ, hence solves that 
> >> problem, if done early enough.
> > 
> > To be honest, I'm not sure about the side-effects that this will
> > have. tegra_pcie_disable_msi() does quite a bit more than just
> > masking the interrupts. It also completely removes the IRQ domain
> > that provides the MSI interrupts. While I haven't tried it yet I
> > can imagine that it will cause crashes at a later point when
> > drivers want to disable MSI on a device and the IRQ domain having
> > vanished from underneath.
> 
> Surely by the time the PCIe controller device has been remove()d then
> all devices for PCIe "client" devices have also been removed.

The PCIe controller is the device being removed. Part of that removal
involves stopping and removing all PCI devices. That's done as part of
pci_common_exit().

But I was under the impression that you were arguing that the call to
tegra_pcie_disable_msi() should be the first call in .remove() in order
to prevent any spurious MSIs from occurring. That in turn would mean
calling tegra_pcie_disable_msi() before pci_common_exit(), and that
would lead to the problem that I described.

> But I guess the problem is if the controller is added back, yet the
> IRQ resources aren't re-parsed under the new IRQ domain? Still, that
> seems like an unrelated issue to exactly where the MSI IRQ domain gets
> cleaned up in the host controller's remove().

I don't think that should be a problem. Given that both the MSI IRQ
domain and the PCI devices will be set up from scratch, I don't see how
any stale resources could mess things up.

Thierry
Stephen Warren Aug. 19, 2013, 9:59 p.m. UTC | #7
On 08/19/2013 03:52 PM, Thierry Reding wrote:
> On Mon, Aug 19, 2013 at 02:55:44PM -0600, Stephen Warren wrote:
>> On 08/19/2013 02:16 PM, Thierry Reding wrote:
>>> On Thu, Aug 15, 2013 at 09:21:53AM -0600, Stephen Warren
>>> wrote:
>>>> On 08/15/2013 04:34 AM, Thierry Reding wrote:
>>>>> On Wed, Aug 14, 2013 at 03:43:40PM -0600, Stephen Warren 
>>>>> wrote:
>>>>>> On 08/13/2013 05:12 AM, Thierry Reding wrote:
>>>>>>> Implement the platform driver's .remove() callback to
>>>>>>> free all resources allocated during driver setup and
>>>>>>> call pci_common_exit() to cleanup ARM specific
>>>>>>> datastructures. Unmap the fixed PCI I/O mapping by
>>>>>>> calling the new pci_iounmap_io() function in the new
>>>>>>> .teardown() callback.
>>>>>>> 
>>>>>>> Finally, no longer set the .suppress_bind_attrs field
>>>>>>> to true to allow the driver to unbind from a device.
>>>>>> 
>>>>>>> +static int tegra_pcie_remove(struct platform_device *pdev)
>>>>>>> +{
>>>>>>> +	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
>>>>>>> +	struct tegra_pcie_bus *bus, *tmp;
>>>>>>> +	int err;
>>>>>>> +
>>>>>>> +	pci_common_exit(&pcie->sys);
>>>>>>> +
>>>>>>> +	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
>>>>>>> +		vunmap(bus->area->addr);
>>>>>>> +		kfree(bus);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (IS_ENABLED(CONFIG_PCI_MSI)) {
>>>>>>> +		err = tegra_pcie_disable_msi(pcie);
>>>>>>> +		if (err < 0)
>>>>>>> +			return err;
>>>>>>> +	}
>>>>>> 
>>>>>> Wouldn't it make sense to do that as early as possible
>>>>>> in the function, to make sure that no MSI accidentally
>>>>>> fires after some of the cleanup has already happened?
>>>>> 
>>>>> I don't think that's strictly necessary in this case.
>>>>> After the call to pci_common_exit() there are no PCI
>>>>> devices left, there's not even a bus left. All MSI users
>>>>> should have cleaned up after themselves.
>>>>> 
>>>>> Given that I thought it more useful to mirror the setup
>>>>> done in .probe() to make it clearer what's being undone
>>>>> (and potentially what's missing).
>>>> 
>>>> That makes sense SW-wise, but what about mis-behaving HW
>>>> that triggers an MSI even when it's been told not to? I
>>>> assume that tegra_pcie_disable_msi() unrequests the IRQ,
>>>> hence solves that problem, if done early enough.
>>> 
>>> To be honest, I'm not sure about the side-effects that this
>>> will have. tegra_pcie_disable_msi() does quite a bit more than
>>> just masking the interrupts. It also completely removes the IRQ
>>> domain that provides the MSI interrupts. While I haven't tried
>>> it yet I can imagine that it will cause crashes at a later
>>> point when drivers want to disable MSI on a device and the IRQ
>>> domain having vanished from underneath.
>> 
>> Surely by the time the PCIe controller device has been remove()d
>> then all devices for PCIe "client" devices have also been
>> removed.
> 
> The PCIe controller is the device being removed. Part of that
> removal involves stopping and removing all PCI devices. That's done
> as part of pci_common_exit().
> 
> But I was under the impression that you were arguing that the call
> to tegra_pcie_disable_msi() should be the first call in .remove()
> in order to prevent any spurious MSIs from occurring. That in turn
> would mean calling tegra_pcie_disable_msi() before
> pci_common_exit(), and that would lead to the problem that I
> described.

Earlier yes, but perhaps not first. Right now the order is:

1) pci_common_exit

2) unmap some memory, free some buses

3) tear down MSI infrastructure

I suppose if the MSI IRQ handler is guaranteed to never touch the
stuff torn down by (2) then the code is fine as-is, but it might be
clearer to do the following instead:

1) pci_common_exit

2) tear down MSI infrastructure (and indeed unregister non-MSI IRQ too)

3) unmap some memory, free some buses

... since then no matter what, no IRQ can be handled before any
resource is torn down.

Also, perhaps when initially responding I missed that
pci_common_exit() is what forcibly removes all PCIe "client" device
drivers; in other sub-systems, client devices take refcounts on their
resources, so the resource can't be .remove()d until all the objects
referencing them have been manually removed, but PCIe apparently works
the other way around - removing the controller removes all the users?
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 7356741..396f352 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -250,6 +250,7 @@  struct tegra_pcie {
 	int irq;
 
 	struct list_head busses;
+	struct list_head sys;
 	struct resource *cs;
 
 	struct resource io;
@@ -666,6 +667,11 @@  static struct pci_bus *tegra_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 	return bus;
 }
 
+static void tegra_pcie_teardown(int nr, struct pci_sys_data *sys)
+{
+	pci_iounmap_io(nr * SZ_64K);
+}
+
 static irqreturn_t tegra_pcie_isr(int irq, void *arg)
 {
 	const char *err_msg[] = {
@@ -1583,7 +1589,9 @@  static int tegra_pcie_enable(struct tegra_pcie *pcie)
 	hw.map_irq = tegra_pcie_map_irq;
 	hw.add_bus = tegra_pcie_add_bus;
 	hw.scan = tegra_pcie_scan_bus;
+	hw.teardown = tegra_pcie_teardown;
 	hw.ops = &tegra_pcie_ops;
+	hw.sys = &pcie->sys;
 
 	pci_common_init_dev(pcie->dev, &hw);
 
@@ -1637,6 +1645,7 @@  static int tegra_pcie_probe(struct platform_device *pdev)
 
 	INIT_LIST_HEAD(&pcie->busses);
 	INIT_LIST_HEAD(&pcie->ports);
+	INIT_LIST_HEAD(&pcie->sys);
 	pcie->soc_data = match->data;
 	pcie->dev = &pdev->dev;
 
@@ -1686,14 +1695,40 @@  put_resources:
 	return err;
 }
 
+static int tegra_pcie_remove(struct platform_device *pdev)
+{
+	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
+	struct tegra_pcie_bus *bus, *tmp;
+	int err;
+
+	pci_common_exit(&pcie->sys);
+
+	list_for_each_entry_safe(bus, tmp, &pcie->busses, list) {
+		vunmap(bus->area->addr);
+		kfree(bus);
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		err = tegra_pcie_disable_msi(pcie);
+		if (err < 0)
+			return err;
+	}
+
+	err = tegra_pcie_put_resources(pcie);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
 static struct platform_driver tegra_pcie_driver = {
 	.driver = {
 		.name = "tegra-pcie",
 		.owner = THIS_MODULE,
 		.of_match_table = tegra_pcie_of_match,
-		.suppress_bind_attrs = true,
 	},
 	.probe = tegra_pcie_probe,
+	.remove = tegra_pcie_remove,
 };
 module_platform_driver(tegra_pcie_driver);