diff mbox series

[RFC,8/9] PCI/pwrctl: add PCI power control core code

Message ID 20240201155532.49707-9-brgl@bgdev.pl
State New
Headers show
Series power: sequencing: implement the subsystem and add first users | expand

Commit Message

Bartosz Golaszewski Feb. 1, 2024, 3:55 p.m. UTC
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>

Some PCI devices must be powered-on before they can be detected on the
bus. Introduce a simple framework reusing the existing PCI OF
infrastructure.

The way this works is: a DT node representing a PCI device connected to
the port can be matched against its power control platform driver. If
the match succeeds, the driver is responsible for powering-up the device
and calling pcie_pwrctl_device_enable() which will trigger a PCI bus
rescan as well as subscribe to PCI bus notifications.

When the device is detected and created, we'll make it consume the same
DT node that the platform device did. When the device is bound, we'll
create a device link between it and the parent power control device.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/pci/Kconfig         |  1 +
 drivers/pci/Makefile        |  1 +
 drivers/pci/pwrctl/Kconfig  |  8 ++++
 drivers/pci/pwrctl/Makefile |  3 ++
 drivers/pci/pwrctl/core.c   | 82 +++++++++++++++++++++++++++++++++++++
 include/linux/pci-pwrctl.h  | 24 +++++++++++
 6 files changed, 119 insertions(+)
 create mode 100644 drivers/pci/pwrctl/Kconfig
 create mode 100644 drivers/pci/pwrctl/Makefile
 create mode 100644 drivers/pci/pwrctl/core.c
 create mode 100644 include/linux/pci-pwrctl.h

Comments

Bjorn Andersson Feb. 2, 2024, 3:53 a.m. UTC | #1
On Thu, Feb 01, 2024 at 04:55:31PM +0100, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
> 
> Some PCI devices must be powered-on before they can be detected on the
> bus. Introduce a simple framework reusing the existing PCI OF
> infrastructure.
> 
> The way this works is: a DT node representing a PCI device connected to
> the port can be matched against its power control platform driver. If
> the match succeeds, the driver is responsible for powering-up the device
> and calling pcie_pwrctl_device_enable() which will trigger a PCI bus
> rescan as well as subscribe to PCI bus notifications.
> 
> When the device is detected and created, we'll make it consume the same
> DT node that the platform device did. When the device is bound, we'll
> create a device link between it and the parent power control device.
> 
> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
> ---
>  drivers/pci/Kconfig         |  1 +
>  drivers/pci/Makefile        |  1 +
>  drivers/pci/pwrctl/Kconfig  |  8 ++++
>  drivers/pci/pwrctl/Makefile |  3 ++
>  drivers/pci/pwrctl/core.c   | 82 +++++++++++++++++++++++++++++++++++++
>  include/linux/pci-pwrctl.h  | 24 +++++++++++
>  6 files changed, 119 insertions(+)
>  create mode 100644 drivers/pci/pwrctl/Kconfig
>  create mode 100644 drivers/pci/pwrctl/Makefile
>  create mode 100644 drivers/pci/pwrctl/core.c
>  create mode 100644 include/linux/pci-pwrctl.h
> 
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 74147262625b..5b9b84f8774f 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -291,5 +291,6 @@ source "drivers/pci/hotplug/Kconfig"
>  source "drivers/pci/controller/Kconfig"
>  source "drivers/pci/endpoint/Kconfig"
>  source "drivers/pci/switch/Kconfig"
> +source "drivers/pci/pwrctl/Kconfig"
>  
>  endif
> diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
> index cc8b4e01e29d..6ae202d950f8 100644
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_PCI)		+= access.o bus.o probe.o host-bridge.o \
>  
>  obj-$(CONFIG_PCI)		+= msi/
>  obj-$(CONFIG_PCI)		+= pcie/
> +obj-$(CONFIG_PCI)		+= pwrctl/
>  
>  ifdef CONFIG_PCI
>  obj-$(CONFIG_PROC_FS)		+= proc.o
> diff --git a/drivers/pci/pwrctl/Kconfig b/drivers/pci/pwrctl/Kconfig
> new file mode 100644
> index 000000000000..e2dc5e5d2af1
> --- /dev/null
> +++ b/drivers/pci/pwrctl/Kconfig
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +menu "PCI Power control drivers"
> +
> +config PCI_PWRCTL
> +	bool
> +
> +endmenu
> diff --git a/drivers/pci/pwrctl/Makefile b/drivers/pci/pwrctl/Makefile
> new file mode 100644
> index 000000000000..4381cfbf3f21
> --- /dev/null
> +++ b/drivers/pci/pwrctl/Makefile
> @@ -0,0 +1,3 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +obj-$(CONFIG_PCI_PWRCTL)		+= core.o
> diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c
> new file mode 100644
> index 000000000000..312e6fe95c31
> --- /dev/null
> +++ b/drivers/pci/pwrctl/core.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Linaro Ltd.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/export.h>
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/pci-pwrctl.h>
> +#include <linux/property.h>
> +#include <linux/slab.h>
> +
> +static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action,
> +			     void *data)
> +{
> +	struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb);
> +	struct device *dev = data;
> +
> +	if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev))
> +		return NOTIFY_DONE;
> +
> +	switch (action) {
> +	case BUS_NOTIFY_ADD_DEVICE:
> +		device_set_of_node_from_dev(dev, pwrctl->dev);

What happens if the bootloader left the power on, and the
of_platform_populate() got probe deferred because the pwrseq wasn't
ready, so this happens after pci_set_of_node() has been called?

(I think dev->of_node will be put, then get and then node_reused
assigned...but I'm not entirely sure)

> +		break;
> +	case BUS_NOTIFY_BOUND_DRIVER:
> +		pwrctl->link = device_link_add(dev, pwrctl->dev,
> +					       DL_FLAG_AUTOREMOVE_CONSUMER);
> +		if (!pwrctl->link)
> +			dev_err(pwrctl->dev, "Failed to add device link\n");
> +		break;
> +	case BUS_NOTIFY_UNBOUND_DRIVER:
> +		device_link_del(pwrctl->link);

This might however become a NULL-pointer dereference, if dev was bound
to its driver before the pci_pwrctl_notify() was registered for the
pwrctl and then the PCI device is unbound.

This would also happen if device_link_add() failed when the PCI device
was bound...

> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)

This function doesn't really "enable the device", looking at the example
driver it's rather "device_enabled" than "device_enable"...

> +{
> +	if (!pwrctl->dev)
> +		return -ENODEV;
> +
> +	pwrctl->nb.notifier_call = pci_pwrctl_notify;
> +	bus_register_notifier(&pci_bus_type, &pwrctl->nb);
> +
> +	pci_lock_rescan_remove();
> +	pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus);
> +	pci_unlock_rescan_remove();
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(pci_pwrctl_device_enable);
> +
> +void pci_pwrctl_device_disable(struct pci_pwrctl *pwrctl)

Similarly this doesn't disable the device, the code calling this is
doing so...

Regards,
Bjorn

> +{
> +	bus_unregister_notifier(&pci_bus_type, &pwrctl->nb);
> +}
> +EXPORT_SYMBOL_GPL(pci_pwrctl_device_disable);
> +
> +static void devm_pci_pwrctl_device_disable(void *data)
> +{
> +	struct pci_pwrctl *pwrctl = data;
> +
> +	pci_pwrctl_device_disable(pwrctl);
> +}
> +
> +int devm_pci_pwrctl_device_enable(struct device *dev,
> +				  struct pci_pwrctl *pwrctl)
> +{
> +	int ret;
> +
> +	ret = pci_pwrctl_device_enable(pwrctl);
> +	if (ret)
> +		return ret;
> +
> +	return devm_add_action_or_reset(dev, devm_pci_pwrctl_device_disable,
> +					pwrctl);
> +}
> +EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_enable);
> diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h
> new file mode 100644
> index 000000000000..8d16d27cbfeb
> --- /dev/null
> +++ b/include/linux/pci-pwrctl.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (C) 2024 Linaro Ltd.
> + */
> +
> +#ifndef __PCI_PWRCTL_H__
> +#define __PCI_PWRCTL_H__
> +
> +#include <linux/notifier.h>
> +
> +struct device;
> +
> +struct pci_pwrctl {
> +	struct notifier_block nb;
> +	struct device *dev;
> +	struct device_link *link;
> +};
> +
> +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl);
> +void pci_pwrctl_device_disable(struct pci_pwrctl *pwrctl);
> +int devm_pci_pwrctl_device_enable(struct device *dev,
> +				  struct pci_pwrctl *pwrctl);
> +
> +#endif /* __PCI_PWRCTL_H__ */
> -- 
> 2.40.1
>
Bartosz Golaszewski Feb. 2, 2024, 9:11 a.m. UTC | #2
On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
>

[snip]

> > +
> > +static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action,
> > +                          void *data)
> > +{
> > +     struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb);
> > +     struct device *dev = data;
> > +
> > +     if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev))
> > +             return NOTIFY_DONE;
> > +
> > +     switch (action) {
> > +     case BUS_NOTIFY_ADD_DEVICE:
> > +             device_set_of_node_from_dev(dev, pwrctl->dev);
>
> What happens if the bootloader left the power on, and the
> of_platform_populate() got probe deferred because the pwrseq wasn't
> ready, so this happens after pci_set_of_node() has been called?
>
> (I think dev->of_node will be put, then get and then node_reused
> assigned...but I'm not entirely sure)

That's exactly what will happen and the end result will be the same.

>
> > +             break;
> > +     case BUS_NOTIFY_BOUND_DRIVER:
> > +             pwrctl->link = device_link_add(dev, pwrctl->dev,
> > +                                            DL_FLAG_AUTOREMOVE_CONSUMER);
> > +             if (!pwrctl->link)
> > +                     dev_err(pwrctl->dev, "Failed to add device link\n");
> > +             break;
> > +     case BUS_NOTIFY_UNBOUND_DRIVER:
> > +             device_link_del(pwrctl->link);
>
> This might however become a NULL-pointer dereference, if dev was bound
> to its driver before the pci_pwrctl_notify() was registered for the
> pwrctl and then the PCI device is unbound.
>
> This would also happen if device_link_add() failed when the PCI device
> was bound...
>

Yes, I'll address it.

> > +             break;
> > +     }
> > +
> > +     return NOTIFY_DONE;
> > +}
> > +
> > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
>
> This function doesn't really "enable the device", looking at the example
> driver it's rather "device_enabled" than "device_enable"...
>

I was also thinking about pci_pwrctl_device_ready() or
pci_pwrctl_device_prepared().

Bart

[snip!]
Bjorn Andersson Feb. 2, 2024, 4:52 p.m. UTC | #3
On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
[..]
> > > +             break;
> > > +     }
> > > +
> > > +     return NOTIFY_DONE;
> > > +}
> > > +
> > > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
> >
> > This function doesn't really "enable the device", looking at the example
> > driver it's rather "device_enabled" than "device_enable"...
> >
> 
> I was also thinking about pci_pwrctl_device_ready() or
> pci_pwrctl_device_prepared().

I like both of these.

I guess the bigger question is how the flow would look like in the event
that we need to power-cycle the attached PCIe device, e.g. because
firmware has gotten into a really bad state.

Will we need an operation that removes the device first, and then cut
the power, or do we cut the power and then call unprepared()?

Regards,
Bjorn

> 
> Bart
> 
> [snip!]
Bartosz Golaszewski Feb. 7, 2024, 4:26 p.m. UTC | #4
On Fri, Feb 2, 2024 at 5:52 PM Bjorn Andersson <andersson@kernel.org> wrote:
>
> On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
> [..]
> > > > +             break;
> > > > +     }
> > > > +
> > > > +     return NOTIFY_DONE;
> > > > +}
> > > > +
> > > > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
> > >
> > > This function doesn't really "enable the device", looking at the example
> > > driver it's rather "device_enabled" than "device_enable"...
> > >
> >
> > I was also thinking about pci_pwrctl_device_ready() or
> > pci_pwrctl_device_prepared().
>
> I like both of these.
>
> I guess the bigger question is how the flow would look like in the event
> that we need to power-cycle the attached PCIe device, e.g. because
> firmware has gotten into a really bad state.
>
> Will we need an operation that removes the device first, and then cut
> the power, or do we cut the power and then call unprepared()?
>

How would the core be notified about this power-cycle from the PCI
subsystem? I honestly don't know. Is there a notifier we could
subscribe to? Is the device unbound and rebound in such case?

Bart
Manivannan Sadhasivam Feb. 8, 2024, 11:32 a.m. UTC | #5
On Fri, Feb 02, 2024 at 10:52:11AM -0600, Bjorn Andersson wrote:
> On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
> [..]
> > > > +             break;
> > > > +     }
> > > > +
> > > > +     return NOTIFY_DONE;
> > > > +}
> > > > +
> > > > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
> > >
> > > This function doesn't really "enable the device", looking at the example
> > > driver it's rather "device_enabled" than "device_enable"...
> > >
> > 
> > I was also thinking about pci_pwrctl_device_ready() or
> > pci_pwrctl_device_prepared().
> 
> I like both of these.
> 
> I guess the bigger question is how the flow would look like in the event
> that we need to power-cycle the attached PCIe device, e.g. because
> firmware has gotten into a really bad state.
> 
> Will we need an operation that removes the device first, and then cut
> the power, or do we cut the power and then call unprepared()?
> 

Currently, we don't power cycle while resetting the devices. Most of the drivers
just do a software reset using some register writes. Part of the reason for
that is, the drivers themselves don't control the power to the devices and there
would be no way to let the parent know about the firmware crash.

- Mani
Lukas Wunner Feb. 9, 2024, 9:04 a.m. UTC | #6
On Wed, Feb 07, 2024 at 05:26:16PM +0100, Bartosz Golaszewski wrote:
> On Fri, Feb 2, 2024 at 5:52???PM Bjorn Andersson <andersson@kernel.org> wrote:
> > On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > > I was also thinking about pci_pwrctl_device_ready() or
> > > pci_pwrctl_device_prepared().
> >
> > I like both of these.
> >
> > I guess the bigger question is how the flow would look like in the event
> > that we need to power-cycle the attached PCIe device, e.g. because
> > firmware has gotten into a really bad state.
> >
> > Will we need an operation that removes the device first, and then cut
> > the power, or do we cut the power and then call unprepared()?
> 
> How would the core be notified about this power-cycle from the PCI
> subsystem? I honestly don't know. Is there a notifier we could
> subscribe to? Is the device unbound and rebound in such case?

To power-manage the PCI device for runtime PM (suspend to D3cold)
or system sleep, you need to amend:

  platform_pci_power_manageable()
  platform_pci_set_power_state()
  platform_pci_get_power_state()
  platform_pci_refresh_power_state()
  platform_pci_choose_state()

E.g. platform_pci_power_manageable() would check for presence of a
regulator in the DT and platform_pci_set_power_state() would disable
or enable the regulator.

To reset the device by power cycling it, amend pci_reset_fn_methods[]
to provide a reset method which disables and re-enables the regulator.
Then you can choose that reset method via sysfs and power-cycle the
device.  The PCI core will also automatically use that reset method
if there's nothing else available (e.g. if no Secondary Bus Reset
is available because the device has siblings or children, or if FLR
is not supported).

Thanks,

Lukas
Manivannan Sadhasivam Feb. 9, 2024, 9:38 a.m. UTC | #7
On Fri, Feb 09, 2024 at 10:04:33AM +0100, Lukas Wunner wrote:
> On Wed, Feb 07, 2024 at 05:26:16PM +0100, Bartosz Golaszewski wrote:
> > On Fri, Feb 2, 2024 at 5:52???PM Bjorn Andersson <andersson@kernel.org> wrote:
> > > On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > > > I was also thinking about pci_pwrctl_device_ready() or
> > > > pci_pwrctl_device_prepared().
> > >
> > > I like both of these.
> > >
> > > I guess the bigger question is how the flow would look like in the event
> > > that we need to power-cycle the attached PCIe device, e.g. because
> > > firmware has gotten into a really bad state.
> > >
> > > Will we need an operation that removes the device first, and then cut
> > > the power, or do we cut the power and then call unprepared()?
> > 
> > How would the core be notified about this power-cycle from the PCI
> > subsystem? I honestly don't know. Is there a notifier we could
> > subscribe to? Is the device unbound and rebound in such case?
> 
> To power-manage the PCI device for runtime PM (suspend to D3cold)
> or system sleep, you need to amend:
> 
>   platform_pci_power_manageable()
>   platform_pci_set_power_state()
>   platform_pci_get_power_state()
>   platform_pci_refresh_power_state()
>   platform_pci_choose_state()
> 
> E.g. platform_pci_power_manageable() would check for presence of a
> regulator in the DT and platform_pci_set_power_state() would disable
> or enable the regulator.
> 

This will work if the sole control of the resources lies in these platform_*()
APIs. But in reality, the controller drivers are the ones controlling the power
supply to the devices  and with this series, the control would be shifted partly
to pwrctl driver.

I think what we need is to call in the callbacks of the drivers in a hierarchial
order.

- Mani

> To reset the device by power cycling it, amend pci_reset_fn_methods[]
> to provide a reset method which disables and re-enables the regulator.
> Then you can choose that reset method via sysfs and power-cycle the
> device.  The PCI core will also automatically use that reset method
> if there's nothing else available (e.g. if no Secondary Bus Reset
> is available because the device has siblings or children, or if FLR
> is not supported).
> 
> Thanks,
> 
> Lukas
Bjorn Andersson Feb. 9, 2024, 11:43 p.m. UTC | #8
On Thu, Feb 08, 2024 at 05:02:01PM +0530, Manivannan Sadhasivam wrote:
> On Fri, Feb 02, 2024 at 10:52:11AM -0600, Bjorn Andersson wrote:
> > On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > > On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
> > [..]
> > > > > +             break;
> > > > > +     }
> > > > > +
> > > > > +     return NOTIFY_DONE;
> > > > > +}
> > > > > +
> > > > > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
> > > >
> > > > This function doesn't really "enable the device", looking at the example
> > > > driver it's rather "device_enabled" than "device_enable"...
> > > >
> > > 
> > > I was also thinking about pci_pwrctl_device_ready() or
> > > pci_pwrctl_device_prepared().
> > 
> > I like both of these.
> > 
> > I guess the bigger question is how the flow would look like in the event
> > that we need to power-cycle the attached PCIe device, e.g. because
> > firmware has gotten into a really bad state.
> > 
> > Will we need an operation that removes the device first, and then cut
> > the power, or do we cut the power and then call unprepared()?
> > 
> 
> Currently, we don't power cycle while resetting the devices. Most of the drivers
> just do a software reset using some register writes. Part of the reason for
> that is, the drivers themselves don't control the power to the devices and there
> would be no way to let the parent know about the firmware crash.
> 

I don't know what the appropriate design for this is, but we do have a
need for being able to recover from this state by the means of
power-cycling the device.

If it's not possible to let the device do it (in any fashion), then
perhaps a user-space-assisted model is needed?

Turning on power is an important first step, but please do consider the
full scope of the known problem space.

Regards,
Bjorn
Manivannan Sadhasivam Feb. 14, 2024, 2:28 p.m. UTC | #9
On Fri, Feb 09, 2024 at 05:43:56PM -0600, Bjorn Andersson wrote:
> On Thu, Feb 08, 2024 at 05:02:01PM +0530, Manivannan Sadhasivam wrote:
> > On Fri, Feb 02, 2024 at 10:52:11AM -0600, Bjorn Andersson wrote:
> > > On Fri, Feb 02, 2024 at 10:11:42AM +0100, Bartosz Golaszewski wrote:
> > > > On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
> > > [..]
> > > > > > +             break;
> > > > > > +     }
> > > > > > +
> > > > > > +     return NOTIFY_DONE;
> > > > > > +}
> > > > > > +
> > > > > > +int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
> > > > >
> > > > > This function doesn't really "enable the device", looking at the example
> > > > > driver it's rather "device_enabled" than "device_enable"...
> > > > >
> > > > 
> > > > I was also thinking about pci_pwrctl_device_ready() or
> > > > pci_pwrctl_device_prepared().
> > > 
> > > I like both of these.
> > > 
> > > I guess the bigger question is how the flow would look like in the event
> > > that we need to power-cycle the attached PCIe device, e.g. because
> > > firmware has gotten into a really bad state.
> > > 
> > > Will we need an operation that removes the device first, and then cut
> > > the power, or do we cut the power and then call unprepared()?
> > > 
> > 
> > Currently, we don't power cycle while resetting the devices. Most of the drivers
> > just do a software reset using some register writes. Part of the reason for
> > that is, the drivers themselves don't control the power to the devices and there
> > would be no way to let the parent know about the firmware crash.
> > 
> 
> I don't know what the appropriate design for this is, but we do have a
> need for being able to recover from this state by the means of
> power-cycling the device.
> 
> If it's not possible to let the device do it (in any fashion), then
> perhaps a user-space-assisted model is needed?
> 
> Turning on power is an important first step, but please do consider the
> full scope of the known problem space.
> 

Agree. I'm not ignoring this issue, but this is a separate topic IMO (or an
incremental change). Because, power cycling the device in the event of a
firmware crash or even upon receiving AER Fatal errors is valid for platforms
not making use of this driver and an existing issue.

For sure we can accomodate that functionality in this series itself, but that's
going to drag this series to many releases (you already know how long it took
for us to get to the current state). Instead, I'd recommend to merge it in its
current form and have Bartosz or someone work on incremental features such as:

1. Runtime/System PM
2. Resetting the device in the event of fw crash etc...

Wdyt?

- Mani
Bartosz Golaszewski Feb. 14, 2024, 3:46 p.m. UTC | #10
On Fri, Feb 2, 2024 at 4:53 AM Bjorn Andersson <andersson@kernel.org> wrote:
>

[snip]

>
> > +             break;
> > +     case BUS_NOTIFY_BOUND_DRIVER:
> > +             pwrctl->link = device_link_add(dev, pwrctl->dev,
> > +                                            DL_FLAG_AUTOREMOVE_CONSUMER);
> > +             if (!pwrctl->link)
> > +                     dev_err(pwrctl->dev, "Failed to add device link\n");
> > +             break;
> > +     case BUS_NOTIFY_UNBOUND_DRIVER:
> > +             device_link_del(pwrctl->link);
>
> This might however become a NULL-pointer dereference, if dev was bound
> to its driver before the pci_pwrctl_notify() was registered for the
> pwrctl and then the PCI device is unbound.
>

Right. And it also makes me think that right after registering with
the notifier, we should iterate over the PCI bus devices and see if
the relevant one is already there.

> This would also happen if device_link_add() failed when the PCI device
> was bound...
>

Right.

Bart

[snip]
diff mbox series

Patch

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 74147262625b..5b9b84f8774f 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -291,5 +291,6 @@  source "drivers/pci/hotplug/Kconfig"
 source "drivers/pci/controller/Kconfig"
 source "drivers/pci/endpoint/Kconfig"
 source "drivers/pci/switch/Kconfig"
+source "drivers/pci/pwrctl/Kconfig"
 
 endif
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index cc8b4e01e29d..6ae202d950f8 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -9,6 +9,7 @@  obj-$(CONFIG_PCI)		+= access.o bus.o probe.o host-bridge.o \
 
 obj-$(CONFIG_PCI)		+= msi/
 obj-$(CONFIG_PCI)		+= pcie/
+obj-$(CONFIG_PCI)		+= pwrctl/
 
 ifdef CONFIG_PCI
 obj-$(CONFIG_PROC_FS)		+= proc.o
diff --git a/drivers/pci/pwrctl/Kconfig b/drivers/pci/pwrctl/Kconfig
new file mode 100644
index 000000000000..e2dc5e5d2af1
--- /dev/null
+++ b/drivers/pci/pwrctl/Kconfig
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+menu "PCI Power control drivers"
+
+config PCI_PWRCTL
+	bool
+
+endmenu
diff --git a/drivers/pci/pwrctl/Makefile b/drivers/pci/pwrctl/Makefile
new file mode 100644
index 000000000000..4381cfbf3f21
--- /dev/null
+++ b/drivers/pci/pwrctl/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_PCI_PWRCTL)		+= core.o
diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c
new file mode 100644
index 000000000000..312e6fe95c31
--- /dev/null
+++ b/drivers/pci/pwrctl/core.c
@@ -0,0 +1,82 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-pwrctl.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+
+static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action,
+			     void *data)
+{
+	struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb);
+	struct device *dev = data;
+
+	if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev))
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		device_set_of_node_from_dev(dev, pwrctl->dev);
+		break;
+	case BUS_NOTIFY_BOUND_DRIVER:
+		pwrctl->link = device_link_add(dev, pwrctl->dev,
+					       DL_FLAG_AUTOREMOVE_CONSUMER);
+		if (!pwrctl->link)
+			dev_err(pwrctl->dev, "Failed to add device link\n");
+		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		device_link_del(pwrctl->link);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl)
+{
+	if (!pwrctl->dev)
+		return -ENODEV;
+
+	pwrctl->nb.notifier_call = pci_pwrctl_notify;
+	bus_register_notifier(&pci_bus_type, &pwrctl->nb);
+
+	pci_lock_rescan_remove();
+	pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus);
+	pci_unlock_rescan_remove();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_pwrctl_device_enable);
+
+void pci_pwrctl_device_disable(struct pci_pwrctl *pwrctl)
+{
+	bus_unregister_notifier(&pci_bus_type, &pwrctl->nb);
+}
+EXPORT_SYMBOL_GPL(pci_pwrctl_device_disable);
+
+static void devm_pci_pwrctl_device_disable(void *data)
+{
+	struct pci_pwrctl *pwrctl = data;
+
+	pci_pwrctl_device_disable(pwrctl);
+}
+
+int devm_pci_pwrctl_device_enable(struct device *dev,
+				  struct pci_pwrctl *pwrctl)
+{
+	int ret;
+
+	ret = pci_pwrctl_device_enable(pwrctl);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, devm_pci_pwrctl_device_disable,
+					pwrctl);
+}
+EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_enable);
diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h
new file mode 100644
index 000000000000..8d16d27cbfeb
--- /dev/null
+++ b/include/linux/pci-pwrctl.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#ifndef __PCI_PWRCTL_H__
+#define __PCI_PWRCTL_H__
+
+#include <linux/notifier.h>
+
+struct device;
+
+struct pci_pwrctl {
+	struct notifier_block nb;
+	struct device *dev;
+	struct device_link *link;
+};
+
+int pci_pwrctl_device_enable(struct pci_pwrctl *pwrctl);
+void pci_pwrctl_device_disable(struct pci_pwrctl *pwrctl);
+int devm_pci_pwrctl_device_enable(struct device *dev,
+				  struct pci_pwrctl *pwrctl);
+
+#endif /* __PCI_PWRCTL_H__ */