diff mbox series

PCI: pciehp: Avoid returning prematurely from sysfs requests

Message ID 4174210466e27eb7e2243dd1d801d5f75baaffd8.1565345211.git.lukas@wunner.de
State Accepted
Delegated to: Bjorn Helgaas
Headers show
Series PCI: pciehp: Avoid returning prematurely from sysfs requests | expand

Commit Message

Lukas Wunner Aug. 9, 2019, 10:28 a.m. UTC
A sysfs request to enable or disable a PCIe hotplug slot should not
return before it has been carried out.  That is sought to be achieved
by waiting until the controller's "pending_events" have been cleared.

However the IRQ thread pciehp_ist() clears the "pending_events" before
it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
to check the "pending_events" after they have been cleared but while
pciehp_ist() is still running, the functions may return prematurely
with an incorrect return value.

Fix by introducing an "ist_running" flag which must be false before a
sysfs request is allowed to return.

Fixes: 32a8cef274fe ("PCI: pciehp: Enable/disable exclusively from IRQ thread")
Link: https://lore.kernel.org/linux-pci/1562226638-54134-1-git-send-email-wangxiongfeng2@huawei.com
Reported-and-tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org # v4.19+
---
 drivers/pci/hotplug/pciehp.h      | 2 ++
 drivers/pci/hotplug/pciehp_ctrl.c | 6 ++++--
 drivers/pci/hotplug/pciehp_hpc.c  | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

Comments

Kuppuswamy Sathyanarayanan Aug. 9, 2019, 5:28 p.m. UTC | #1
Hi,

On 8/9/19 3:28 AM, Lukas Wunner wrote:
> A sysfs request to enable or disable a PCIe hotplug slot should not
> return before it has been carried out.  That is sought to be achieved
> by waiting until the controller's "pending_events" have been cleared.
>
> However the IRQ thread pciehp_ist() clears the "pending_events" before
> it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> to check the "pending_events" after they have been cleared but while
> pciehp_ist() is still running, the functions may return prematurely
> with an incorrect return value.
Can this be fixed by changing the sequence of clearing the 
pending_events in pciehp_ist() ?
>
> Fix by introducing an "ist_running" flag which must be false before a
> sysfs request is allowed to return.
>
> Fixes: 32a8cef274fe ("PCI: pciehp: Enable/disable exclusively from IRQ thread")
> Link: https://lore.kernel.org/linux-pci/1562226638-54134-1-git-send-email-wangxiongfeng2@huawei.com
> Reported-and-tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
> Signed-off-by: Lukas Wunner <lukas@wunner.de>
> Cc: stable@vger.kernel.org # v4.19+
> ---
>   drivers/pci/hotplug/pciehp.h      | 2 ++
>   drivers/pci/hotplug/pciehp_ctrl.c | 6 ++++--
>   drivers/pci/hotplug/pciehp_hpc.c  | 2 ++
>   3 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
> index 8c51a04b8083..e316bde45c7b 100644
> --- a/drivers/pci/hotplug/pciehp.h
> +++ b/drivers/pci/hotplug/pciehp.h
> @@ -72,6 +72,7 @@ extern int pciehp_poll_time;
>    * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
>    *	Link Status register and to the Presence Detect State bit in the Slot
>    *	Status register during a slot reset which may cause them to flap
> + * @ist_running: flag to keep user request waiting while IRQ thread is running
>    * @request_result: result of last user request submitted to the IRQ thread
>    * @requester: wait queue to wake up on completion of user request,
>    *	used for synchronous slot enable/disable request via sysfs
> @@ -101,6 +102,7 @@ struct controller {
>   
>   	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
>   	struct rw_semaphore reset_lock;
> +	unsigned int ist_running;
>   	int request_result;
>   	wait_queue_head_t requester;
>   };
> diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
> index 631ced0ab28a..1ce9ce335291 100644
> --- a/drivers/pci/hotplug/pciehp_ctrl.c
> +++ b/drivers/pci/hotplug/pciehp_ctrl.c
> @@ -368,7 +368,8 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
>   		ctrl->request_result = -ENODEV;
>   		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
>   		wait_event(ctrl->requester,
> -			   !atomic_read(&ctrl->pending_events));
> +			   !atomic_read(&ctrl->pending_events) &&
> +			   !ctrl->ist_running);
>   		return ctrl->request_result;
>   	case POWERON_STATE:
>   		ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
> @@ -401,7 +402,8 @@ int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
>   		mutex_unlock(&ctrl->state_lock);
>   		pciehp_request(ctrl, DISABLE_SLOT);
>   		wait_event(ctrl->requester,
> -			   !atomic_read(&ctrl->pending_events));
> +			   !atomic_read(&ctrl->pending_events) &&
> +			   !ctrl->ist_running);
>   		return ctrl->request_result;
>   	case POWEROFF_STATE:
>   		ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index bd990e3371e3..9e2d7688e8cc 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -608,6 +608,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
>   	irqreturn_t ret;
>   	u32 events;
>   
> +	ctrl->ist_running = true;
>   	pci_config_pm_runtime_get(pdev);
>   
>   	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
> @@ -654,6 +655,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
>   	up_read(&ctrl->reset_lock);
>   
>   	pci_config_pm_runtime_put(pdev);
> +	ctrl->ist_running = false;
>   	wake_up(&ctrl->requester);
>   	return IRQ_HANDLED;
>   }
Lukas Wunner Aug. 9, 2019, 6:28 p.m. UTC | #2
On Fri, Aug 09, 2019 at 10:28:15AM -0700, sathyanarayanan kuppuswamy wrote:
> On 8/9/19 3:28 AM, Lukas Wunner wrote:
> > A sysfs request to enable or disable a PCIe hotplug slot should not
> > return before it has been carried out.  That is sought to be achieved
> > by waiting until the controller's "pending_events" have been cleared.
> > 
> > However the IRQ thread pciehp_ist() clears the "pending_events" before
> > it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> > to check the "pending_events" after they have been cleared but while
> > pciehp_ist() is still running, the functions may return prematurely
> > with an incorrect return value.
> 
> Can this be fixed by changing the sequence of clearing the pending_events in
> pciehp_ist() ?

It can't.  The processing logic is such that pciehp_ist() atomically
removes bits from pending_events and acts upon them.  Simultaneously, new
events may be queued up by adding bits to pending_events (through a
hardirq handled by pciehp_isr(), through a sysfs request, etc).
Those will be handled in an additional iteration of pciehp_ist().

If I'd delay removing bits from pending_events, I then couldn't tell if
new events have accumulated while others have been processed.
E.g. a PDS event may occur while another one is being processed.
The second PDS events may signify a card removal immediately after
the card has been brought up.  It's crucial not to lose the second PDS
event but act properly on it by bringing the slot down again.

This way of processing events also allows me to easily filter events.
E.g. we tolerate link flaps occurring during the first 100 ms after
enabling the slot simply by atomically removing bits from pending_events
at a certain point.  See commit 6c35a1ac3da6 ("PCI: pciehp: Tolerate
initially unstable link").

Now what I *could* do would be to make the events currently being
processed public, e.g. by adding an "atomic_t current_events" to
struct controller.  Then I could wait in pciehp_sysfs_enable_slot() /
_disable_slot() until both "pending_events" and "current_events"
becomes empty.  But it would basically amount to the same as this patch,
and we don't really need to know *which* events are being processed,
only the *fact* that events are being processed.

Let me know if you have further questions regarding the pciehp
processing logic.

Thanks,

Lukas
Keith Busch Aug. 9, 2019, 7:32 p.m. UTC | #3
On Fri, Aug 09, 2019 at 12:28:43PM +0200, Lukas Wunner wrote:
> A sysfs request to enable or disable a PCIe hotplug slot should not
> return before it has been carried out.  That is sought to be achieved
> by waiting until the controller's "pending_events" have been cleared.
> 
> However the IRQ thread pciehp_ist() clears the "pending_events" before
> it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> to check the "pending_events" after they have been cleared but while
> pciehp_ist() is still running, the functions may return prematurely
> with an incorrect return value.
> 
> Fix by introducing an "ist_running" flag which must be false before a
> sysfs request is allowed to return.

Can you instead just call synchronize_irq(ctrl->pcie->irq) after the
pending events is cleared?
Lukas Wunner Aug. 9, 2019, 8:28 p.m. UTC | #4
On Fri, Aug 09, 2019 at 01:32:16PM -0600, Keith Busch wrote:
> On Fri, Aug 09, 2019 at 12:28:43PM +0200, Lukas Wunner wrote:
> > A sysfs request to enable or disable a PCIe hotplug slot should not
> > return before it has been carried out.  That is sought to be achieved
> > by waiting until the controller's "pending_events" have been cleared.
> > 
> > However the IRQ thread pciehp_ist() clears the "pending_events" before
> > it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> > to check the "pending_events" after they have been cleared but while
> > pciehp_ist() is still running, the functions may return prematurely
> > with an incorrect return value.
> > 
> > Fix by introducing an "ist_running" flag which must be false before a
> > sysfs request is allowed to return.
> 
> Can you instead just call synchronize_irq(ctrl->pcie->irq) after the
> pending events is cleared?

You mean call synchronize_irq() from pciehp_sysfs_enable_slot() /
disable_slot()?  That's a good idea, let me think that through and
try to make it work that way.

Thanks!

Lukas
Lukas Wunner Aug. 10, 2019, 10:12 a.m. UTC | #5
On Fri, Aug 09, 2019 at 10:28:15PM +0200, Lukas Wunner wrote:
> On Fri, Aug 09, 2019 at 01:32:16PM -0600, Keith Busch wrote:
> > On Fri, Aug 09, 2019 at 12:28:43PM +0200, Lukas Wunner wrote:
> > > A sysfs request to enable or disable a PCIe hotplug slot should not
> > > return before it has been carried out.  That is sought to be achieved
> > > by waiting until the controller's "pending_events" have been cleared.
> > > 
> > > However the IRQ thread pciehp_ist() clears the "pending_events" before
> > > it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> > > to check the "pending_events" after they have been cleared but while
> > > pciehp_ist() is still running, the functions may return prematurely
> > > with an incorrect return value.
> > > 
> > > Fix by introducing an "ist_running" flag which must be false before a
> > > sysfs request is allowed to return.
> > 
> > Can you instead just call synchronize_irq(ctrl->pcie->irq) after the
> > pending events is cleared?
> 
> You mean call synchronize_irq() from pciehp_sysfs_enable_slot() /
> disable_slot()?  That's a good idea, let me think that through and
> try to make it work that way.

After a bit of thinking:

synchronize_irq() doesn't work for poll mode.

A secondary concern is that if the IRQ is shared, synchronize_irq()
waits for all the other (unrelated) IRQ threads to finish,
i.e. longer than necessary.

So I can't think of a better way to solve this.

Thanks,

Lukas
Bjorn Helgaas Oct. 4, 2019, 8:41 p.m. UTC | #6
On Fri, Aug 09, 2019 at 12:28:43PM +0200, Lukas Wunner wrote:
> A sysfs request to enable or disable a PCIe hotplug slot should not
> return before it has been carried out.  That is sought to be achieved
> by waiting until the controller's "pending_events" have been cleared.
> 
> However the IRQ thread pciehp_ist() clears the "pending_events" before
> it acts on them.  If pciehp_sysfs_enable_slot() / _disable_slot() happen
> to check the "pending_events" after they have been cleared but while
> pciehp_ist() is still running, the functions may return prematurely
> with an incorrect return value.
> 
> Fix by introducing an "ist_running" flag which must be false before a
> sysfs request is allowed to return.
> 
> Fixes: 32a8cef274fe ("PCI: pciehp: Enable/disable exclusively from IRQ thread")
> Link: https://lore.kernel.org/linux-pci/1562226638-54134-1-git-send-email-wangxiongfeng2@huawei.com
> Reported-and-tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
> Signed-off-by: Lukas Wunner <lukas@wunner.de>
> Cc: stable@vger.kernel.org # v4.19+

Applied to pci/hotplug for v5.5, thanks!

> ---
>  drivers/pci/hotplug/pciehp.h      | 2 ++
>  drivers/pci/hotplug/pciehp_ctrl.c | 6 ++++--
>  drivers/pci/hotplug/pciehp_hpc.c  | 2 ++
>  3 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
> index 8c51a04b8083..e316bde45c7b 100644
> --- a/drivers/pci/hotplug/pciehp.h
> +++ b/drivers/pci/hotplug/pciehp.h
> @@ -72,6 +72,7 @@ extern int pciehp_poll_time;
>   * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
>   *	Link Status register and to the Presence Detect State bit in the Slot
>   *	Status register during a slot reset which may cause them to flap
> + * @ist_running: flag to keep user request waiting while IRQ thread is running
>   * @request_result: result of last user request submitted to the IRQ thread
>   * @requester: wait queue to wake up on completion of user request,
>   *	used for synchronous slot enable/disable request via sysfs
> @@ -101,6 +102,7 @@ struct controller {
>  
>  	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
>  	struct rw_semaphore reset_lock;
> +	unsigned int ist_running;
>  	int request_result;
>  	wait_queue_head_t requester;
>  };
> diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
> index 631ced0ab28a..1ce9ce335291 100644
> --- a/drivers/pci/hotplug/pciehp_ctrl.c
> +++ b/drivers/pci/hotplug/pciehp_ctrl.c
> @@ -368,7 +368,8 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
>  		ctrl->request_result = -ENODEV;
>  		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
>  		wait_event(ctrl->requester,
> -			   !atomic_read(&ctrl->pending_events));
> +			   !atomic_read(&ctrl->pending_events) &&
> +			   !ctrl->ist_running);
>  		return ctrl->request_result;
>  	case POWERON_STATE:
>  		ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
> @@ -401,7 +402,8 @@ int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
>  		mutex_unlock(&ctrl->state_lock);
>  		pciehp_request(ctrl, DISABLE_SLOT);
>  		wait_event(ctrl->requester,
> -			   !atomic_read(&ctrl->pending_events));
> +			   !atomic_read(&ctrl->pending_events) &&
> +			   !ctrl->ist_running);
>  		return ctrl->request_result;
>  	case POWEROFF_STATE:
>  		ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index bd990e3371e3..9e2d7688e8cc 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -608,6 +608,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
>  	irqreturn_t ret;
>  	u32 events;
>  
> +	ctrl->ist_running = true;
>  	pci_config_pm_runtime_get(pdev);
>  
>  	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
> @@ -654,6 +655,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
>  	up_read(&ctrl->reset_lock);
>  
>  	pci_config_pm_runtime_put(pdev);
> +	ctrl->ist_running = false;
>  	wake_up(&ctrl->requester);
>  	return IRQ_HANDLED;
>  }
> -- 
> 2.20.1
>
diff mbox series

Patch

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 8c51a04b8083..e316bde45c7b 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,6 +72,7 @@  extern int pciehp_poll_time;
  * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
  *	Link Status register and to the Presence Detect State bit in the Slot
  *	Status register during a slot reset which may cause them to flap
+ * @ist_running: flag to keep user request waiting while IRQ thread is running
  * @request_result: result of last user request submitted to the IRQ thread
  * @requester: wait queue to wake up on completion of user request,
  *	used for synchronous slot enable/disable request via sysfs
@@ -101,6 +102,7 @@  struct controller {
 
 	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
 	struct rw_semaphore reset_lock;
+	unsigned int ist_running;
 	int request_result;
 	wait_queue_head_t requester;
 };
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 631ced0ab28a..1ce9ce335291 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -368,7 +368,8 @@  int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
 		ctrl->request_result = -ENODEV;
 		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
 		wait_event(ctrl->requester,
-			   !atomic_read(&ctrl->pending_events));
+			   !atomic_read(&ctrl->pending_events) &&
+			   !ctrl->ist_running);
 		return ctrl->request_result;
 	case POWERON_STATE:
 		ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
@@ -401,7 +402,8 @@  int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
 		mutex_unlock(&ctrl->state_lock);
 		pciehp_request(ctrl, DISABLE_SLOT);
 		wait_event(ctrl->requester,
-			   !atomic_read(&ctrl->pending_events));
+			   !atomic_read(&ctrl->pending_events) &&
+			   !ctrl->ist_running);
 		return ctrl->request_result;
 	case POWEROFF_STATE:
 		ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index bd990e3371e3..9e2d7688e8cc 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -608,6 +608,7 @@  static irqreturn_t pciehp_ist(int irq, void *dev_id)
 	irqreturn_t ret;
 	u32 events;
 
+	ctrl->ist_running = true;
 	pci_config_pm_runtime_get(pdev);
 
 	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
@@ -654,6 +655,7 @@  static irqreturn_t pciehp_ist(int irq, void *dev_id)
 	up_read(&ctrl->reset_lock);
 
 	pci_config_pm_runtime_put(pdev);
+	ctrl->ist_running = false;
 	wake_up(&ctrl->requester);
 	return IRQ_HANDLED;
 }