diff mbox series

[02/16] core/pci: Add missing lock in set_power_timer

Message ID 20190909123151.21944-3-fbarrat@linux.ibm.com
State Superseded
Headers show
Series opencapi: enable card reset and link retraining | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (470ffb5f29d741c3bed600f7bb7bf0cbb270e05a)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Frederic Barrat Sept. 9, 2019, 12:31 p.m. UTC
set_power_timer() was not using any lock, though it alters the slot
state and devices found under it. So lock the PHB under which the slot
is found to avoid concurrent operations.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---
 core/pci-opal.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Christophe Lombard Sept. 17, 2019, 8:20 a.m. UTC | #1
On 09/09/2019 14:31, Frederic Barrat wrote:
> set_power_timer() was not using any lock, though it alters the slot
> state and devices found under it. So lock the PHB under which the slot
> is found to avoid concurrent operations.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---

It seems logical

Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Andrew Donnellan Sept. 24, 2019, 12:18 p.m. UTC | #2
On 9/9/19 2:31 pm, Frederic Barrat wrote:
> set_power_timer() was not using any lock, though it alters the slot
> state and devices found under it. So lock the PHB under which the slot
> is found to avoid concurrent operations.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>

This seems sensible

Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>

> ---
>   core/pci-opal.c | 3 +++
>   1 file changed, 3 insertions(+)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index d5209600..175810d0 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -674,7 +674,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   	struct pci_device *pd = slot->pd;
>   	struct dt_node *dn = pd->dn;
>   	uint8_t link;
> +	struct phb *phb = slot->phb;
>   
> +	phb_lock(phb);
>   	switch (slot->state) {
>   	case PCI_SLOT_STATE_SPOWER_START:
>   		if (slot->retries-- == 0) {
> @@ -720,6 +722,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>   		      slot->id, slot->state);
>   	}
> +	phb_unlock(phb);
>   }
>   
>   static int64_t opal_pci_set_power_state(uint64_t async_token,
>
Oliver O'Halloran Oct. 1, 2019, 6:19 a.m. UTC | #3
On Mon, 2019-09-09 at 14:31 +0200, Frederic Barrat wrote:
> set_power_timer() was not using any lock, though it alters the slot
> state and devices found under it. So lock the PHB under which the slot
> is found to avoid concurrent operations.

This is probably fine, but I was always worried there was potential for
deadlocks if we took it here. 

> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>  core/pci-opal.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index d5209600..175810d0 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -674,7 +674,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>  	struct pci_device *pd = slot->pd;
>  	struct dt_node *dn = pd->dn;
>  	uint8_t link;
> +	struct phb *phb = slot->phb;
>  
> +	phb_lock(phb);
>  	switch (slot->state) {
>  	case PCI_SLOT_STATE_SPOWER_START:
>  		if (slot->retries-- == 0) {
> @@ -720,6 +722,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>  		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>  		      slot->id, slot->state);
>  	}
> +	phb_unlock(phb);
>  }
>  
>  static int64_t opal_pci_set_power_state(uint64_t async_token,
Frederic Barrat Oct. 9, 2019, 9 a.m. UTC | #4
Le 01/10/2019 à 08:19, Oliver O'Halloran a écrit :
> On Mon, 2019-09-09 at 14:31 +0200, Frederic Barrat wrote:
>> set_power_timer() was not using any lock, though it alters the slot
>> state and devices found under it. So lock the PHB under which the slot
>> is found to avoid concurrent operations.
> 
> This is probably fine, but I was always worried there was potential for
> deadlocks if we took it here.


Mmmm... I'm afraid I can't rule it out completely: somebody calling 
check_timers() while holding that same phb lock. Something else would 
need to be requested at the same time as the power off/on of the slot, 
but it's theoretically possible.
I think there's another possible scenario if we have a PCI topology with 
2 slots under the same PHB. In which case, 2 threads could be fighting 
for the lock with perfectly valid reasons. One would just need to wait.
I'll replace the lock() in the timer function with a try_lock() and 
yield if necessary.

   Fred


>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>   core/pci-opal.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index d5209600..175810d0 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -674,7 +674,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>>   	struct pci_device *pd = slot->pd;
>>   	struct dt_node *dn = pd->dn;
>>   	uint8_t link;
>> +	struct phb *phb = slot->phb;
>>   
>> +	phb_lock(phb);
>>   	switch (slot->state) {
>>   	case PCI_SLOT_STATE_SPOWER_START:
>>   		if (slot->retries-- == 0) {
>> @@ -720,6 +722,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>>   		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>>   		      slot->id, slot->state);
>>   	}
>> +	phb_unlock(phb);
>>   }
>>   
>>   static int64_t opal_pci_set_power_state(uint64_t async_token,
>
diff mbox series

Patch

diff --git a/core/pci-opal.c b/core/pci-opal.c
index d5209600..175810d0 100644
--- a/core/pci-opal.c
+++ b/core/pci-opal.c
@@ -674,7 +674,9 @@  static void set_power_timer(struct timer *t __unused, void *data,
 	struct pci_device *pd = slot->pd;
 	struct dt_node *dn = pd->dn;
 	uint8_t link;
+	struct phb *phb = slot->phb;
 
+	phb_lock(phb);
 	switch (slot->state) {
 	case PCI_SLOT_STATE_SPOWER_START:
 		if (slot->retries-- == 0) {
@@ -720,6 +722,7 @@  static void set_power_timer(struct timer *t __unused, void *data,
 		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
 		      slot->id, slot->state);
 	}
+	phb_unlock(phb);
 }
 
 static int64_t opal_pci_set_power_state(uint64_t async_token,