diff mbox series

[04/16] core/pci: Train link of PHB slots when hotplugging

Message ID 20190909123151.21944-5-fbarrat@linux.ibm.com
State Superseded
Headers show
Series opencapi: enable card reset and link retraining | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (470ffb5f29d741c3bed600f7bb7bf0cbb270e05a)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Frederic Barrat Sept. 9, 2019, 12:31 p.m. UTC
The link of PHB slots must be trained after powering on. This can be
done by calling the fundamental reset callback of the slot.

We could force a reset for all the slots and have a common path in
set_power_state(). But this patch only resets the PHB slot. Some slot
implementations do a power cycle during fundamental reset, so calling
a reset after powering on would repeat that operation.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---
 core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 108 insertions(+), 22 deletions(-)

Comments

Christophe Lombard Sept. 17, 2019, 9:02 a.m. UTC | #1
On 09/09/2019 14:31, Frederic Barrat wrote:
> The link of PHB slots must be trained after powering on. This can be
> done by calling the fundamental reset callback of the slot.
> 
> We could force a reset for all the slots and have a common path in
> set_power_state(). But this patch only resets the PHB slot. Some slot
> implementations do a power cycle during fundamental reset, so calling
> a reset after powering on would repeat that operation.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 108 insertions(+), 22 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 69bd65c6..6c070030 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>   	pci_remove_bus(phb, &pd->children);
>   }
>   
> +static void link_training_timer(struct timer *t, void *data,
> +				uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = 0;
> +
> +	phb_lock(phb);
> +

lock already taken in the main function set_power_timer()

> +	rc = slot->ops.run_sm(slot);
> +	if (rc < 0)
> +		goto out;
> +	if (rc > 0) {
> +		schedule_timer(t, rc);
> +		phb_unlock(phb);
> +		return;
> +	}
> +
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		rc = OPAL_HARDWARE;
> +		goto out;
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	phb_lock(phb);
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		if (slot->retries-- == 0) {
> +			rc = OPAL_BUSY;
> +			goto out;
> +		} else {
> +			schedule_timer(t, msecs_to_tb(10));
> +			phb_unlock(phb);
> +			return;
> +		}
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static bool training_needed(struct pci_slot *slot)
> +{
> +	struct phb *phb = slot->phb;
> +	struct pci_device *pd = slot->pd;
> +
> +	if ((pd && !pd->parent) || /* "real" PHB */
> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
> +		return true;
> +	return false;
> +}
> +
> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
> +{
> +	int64_t rc;
> +
> +	/*
> +	 * Links for PHB slots need to be retrained by triggering a
> +	 * fundamental reset. Other slots also need to be tested for
> +	 * readiness
> +	 */
> +	if (training_needed(slot)) {
> +		rc = slot->ops.freset(slot);
> +		if (rc < 0) {
> +			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +				       slot->async_token,
> +				       get_slot_phandle(slot),
> +				       slot->power_state,
> +				       rc <= 0 ? rc : OPAL_BUSY);
> +			return;
> +		}
> +		init_timer(&slot->timer, link_training_timer, slot);
> +	} else {
> +		init_timer(&slot->timer, link_up_timer, slot);
> +		rc = 1;
> +	}
> +	schedule_timer(&slot->timer, rc);
> +}
> +
>   static void set_power_timer(struct timer *t __unused, void *data,
>   			    uint64_t now __unused)
>   {
>   	struct pci_slot *slot = data;
> -	uint8_t link;
>   	struct phb *phb = slot->phb;
>   
>   	phb_lock(phb);
> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   
>   		break;
>   	case PCI_SLOT_STATE_SPOWER_DONE:
> +		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);

any reason to update the state here ?

>   		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>   			remove_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>   			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>   				       slot->async_token, get_slot_phandle(slot),
>   				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   		}
>   
>   		/* Power on */
> -		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> -			link = 0;
> -		if (link) {
> -			rescan_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> -		} else if (slot->retries-- == 0) {
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> -		} else {
> -			schedule_timer(&slot->timer, msecs_to_tb(10));
> -		}
> -
> +		wait_for_link_up_and_rescan(slot);
>   		break;
>   	default:
>   		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
>   		init_timer(&slot->timer, set_power_timer, slot);
>   		schedule_timer(&slot->timer, msecs_to_tb(10));
>   	} else if (rc == OPAL_SUCCESS) {
> -		if (*state == OPAL_PCI_SLOT_POWER_OFF)
> +		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>   			remove_slot_devices(slot);
> -		else
> -			rescan_slot_devices(slot);
> +		} else {
> +			wait_for_link_up_and_rescan(slot);
> +			rc = OPAL_ASYNC_COMPLETION;
> +		}
>   	}
>   
>   	phb_unlock(phb);
>
Frederic Barrat Sept. 17, 2019, 3:05 p.m. UTC | #2
Le 17/09/2019 à 11:02, christophe lombard a écrit :
> On 09/09/2019 14:31, Frederic Barrat wrote:
>> The link of PHB slots must be trained after powering on. This can be
>> done by calling the fundamental reset callback of the slot.
>>
>> We could force a reset for all the slots and have a common path in
>> set_power_state(). But this patch only resets the PHB slot. Some slot
>> implementations do a power cycle during fundamental reset, so calling
>> a reset after powering on would repeat that operation.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 108 insertions(+), 22 deletions(-)
>>
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index 69bd65c6..6c070030 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot 
>> *slot)
>>       pci_remove_bus(phb, &pd->children);
>>   }
>> +static void link_training_timer(struct timer *t, void *data,
>> +                uint64_t now __unused)
>> +{
>> +    struct pci_slot *slot = data;
>> +    struct phb *phb = slot->phb;
>> +    uint8_t link;
>> +    int64_t rc = 0;
>> +
>> +    phb_lock(phb);
>> +
> 
> lock already taken in the main function set_power_timer()


link_training_timer() is called in a different context, when the timer 
pops after being scheduled in set_power_time(). So I think we need to 
take the lock as link_training_timer() could be executed concurrently 
with something else touching the link. And we don't deadlock.



>> +    rc = slot->ops.run_sm(slot);
>> +    if (rc < 0)
>> +        goto out;
>> +    if (rc > 0) {
>> +        schedule_timer(t, rc);
>> +        phb_unlock(phb);
>> +        return;
>> +    }
>> +
>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +        link = 0;
>> +    if (!link) {
>> +        rc = OPAL_HARDWARE;
>> +        goto out;
>> +    }
>> +
>> +    rescan_slot_devices(slot);
>> +out:
>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +               slot->async_token, get_slot_phandle(slot),
>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +    phb_unlock(phb);
>> +}
>> +
>> +static void link_up_timer(struct timer *t, void *data, uint64_t now 
>> __unused)
>> +{
>> +    struct pci_slot *slot = data;
>> +    struct phb *phb = slot->phb;
>> +    uint8_t link;
>> +    int64_t rc = OPAL_SUCCESS;
>> +
>> +    phb_lock(phb);
>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +        link = 0;
>> +    if (!link) {
>> +        if (slot->retries-- == 0) {
>> +            rc = OPAL_BUSY;
>> +            goto out;
>> +        } else {
>> +            schedule_timer(t, msecs_to_tb(10));
>> +            phb_unlock(phb);
>> +            return;
>> +        }
>> +    }
>> +
>> +    rescan_slot_devices(slot);
>> +out:
>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +               slot->async_token, get_slot_phandle(slot),
>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +    phb_unlock(phb);
>> +}
>> +
>> +static bool training_needed(struct pci_slot *slot)
>> +{
>> +    struct phb *phb = slot->phb;
>> +    struct pci_device *pd = slot->pd;
>> +
>> +    if ((pd && !pd->parent) || /* "real" PHB */
>> +        (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
>> +        return true;
>> +    return false;
>> +}
>> +
>> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
>> +{
>> +    int64_t rc;
>> +
>> +    /*
>> +     * Links for PHB slots need to be retrained by triggering a
>> +     * fundamental reset. Other slots also need to be tested for
>> +     * readiness
>> +     */
>> +    if (training_needed(slot)) {
>> +        rc = slot->ops.freset(slot);
>> +        if (rc < 0) {
>> +            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +                       slot->async_token,
>> +                       get_slot_phandle(slot),
>> +                       slot->power_state,
>> +                       rc <= 0 ? rc : OPAL_BUSY);
>> +            return;
>> +        }
>> +        init_timer(&slot->timer, link_training_timer, slot);
>> +    } else {
>> +        init_timer(&slot->timer, link_up_timer, slot);
>> +        rc = 1;
>> +    }
>> +    schedule_timer(&slot->timer, rc);
>> +}
>> +
>>   static void set_power_timer(struct timer *t __unused, void *data,
>>                   uint64_t now __unused)
>>   {
>>       struct pci_slot *slot = data;
>> -    uint8_t link;
>>       struct phb *phb = slot->phb;
>>       phb_lock(phb);
>> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t 
>> __unused, void *data,
>>           break;
>>       case PCI_SLOT_STATE_SPOWER_DONE:
>> +        pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> 
> any reason to update the state here ?


The power is now on and we'll soon have 2 bifurcated paths: either we 
retrain or we don't. We need to eventually reset the state to 
PCL_SLOT_STATE_NORMAL and this location happens to be common. I don't 
think it hurts to do it now instead of later (and in the case of link 
retraining, it's needed pretty early, before we call freset() )

   Fred



>>           if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>>               remove_slot_devices(slot);
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>>               opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>>                          slot->async_token, get_slot_phandle(slot),
>>                          OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
>> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t 
>> __unused, void *data,
>>           }
>>           /* Power on */
>> -        if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> -            link = 0;
>> -        if (link) {
>> -            rescan_slot_devices(slot);
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -                       slot->async_token, get_slot_phandle(slot),
>> -                       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
>> -        } else if (slot->retries-- == 0) {
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -                       slot->async_token, get_slot_phandle(slot),
>> -                       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
>> -        } else {
>> -            schedule_timer(&slot->timer, msecs_to_tb(10));
>> -        }
>> -
>> +        wait_for_link_up_and_rescan(slot);
>>           break;
>>       default:
>>           prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t 
>> async_token,
>>           init_timer(&slot->timer, set_power_timer, slot);
>>           schedule_timer(&slot->timer, msecs_to_tb(10));
>>       } else if (rc == OPAL_SUCCESS) {
>> -        if (*state == OPAL_PCI_SLOT_POWER_OFF)
>> +        if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>>               remove_slot_devices(slot);
>> -        else
>> -            rescan_slot_devices(slot);
>> +        } else {
>> +            wait_for_link_up_and_rescan(slot);
>> +            rc = OPAL_ASYNC_COMPLETION;
>> +        }
>>       }
>>       phb_unlock(phb);
>>
>
Andrew Donnellan Sept. 24, 2019, 4:16 p.m. UTC | #3
On 9/9/19 2:31 pm, Frederic Barrat wrote:
> The link of PHB slots must be trained after powering on. This can be
> done by calling the fundamental reset callback of the slot.
> 
> We could force a reset for all the slots and have a common path in
> set_power_state(). But this patch only resets the PHB slot. Some slot
> implementations do a power cycle during fundamental reset, so calling
> a reset after powering on would repeat that operation.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>

Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>

one very minor comment below

> ---
>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 108 insertions(+), 22 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 69bd65c6..6c070030 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>   	pci_remove_bus(phb, &pd->children);
>   }
>   
> +static void link_training_timer(struct timer *t, void *data,
> +				uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = 0;
> +
> +	phb_lock(phb);
> +
> +	rc = slot->ops.run_sm(slot);
> +	if (rc < 0)
> +		goto out;
> +	if (rc > 0) {
> +		schedule_timer(t, rc);
> +		phb_unlock(phb);
> +		return;
> +	}
> +
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		rc = OPAL_HARDWARE;
> +		goto out;
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	phb_lock(phb);
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		if (slot->retries-- == 0) {
> +			rc = OPAL_BUSY;
> +			goto out;
> +		} else {
> +			schedule_timer(t, msecs_to_tb(10));
> +			phb_unlock(phb);
> +			return;
> +		}
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static bool training_needed(struct pci_slot *slot)
> +{
> +	struct phb *phb = slot->phb;
> +	struct pci_device *pd = slot->pd;
> +
> +	if ((pd && !pd->parent) || /* "real" PHB */
> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
> +		return true;
> +	return false;
> +}
> +
> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
> +{
> +	int64_t rc;
> +
> +	/*
> +	 * Links for PHB slots need to be retrained by triggering a
> +	 * fundamental reset. Other slots also need to be tested for
> +	 * readiness
> +	 */
> +	if (training_needed(slot)) {
> +		rc = slot->ops.freset(slot);
> +		if (rc < 0) {
> +			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +				       slot->async_token,
> +				       get_slot_phandle(slot),
> +				       slot->power_state,
> +				       rc <= 0 ? rc : OPAL_BUSY);

This can just be rc

> +			return;
> +		}
> +		init_timer(&slot->timer, link_training_timer, slot);
> +	} else {
> +		init_timer(&slot->timer, link_up_timer, slot);
> +		rc = 1;
> +	}
> +	schedule_timer(&slot->timer, rc);
> +}
> +
>   static void set_power_timer(struct timer *t __unused, void *data,
>   			    uint64_t now __unused)
>   {
>   	struct pci_slot *slot = data;
> -	uint8_t link;
>   	struct phb *phb = slot->phb;
>   
>   	phb_lock(phb);
> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   
>   		break;
>   	case PCI_SLOT_STATE_SPOWER_DONE:
> +		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>   		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>   			remove_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>   			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>   				       slot->async_token, get_slot_phandle(slot),
>   				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   		}
>   
>   		/* Power on */
> -		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> -			link = 0;
> -		if (link) {
> -			rescan_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> -		} else if (slot->retries-- == 0) {
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> -		} else {
> -			schedule_timer(&slot->timer, msecs_to_tb(10));
> -		}
> -
> +		wait_for_link_up_and_rescan(slot);
>   		break;
>   	default:
>   		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
>   		init_timer(&slot->timer, set_power_timer, slot);
>   		schedule_timer(&slot->timer, msecs_to_tb(10));
>   	} else if (rc == OPAL_SUCCESS) {
> -		if (*state == OPAL_PCI_SLOT_POWER_OFF)
> +		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>   			remove_slot_devices(slot);
> -		else
> -			rescan_slot_devices(slot);
> +		} else {
> +			wait_for_link_up_and_rescan(slot);
> +			rc = OPAL_ASYNC_COMPLETION;
> +		}
>   	}
>   
>   	phb_unlock(phb);
>
Alexey Kardashevskiy Sept. 25, 2019, 5:14 a.m. UTC | #4
On 09/09/2019 22:31, Frederic Barrat wrote:
> The link of PHB slots must be trained after powering on. This can be
> done by calling the fundamental reset callback of the slot.
> 
> We could force a reset for all the slots and have a common path in
> set_power_state(). But this patch only resets the PHB slot. Some slot
> implementations do a power cycle during fundamental reset, so calling
> a reset after powering on would repeat that operation.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>  core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 108 insertions(+), 22 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 69bd65c6..6c070030 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>  	pci_remove_bus(phb, &pd->children);
>  }
>  
> +static void link_training_timer(struct timer *t, void *data,
> +				uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = 0;
> +
> +	phb_lock(phb);
> +
> +	rc = slot->ops.run_sm(slot);
> +	if (rc < 0)
> +		goto out;
> +	if (rc > 0) {
> +		schedule_timer(t, rc);
> +		phb_unlock(phb);
> +		return;
> +	}
> +
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		rc = OPAL_HARDWARE;
> +		goto out;
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	phb_lock(phb);
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		if (slot->retries-- == 0) {
> +			rc = OPAL_BUSY;
> +			goto out;
> +		} else {
> +			schedule_timer(t, msecs_to_tb(10));
> +			phb_unlock(phb);
> +			return;
> +		}
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static bool training_needed(struct pci_slot *slot)
> +{
> +	struct phb *phb = slot->phb;
> +	struct pci_device *pd = slot->pd;
> +
> +	if ((pd && !pd->parent) || /* "real" PHB */
> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))



Having several checks for opencapi in otherwise opencapi-agnostic file says that opencapi phb/slot is not modeled correctly.

btw I remember asking if there is a machine I can use to get myself familiar with opencapi and lost the response, can
you please remind what was it (if this was you :) )? I'll just ssh to it and do lspci/sysfs kinda thing. Thanks,
Frederic Barrat Sept. 25, 2019, 11:57 a.m. UTC | #5
Le 25/09/2019 à 07:14, Alexey Kardashevskiy a écrit :
> 
> 
> On 09/09/2019 22:31, Frederic Barrat wrote:
>> The link of PHB slots must be trained after powering on. This can be
>> done by calling the fundamental reset callback of the slot.
>>
>> We could force a reset for all the slots and have a common path in
>> set_power_state(). But this patch only resets the PHB slot. Some slot
>> implementations do a power cycle during fundamental reset, so calling
>> a reset after powering on would repeat that operation.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 108 insertions(+), 22 deletions(-)
>>
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index 69bd65c6..6c070030 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>>   	pci_remove_bus(phb, &pd->children);
>>   }
>>   
>> +static void link_training_timer(struct timer *t, void *data,
>> +				uint64_t now __unused)
>> +{
>> +	struct pci_slot *slot = data;
>> +	struct phb *phb = slot->phb;
>> +	uint8_t link;
>> +	int64_t rc = 0;
>> +
>> +	phb_lock(phb);
>> +
>> +	rc = slot->ops.run_sm(slot);
>> +	if (rc < 0)
>> +		goto out;
>> +	if (rc > 0) {
>> +		schedule_timer(t, rc);
>> +		phb_unlock(phb);
>> +		return;
>> +	}
>> +
>> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +		link = 0;
>> +	if (!link) {
>> +		rc = OPAL_HARDWARE;
>> +		goto out;
>> +	}
>> +
>> +	rescan_slot_devices(slot);
>> +out:
>> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +		       slot->async_token, get_slot_phandle(slot),
>> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +	phb_unlock(phb);
>> +}
>> +
>> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
>> +{
>> +	struct pci_slot *slot = data;
>> +	struct phb *phb = slot->phb;
>> +	uint8_t link;
>> +	int64_t rc = OPAL_SUCCESS;
>> +
>> +	phb_lock(phb);
>> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +		link = 0;
>> +	if (!link) {
>> +		if (slot->retries-- == 0) {
>> +			rc = OPAL_BUSY;
>> +			goto out;
>> +		} else {
>> +			schedule_timer(t, msecs_to_tb(10));
>> +			phb_unlock(phb);
>> +			return;
>> +		}
>> +	}
>> +
>> +	rescan_slot_devices(slot);
>> +out:
>> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +		       slot->async_token, get_slot_phandle(slot),
>> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +	phb_unlock(phb);
>> +}
>> +
>> +static bool training_needed(struct pci_slot *slot)
>> +{
>> +	struct phb *phb = slot->phb;
>> +	struct pci_device *pd = slot->pd;
>> +
>> +	if ((pd && !pd->parent) || /* "real" PHB */
>> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
> 
> 
> 
> Having several checks for opencapi in otherwise opencapi-agnostic file says that opencapi phb/slot is not modeled correctly.

Hi Alexey,

In that file, there are 2 things we do differently for opencapi:
1. determine if the link needs to be retrained or not. Note that in the 
training_needed() function here, the "real" PHB case is a bit of a 
stretch, I don't think it's needed yet, but I believe Oliver has 
something cooking related to PHB hotplug. I should probably remove it 
for now though.
2. populate the correct list of devices when rescanning/removing a slot.

Both shouldn't be related to opencapi per se, but they could also apply 
to a "real" PHB, I guess. So we could have a marker on a PHB slot and 
branch off that, it would be more general, but I'm not sure how that 
would/could be used with real PHBs. Is that more or less what you had in 
mind?

   Fred



> btw I remember asking if there is a machine I can use to get myself familiar with opencapi and lost the response, can
> you please remind what was it (if this was you :) )? I'll just ssh to it and do lspci/sysfs kinda thing. Thanks,
> 
> 
>
Alexey Kardashevskiy Sept. 27, 2019, 1:51 a.m. UTC | #6
On 25/09/2019 21:57, Frederic Barrat wrote:
> 
> 
> Le 25/09/2019 à 07:14, Alexey Kardashevskiy a écrit :
>>
>>
>> On 09/09/2019 22:31, Frederic Barrat wrote:
>>> The link of PHB slots must be trained after powering on. This can be
>>> done by calling the fundamental reset callback of the slot.
>>>
>>> We could force a reset for all the slots and have a common path in
>>> set_power_state(). But this patch only resets the PHB slot. Some slot
>>> implementations do a power cycle during fundamental reset, so calling
>>> a reset after powering on would repeat that operation.
>>>
>>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>>> ---
>>>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>>>   1 file changed, 108 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>>> index 69bd65c6..6c070030 100644
>>> --- a/core/pci-opal.c
>>> +++ b/core/pci-opal.c
>>> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>>>       pci_remove_bus(phb, &pd->children);
>>>   }
>>>   +static void link_training_timer(struct timer *t, void *data,
>>> +                uint64_t now __unused)
>>> +{
>>> +    struct pci_slot *slot = data;
>>> +    struct phb *phb = slot->phb;
>>> +    uint8_t link;
>>> +    int64_t rc = 0;
>>> +
>>> +    phb_lock(phb);
>>> +
>>> +    rc = slot->ops.run_sm(slot);
>>> +    if (rc < 0)
>>> +        goto out;
>>> +    if (rc > 0) {
>>> +        schedule_timer(t, rc);
>>> +        phb_unlock(phb);
>>> +        return;
>>> +    }
>>> +
>>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>>> +        link = 0;
>>> +    if (!link) {
>>> +        rc = OPAL_HARDWARE;
>>> +        goto out;
>>> +    }
>>> +
>>> +    rescan_slot_devices(slot);
>>> +out:
>>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>>> +               slot->async_token, get_slot_phandle(slot),
>>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>>> +    phb_unlock(phb);
>>> +}
>>> +
>>> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
>>> +{
>>> +    struct pci_slot *slot = data;
>>> +    struct phb *phb = slot->phb;
>>> +    uint8_t link;
>>> +    int64_t rc = OPAL_SUCCESS;
>>> +
>>> +    phb_lock(phb);
>>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>>> +        link = 0;
>>> +    if (!link) {
>>> +        if (slot->retries-- == 0) {
>>> +            rc = OPAL_BUSY;
>>> +            goto out;
>>> +        } else {
>>> +            schedule_timer(t, msecs_to_tb(10));
>>> +            phb_unlock(phb);
>>> +            return;
>>> +        }
>>> +    }
>>> +
>>> +    rescan_slot_devices(slot);
>>> +out:
>>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>>> +               slot->async_token, get_slot_phandle(slot),
>>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>>> +    phb_unlock(phb);
>>> +}
>>> +
>>> +static bool training_needed(struct pci_slot *slot)
>>> +{
>>> +    struct phb *phb = slot->phb;
>>> +    struct pci_device *pd = slot->pd;
>>> +
>>> +    if ((pd && !pd->parent) || /* "real" PHB */
>>> +        (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
>>
>>
>>
>> Having several checks for opencapi in otherwise opencapi-agnostic file says that opencapi phb/slot is not modeled
>> correctly.
> 
> Hi Alexey,
> 
> In that file, there are 2 things we do differently for opencapi:
> 1. determine if the link needs to be retrained or not. Note that in the training_needed() function here, the "real" PHB
> case is a bit of a stretch, I don't think it's needed yet, but I believe Oliver has something cooking related to PHB
> hotplug. I should probably remove it for now though.
> 2. populate the correct list of devices when rescanning/removing a slot.
> 
> Both shouldn't be related to opencapi per se, but they could also apply to a "real" PHB, I guess. So we could have a
> marker on a PHB slot and branch off that, it would be more general, but I'm not sure how that would/could be used with
> real PHBs. Is that more or less what you had in mind?

Not exactly, I did not get that far :)

The commit log says it can use a common path but some slots do something sometime and this is where I got lost. I
expected a slot-specific pci_slot_ops::set_link_state/set_power_state rather than branching in training_needed(). But
you can ignore me if Oliver is happy, he knows this code way better :)



> 
>   Fred
> 
> 
> 
>> btw I remember asking if there is a machine I can use to get myself familiar with opencapi and lost the response, can
>> you please remind what was it (if this was you :) )? I'll just ssh to it and do lspci/sysfs kinda thing. Thanks,
>>
>>
>>
>
Oliver O'Halloran Oct. 1, 2019, 12:02 p.m. UTC | #7
On Mon, 2019-09-09 at 14:31 +0200, Frederic Barrat wrote:
> The link of PHB slots must be trained after powering on. This can be
> done by calling the fundamental reset callback of the slot.
> 
> We could force a reset for all the slots and have a common path in
> set_power_state(). But this patch only resets the PHB slot. Some slot
> implementations do a power cycle during fundamental reset, so calling
> a reset after powering on would repeat that operation.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>  core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 108 insertions(+), 22 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 69bd65c6..6c070030 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>  	pci_remove_bus(phb, &pd->children);
>  }
>  
> +static void link_training_timer(struct timer *t, void *data,
> +				uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = 0;
> +
> +	phb_lock(phb);
> +
> +	rc = slot->ops.run_sm(slot);
> +	if (rc < 0)
> +		goto out;
> +	if (rc > 0) {
> +		schedule_timer(t, rc);
> +		phb_unlock(phb);
> +		return;
> +	}
> +
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		rc = OPAL_HARDWARE;
> +		goto out;
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +
> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
> +{
> +	struct pci_slot *slot = data;
> +	struct phb *phb = slot->phb;
> +	uint8_t link;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	phb_lock(phb);
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (!link) {
> +		if (slot->retries-- == 0) {
> +			rc = OPAL_BUSY;
> +			goto out;
> +		} else {
> +			schedule_timer(t, msecs_to_tb(10));
> +			phb_unlock(phb);
> +			return;
> +		}
> +	}
> +
> +	rescan_slot_devices(slot);
> +out:
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, get_slot_phandle(slot),
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	phb_unlock(phb);
> +}
> +

> +static bool training_needed(struct pci_slot *slot)
> +{
> +	struct phb *phb = slot->phb;
> +	struct pci_device *pd = slot->pd;
> +
> +	if ((pd && !pd->parent) || /* "real" PHB */
> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
> +		return true;
> +	return false;
> +}

Any reason why training_needed() couldn't just be a link state check?
That's basicly what's happening here anyway with a one timer tick wait
added 

 If that wait is important for some reason then that should be
reflected via a state machine transition rather.

> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
> +{
> +	int64_t rc;
> +
> +	/*
> +	 * Links for PHB slots need to be retrained by triggering a
> +	 * fundamental reset. Other slots also need to be tested for
> +	 * readiness
> +	 */
> +	if (training_needed(slot)) {
> +		rc = slot->ops.freset(slot);
> +		if (rc < 0) {
> +			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +				       slot->async_token,
> +				       get_slot_phandle(slot),
> +				       slot->power_state,
> +				       rc <= 0 ? rc : OPAL_BUSY);
> +			return;
> +		}
> +		init_timer(&slot->timer, link_training_timer, slot);
> +	} else {
> +		init_timer(&slot->timer, link_up_timer, slot);
> +		rc = 1;
> +	}
> +	schedule_timer(&slot->timer, rc);
> +}

This doesn't make much sense to me. The only difference between the two
paths here is that in the !training_needed() case we call link_up_timer
which polls the link state with ops.get_link_state() rather than
cranking the state machine. However, this is more or less the scenario
that the POLL_LINK state is for, so... why not use that?

>  static void set_power_timer(struct timer *t __unused, void *data,
>  			    uint64_t now __unused)
>  {
>  	struct pci_slot *slot = data;
> -	uint8_t link;
>  	struct phb *phb = slot->phb;
>  
>  	phb_lock(phb);
> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>  
>  		break;
>  	case PCI_SLOT_STATE_SPOWER_DONE:
> +		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>  		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>  			remove_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> 			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>  				       slot->async_token, get_slot_phandle(slot),
>  				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>  		}
>  
>  		/* Power on */
> -		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> -			link = 0;
> -		if (link) {
> -			rescan_slot_devices(slot);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> -		} else if (slot->retries-- == 0) {
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, get_slot_phandle(slot),
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> -		} else {
> -			schedule_timer(&slot->timer, msecs_to_tb(10));
> -		}
> -
> +		wait_for_link_up_and_rescan(slot);
>  		break;
>  	default:
>  		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
>  		init_timer(&slot->timer, set_power_timer, slot);
>  		schedule_timer(&slot->timer, msecs_to_tb(10));
>  	} else if (rc == OPAL_SUCCESS) {
> -		if (*state == OPAL_PCI_SLOT_POWER_OFF)
> +		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>  			remove_slot_devices(slot);
> -		else
> -			rescan_slot_devices(slot);
> +		} else {
> +			wait_for_link_up_and_rescan(slot);
> +			rc = OPAL_ASYNC_COMPLETION;
> +		}
>  	}
>  
>  	phb_unlock(phb);
Frederic Barrat Oct. 1, 2019, 5:27 p.m. UTC | #8
Le 01/10/2019 à 14:02, Oliver O'Halloran a écrit :
> On Mon, 2019-09-09 at 14:31 +0200, Frederic Barrat wrote:
>> The link of PHB slots must be trained after powering on. This can be
>> done by calling the fundamental reset callback of the slot.
>>
>> We could force a reset for all the slots and have a common path in
>> set_power_state(). But this patch only resets the PHB slot. Some slot
>> implementations do a power cycle during fundamental reset, so calling
>> a reset after powering on would repeat that operation.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 108 insertions(+), 22 deletions(-)
>>
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index 69bd65c6..6c070030 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
>>   	pci_remove_bus(phb, &pd->children);
>>   }
>>   
>> +static void link_training_timer(struct timer *t, void *data,
>> +				uint64_t now __unused)
>> +{
>> +	struct pci_slot *slot = data;
>> +	struct phb *phb = slot->phb;
>> +	uint8_t link;
>> +	int64_t rc = 0;
>> +
>> +	phb_lock(phb);
>> +
>> +	rc = slot->ops.run_sm(slot);
>> +	if (rc < 0)
>> +		goto out;
>> +	if (rc > 0) {
>> +		schedule_timer(t, rc);
>> +		phb_unlock(phb);
>> +		return;
>> +	}
>> +
>> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +		link = 0;
>> +	if (!link) {
>> +		rc = OPAL_HARDWARE;
>> +		goto out;
>> +	}
>> +
>> +	rescan_slot_devices(slot);
>> +out:
>> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +		       slot->async_token, get_slot_phandle(slot),
>> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +	phb_unlock(phb);
>> +}
>> +
>> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
>> +{
>> +	struct pci_slot *slot = data;
>> +	struct phb *phb = slot->phb;
>> +	uint8_t link;
>> +	int64_t rc = OPAL_SUCCESS;
>> +
>> +	phb_lock(phb);
>> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +		link = 0;
>> +	if (!link) {
>> +		if (slot->retries-- == 0) {
>> +			rc = OPAL_BUSY;
>> +			goto out;
>> +		} else {
>> +			schedule_timer(t, msecs_to_tb(10));
>> +			phb_unlock(phb);
>> +			return;
>> +		}
>> +	}
>> +
>> +	rescan_slot_devices(slot);
>> +out:
>> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +		       slot->async_token, get_slot_phandle(slot),
>> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +	phb_unlock(phb);
>> +}
>> +
> 
>> +static bool training_needed(struct pci_slot *slot)
>> +{
>> +	struct phb *phb = slot->phb;
>> +	struct pci_device *pd = slot->pd;
>> +
>> +	if ((pd && !pd->parent) || /* "real" PHB */
>> +	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
>> +		return true;
>> +	return false;
>> +}
> 
> Any reason why training_needed() couldn't just be a link state check?
> That's basicly what's happening here anyway with a one timer tick wait
> added
> 
>   If that wait is important for some reason then that should be
> reflected via a state machine transition rather.


At least in the opencapi case, where the power of the adapter stays on, 
the link is still up at the time. The card is fenced and in reset, but 
the link will drop when we release the reset (due to the implementation 
of the DL on the fpga). So reading the link state would tell everything 
is fine, when we know we'll need to retrain the link.

For the cases were we don't retrain and just wait, the 1 time tick 
doesn't matter, we'll loop in link_up_timer() anyway. I guess I could 
default it to something like the equivalent if 1ms which could save us 
one iteration of the loop?


>> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
>> +{
>> +	int64_t rc;
>> +
>> +	/*
>> +	 * Links for PHB slots need to be retrained by triggering a
>> +	 * fundamental reset. Other slots also need to be tested for
>> +	 * readiness
>> +	 */
>> +	if (training_needed(slot)) {
>> +		rc = slot->ops.freset(slot);
>> +		if (rc < 0) {
>> +			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +				       slot->async_token,
>> +				       get_slot_phandle(slot),
>> +				       slot->power_state,
>> +				       rc <= 0 ? rc : OPAL_BUSY);
>> +			return;
>> +		}
>> +		init_timer(&slot->timer, link_training_timer, slot);
>> +	} else {
>> +		init_timer(&slot->timer, link_up_timer, slot);
>> +		rc = 1;
>> +	}
>> +	schedule_timer(&slot->timer, rc);
>> +}
> 
> This doesn't make much sense to me. The only difference between the two
> paths here is that in the !training_needed() case we call link_up_timer
> which polls the link state with ops.get_link_state() rather than
> cranking the state machine. However, this is more or less the scenario
> that the POLL_LINK state is for, so... why not use that?


For the case we need to retrain, we need to crank the state machine as 
we'll likely need more than one call to ops.freset(), which will end in 
ops.poll_link().
For the case where we don't retrain, are you suggesting that setting the 
slot state to PCI_SLOT_STATE_LINK_START_POLL and crank the state machine 
would do? My understanding is that when polling the link, skiboot is bit 
more "active" then just waiting in get_link_state()

   Fred



>>   static void set_power_timer(struct timer *t __unused, void *data,
>>   			    uint64_t now __unused)
>>   {
>>   	struct pci_slot *slot = data;
>> -	uint8_t link;
>>   	struct phb *phb = slot->phb;
>>   
>>   	phb_lock(phb);
>> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>>   
>>   		break;
>>   	case PCI_SLOT_STATE_SPOWER_DONE:
>> +		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>>   		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>>   			remove_slot_devices(slot);
>> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> 			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>>   				       slot->async_token, get_slot_phandle(slot),
>>   				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
>> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
>>   		}
>>   
>>   		/* Power on */
>> -		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> -			link = 0;
>> -		if (link) {
>> -			rescan_slot_devices(slot);
>> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -				       slot->async_token, get_slot_phandle(slot),
>> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
>> -		} else if (slot->retries-- == 0) {
>> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -				       slot->async_token, get_slot_phandle(slot),
>> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
>> -		} else {
>> -			schedule_timer(&slot->timer, msecs_to_tb(10));
>> -		}
>> -
>> +		wait_for_link_up_and_rescan(slot);
>>   		break;
>>   	default:
>>   		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
>>   		init_timer(&slot->timer, set_power_timer, slot);
>>   		schedule_timer(&slot->timer, msecs_to_tb(10));
>>   	} else if (rc == OPAL_SUCCESS) {
>> -		if (*state == OPAL_PCI_SLOT_POWER_OFF)
>> +		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>>   			remove_slot_devices(slot);
>> -		else
>> -			rescan_slot_devices(slot);
>> +		} else {
>> +			wait_for_link_up_and_rescan(slot);
>> +			rc = OPAL_ASYNC_COMPLETION;
>> +		}
>>   	}
>>   
>>   	phb_unlock(phb);
>
diff mbox series

Patch

diff --git a/core/pci-opal.c b/core/pci-opal.c
index 69bd65c6..6c070030 100644
--- a/core/pci-opal.c
+++ b/core/pci-opal.c
@@ -678,11 +678,111 @@  static void remove_slot_devices(struct pci_slot *slot)
 	pci_remove_bus(phb, &pd->children);
 }
 
+static void link_training_timer(struct timer *t, void *data,
+				uint64_t now __unused)
+{
+	struct pci_slot *slot = data;
+	struct phb *phb = slot->phb;
+	uint8_t link;
+	int64_t rc = 0;
+
+	phb_lock(phb);
+
+	rc = slot->ops.run_sm(slot);
+	if (rc < 0)
+		goto out;
+	if (rc > 0) {
+		schedule_timer(t, rc);
+		phb_unlock(phb);
+		return;
+	}
+
+	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
+		link = 0;
+	if (!link) {
+		rc = OPAL_HARDWARE;
+		goto out;
+	}
+
+	rescan_slot_devices(slot);
+out:
+	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+		       slot->async_token, get_slot_phandle(slot),
+		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
+	phb_unlock(phb);
+}
+
+static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
+{
+	struct pci_slot *slot = data;
+	struct phb *phb = slot->phb;
+	uint8_t link;
+	int64_t rc = OPAL_SUCCESS;
+
+	phb_lock(phb);
+	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
+		link = 0;
+	if (!link) {
+		if (slot->retries-- == 0) {
+			rc = OPAL_BUSY;
+			goto out;
+		} else {
+			schedule_timer(t, msecs_to_tb(10));
+			phb_unlock(phb);
+			return;
+		}
+	}
+
+	rescan_slot_devices(slot);
+out:
+	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+		       slot->async_token, get_slot_phandle(slot),
+		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
+	phb_unlock(phb);
+}
+
+static bool training_needed(struct pci_slot *slot)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+
+	if ((pd && !pd->parent) || /* "real" PHB */
+	    (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
+		return true;
+	return false;
+}
+
+static void wait_for_link_up_and_rescan(struct pci_slot *slot)
+{
+	int64_t rc;
+
+	/*
+	 * Links for PHB slots need to be retrained by triggering a
+	 * fundamental reset. Other slots also need to be tested for
+	 * readiness
+	 */
+	if (training_needed(slot)) {
+		rc = slot->ops.freset(slot);
+		if (rc < 0) {
+			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+				       slot->async_token,
+				       get_slot_phandle(slot),
+				       slot->power_state,
+				       rc <= 0 ? rc : OPAL_BUSY);
+			return;
+		}
+		init_timer(&slot->timer, link_training_timer, slot);
+	} else {
+		init_timer(&slot->timer, link_up_timer, slot);
+		rc = 1;
+	}
+	schedule_timer(&slot->timer, rc);
+}
+
 static void set_power_timer(struct timer *t __unused, void *data,
 			    uint64_t now __unused)
 {
 	struct pci_slot *slot = data;
-	uint8_t link;
 	struct phb *phb = slot->phb;
 
 	phb_lock(phb);
@@ -699,9 +799,9 @@  static void set_power_timer(struct timer *t __unused, void *data,
 
 		break;
 	case PCI_SLOT_STATE_SPOWER_DONE:
+		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
 		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
 			remove_slot_devices(slot);
-			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
 			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
 				       slot->async_token, get_slot_phandle(slot),
 				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
@@ -709,23 +809,7 @@  static void set_power_timer(struct timer *t __unused, void *data,
 		}
 
 		/* Power on */
-		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
-			link = 0;
-		if (link) {
-			rescan_slot_devices(slot);
-			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
-			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
-				       slot->async_token, get_slot_phandle(slot),
-				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
-		} else if (slot->retries-- == 0) {
-			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
-			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
-				       slot->async_token, get_slot_phandle(slot),
-				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
-		} else {
-			schedule_timer(&slot->timer, msecs_to_tb(10));
-		}
-
+		wait_for_link_up_and_rescan(slot);
 		break;
 	default:
 		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
@@ -808,10 +892,12 @@  static int64_t opal_pci_set_power_state(uint64_t async_token,
 		init_timer(&slot->timer, set_power_timer, slot);
 		schedule_timer(&slot->timer, msecs_to_tb(10));
 	} else if (rc == OPAL_SUCCESS) {
-		if (*state == OPAL_PCI_SLOT_POWER_OFF)
+		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
 			remove_slot_devices(slot);
-		else
-			rescan_slot_devices(slot);
+		} else {
+			wait_for_link_up_and_rescan(slot);
+			rc = OPAL_ASYNC_COMPLETION;
+		}
 	}
 
 	phb_unlock(phb);