[5/5] npu2-opencapi: Fix link state to report link down

Message ID 20180412124526.12662-6-fbarrat@linux.ibm.com
State New
Headers show
Series
  • npu2-opencapi: Use presence detection and reset
Related show

Commit Message

Frederic Barrat April 12, 2018, 12:45 p.m.
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

The PHB callback 'get_link_state' always reports the link width,
irrespective of the link status, even when the link is down. This
causes unnecessary work (and failures) when the PHB is probed during
PCI init.
The fix is to check the link training status first and report the
link as down when the link is not in the trained state.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
---
 hw/npu2-opencapi.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Andrew Donnellan April 13, 2018, 4:37 a.m. | #1
On 12/04/18 22:45, Frederic Barrat wrote:
> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> 
> The PHB callback 'get_link_state' is always reporting the link width,
> irrespective of the link status and even when the link is down. It is
> causing too much work (and failures) when the PHB is probed during pci
> init.
> The fix is to look at the link status first and report the link as
> down when appropriate.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

> ---
>   hw/npu2-opencapi.c | 13 +++++++++++--
>   1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 24adf5e1..a0fdf22b 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -80,6 +80,7 @@
>   
>   #define OCAPI_LINK_TRAINING_RETRIES	5
>   #define OCAPI_LINK_TRAINING_TIMEOUT	3000 /* ms */
> +#define OCAPI_LINK_STATE_TRAINED        0x7
>   
>   enum npu2_link_training_state {
>   	NPU2_TRAIN_DEFAULT, /* fully train the link */
> @@ -1032,10 +1033,17 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
>   {
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>   	uint64_t reg;
> -	int64_t link_width, rc = OPAL_SUCCESS;
> +	int64_t link_width, training_status, rc = OPAL_SUCCESS;
>   
>   	reg = get_odl_status(dev->npu->chip_id, dev->index);
>   	link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
> +	training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg);
> +
> +	if (training_status != OCAPI_LINK_STATE_TRAINED) {
> +		*val = OPAL_SHPC_LINK_DOWN;
> +		return OPAL_SUCCESS;
> +	}
> +
>   	switch (link_width) {
>   	case 0b0001:
>   		*val = OPAL_SHPC_LINK_UP_x4;
> @@ -1086,7 +1094,8 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
>   		/* fall-through */
>   	case OCAPI_SLOT_LINK_WAIT:
>   		reg = get_odl_status(chip_id, dev->index);
> -		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) {
> +		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) ==
> +			OCAPI_LINK_STATE_TRAINED) {
>   			OCAPIINF(dev, "link trained in %lld ms\n",
>   				OCAPI_LINK_TRAINING_TIMEOUT - slot->retries);
>   			pci_slot_set_state(slot, OCAPI_SLOT_LINK_TRAINED);
>

Patch

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 24adf5e1..a0fdf22b 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -80,6 +80,7 @@ 
 
 #define OCAPI_LINK_TRAINING_RETRIES	5
 #define OCAPI_LINK_TRAINING_TIMEOUT	3000 /* ms */
+#define OCAPI_LINK_STATE_TRAINED        0x7
 
 enum npu2_link_training_state {
 	NPU2_TRAIN_DEFAULT, /* fully train the link */
@@ -1032,10 +1033,17 @@  static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
 {
 	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
 	uint64_t reg;
-	int64_t link_width, rc = OPAL_SUCCESS;
+	int64_t link_width, training_status, rc = OPAL_SUCCESS;
 
 	reg = get_odl_status(dev->npu->chip_id, dev->index);
 	link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
+	training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg);
+
+	if (training_status != OCAPI_LINK_STATE_TRAINED) {
+		*val = OPAL_SHPC_LINK_DOWN;
+		return OPAL_SUCCESS;
+	}
+
 	switch (link_width) {
 	case 0b0001:
 		*val = OPAL_SHPC_LINK_UP_x4;
@@ -1086,7 +1094,8 @@  static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
 		/* fall-through */
 	case OCAPI_SLOT_LINK_WAIT:
 		reg = get_odl_status(chip_id, dev->index);
-		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) {
+		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) ==
+			OCAPI_LINK_STATE_TRAINED) {
 			OCAPIINF(dev, "link trained in %lld ms\n",
 				OCAPI_LINK_TRAINING_TIMEOUT - slot->retries);
 			pci_slot_set_state(slot, OCAPI_SLOT_LINK_TRAINED);