[09/16] npu2-opencapi: Rework link training timeout
diff mbox series

Message ID 20190909123151.21944-10-fbarrat@linux.ibm.com
State New
Headers show
Series
  • opencapi: enable card reset and link retraining
Related show

Checks

Context Check Description
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (470ffb5f29d741c3bed600f7bb7bf0cbb270e05a)

Commit Message

Frederic Barrat Sept. 9, 2019, 12:31 p.m. UTC
The OpenCAPI link state should be polled for up to 3 seconds. The current
code assumes a tight retry loop during fundamental reset at boot, which
will not be true on link retraining. So update the timeout detection
code to use the timebase instead of a simple retry count, which could
make the timeout far too long.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---
 hw/npu2-opencapi.c | 9 +++++----
 include/npu2.h     | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

Patch
diff mbox series

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 504c9208..f7be9f09 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1127,13 +1127,13 @@  static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
 		reg = get_odl_status(chip_id, dev->brick_index);
 		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) ==
 			OCAPI_LINK_STATE_TRAINED) {
-			OCAPIINF(dev, "link trained in %lld ms\n",
-				OCAPI_LINK_TRAINING_TIMEOUT - slot->retries);
+			OCAPIINF(dev, "link trained in %ld ms\n",
+				 tb_to_msecs(mftb() - dev->train_start));
 			check_trained_link(dev, reg);
 			pci_slot_set_state(slot, OCAPI_SLOT_LINK_TRAINED);
 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
 		}
-		if (slot->retries-- == 0)
+		if (tb_compare(mftb(), dev->train_timeout) == TB_AAFTERB)
 			return npu2_opencapi_retry_state(slot, reg);
 
 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
@@ -1239,7 +1239,8 @@  static int64_t npu2_opencapi_freset(struct pci_slot *slot)
 		/* Bump lanes - this improves training reliability */
 		npu2_opencapi_bump_ui_lane(dev);
 		start_training(chip_id, dev);
-		slot->retries = OCAPI_LINK_TRAINING_TIMEOUT;
+		dev->train_start = mftb();
+		dev->train_timeout = dev->train_start + msecs_to_tb(OCAPI_LINK_TRAINING_TIMEOUT);
 		pci_slot_set_state(slot, OCAPI_SLOT_LINK_START);
 		return slot->ops.poll_link(slot);
 
diff --git a/include/npu2.h b/include/npu2.h
index aac7e7a5..d2316dc1 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -147,6 +147,8 @@  struct npu2_dev {
 	uint64_t		linux_pe;
 	bool			train_need_fence;
 	bool			train_fenced;
+	unsigned long		train_start;
+	unsigned long		train_timeout;
 };
 
 struct npu2 {