diff mbox series

[09/13] crypto: qat - limit heartbeat notifications

Message ID 20240307220551.3529171-10-thibault.ferrante@canonical.com
State New
Headers show
Series crypto: qat - improve recovery flows | expand

Commit Message

Thibault Ferrante March 7, 2024, 10:05 p.m. UTC
From: Furong Zhou <furong.zhou@intel.com>

BugLink: https://bugs.launchpad.net/bugs/2056354

When the driver detects a heartbeat failure, it starts the recovery
flow. Rate-limit the resulting fatal error notifications to at most
one every ADF_CFG_HB_RESET_MS (5000 ms), so that the number of events
stays bounded in case the heartbeat status is read too frequently.

Signed-off-by: Furong Zhou <furong.zhou@intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
Reviewed-by: Markas Rapoportas <markas.rapoportas@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Mun Chun Yep <mun.chun.yep@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
(cherry picked from commit 750fa7c20e60926431ec50d63899771ffcd9fd5c linux-next)
Signed-off-by: Thibault Ferrante <thibault.ferrante@canonical.com>
---
 .../crypto/intel/qat/qat_common/adf_heartbeat.c | 17 ++++++++++++++---
 .../crypto/intel/qat/qat_common/adf_heartbeat.h |  3 +++
 2 files changed, 17 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
index fe8428d4ff39..b19aa1ef8eee 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -205,6 +205,19 @@  static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
 	return ret;
 }
 
+static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
+{
+	u64 curr_time = adf_clock_get_current_time();
+	u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
+
+	if (time_since_reset < ADF_CFG_HB_RESET_MS)
+		return;
+
+	accel_dev->heartbeat->last_hb_reset_time = curr_time;
+	if (adf_notify_fatal_error(accel_dev))
+		dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
+}
+
 void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
 			  enum adf_device_heartbeat_status *hb_status)
 {
@@ -229,9 +242,7 @@  void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
 			"Heartbeat ERROR: QAT is not responding.\n");
 		*hb_status = HB_DEV_UNRESPONSIVE;
 		hb->hb_failed_counter++;
-		if (adf_notify_fatal_error(accel_dev))
-			dev_err(&GET_DEV(accel_dev),
-				"Failed to notify fatal error\n");
+		adf_heartbeat_reset(accel_dev);
 		return;
 	}
 
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
index 24c3f4f24c86..16fdfb48b196 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -13,6 +13,8 @@  struct dentry;
 #define ADF_CFG_HB_TIMER_DEFAULT_MS 500
 #define ADF_CFG_HB_COUNT_THRESHOLD 3
 
+#define ADF_CFG_HB_RESET_MS 5000
+
 enum adf_device_heartbeat_status {
 	HB_DEV_UNRESPONSIVE = 0,
 	HB_DEV_ALIVE,
@@ -30,6 +32,7 @@  struct adf_heartbeat {
 	unsigned int hb_failed_counter;
 	unsigned int hb_timer;
 	u64 last_hb_check_time;
+	u64 last_hb_reset_time;
 	bool ctrs_cnt_checked;
 	struct hb_dma_addr {
 		dma_addr_t phy_addr;