diff mbox series

[03/13] crypto: qat - add fatal error notify method

Message ID 20240307220551.3529171-4-thibault.ferrante@canonical.com
State New
Headers show
Series crypto: qat - improve recovery flows | expand

Commit Message

Thibault Ferrante March 7, 2024, 10:05 p.m. UTC
From: Furong Zhou <furong.zhou@intel.com>

Buglink: https://bugs.launchpad.net/bugs/2056354

Add error notify method to report a fatal error event to all the
subsystems registered. In addition expose an API,
adf_notify_fatal_error(), that allows to trigger a fatal error
notification asynchronously in the context of a workqueue.

This will be invoked when a fatal error is detected by the ISR or
through Heartbeat.

Signed-off-by: Furong Zhou <furong.zhou@intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
Reviewed-by: Markas Rapoportas <markas.rapoportas@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Mun Chun Yep <mun.chun.yep@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
(cherry picked from commit ae508d7afb753f7576c435226e32b9535b7f8b10 linux-next)
Signed-off-by: Thibault Ferrante <thibault.ferrante@canonical.com>
---
 drivers/crypto/intel/qat/qat_common/adf_aer.c | 30 +++++++++++++++++++
 .../intel/qat/qat_common/adf_common_drv.h     |  3 ++
 .../crypto/intel/qat/qat_common/adf_init.c    | 12 ++++++++
 3 files changed, 45 insertions(+)
diff mbox series

Patch

diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c
index a39e70bd4b21..22a43b4b8315 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c
@@ -8,6 +8,11 @@ 
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
 
+struct adf_fatal_error_data {
+	struct adf_accel_dev *accel_dev;
+	struct work_struct work;
+};
+
 static struct workqueue_struct *device_reset_wq;
 
 static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
@@ -171,6 +176,31 @@  const struct pci_error_handlers adf_err_handler = {
 };
 EXPORT_SYMBOL_GPL(adf_err_handler);
 
+static void adf_notify_fatal_error_worker(struct work_struct *work)
+{
+	struct adf_fatal_error_data *wq_data =
+			container_of(work, struct adf_fatal_error_data, work);
+	struct adf_accel_dev *accel_dev = wq_data->accel_dev;
+
+	adf_error_notifier(accel_dev);
+	kfree(wq_data);
+}
+
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+	struct adf_fatal_error_data *wq_data;
+
+	wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
+	if (!wq_data)
+		return -ENOMEM;
+
+	wq_data->accel_dev = accel_dev;
+	INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker);
+	adf_misc_wq_queue_work(&wq_data->work);
+
+	return 0;
+}
+
 int adf_init_aer(void)
 {
 	device_reset_wq = alloc_workqueue("qat_device_reset_wq",
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index 0baae42deb3a..8c062d5a8db2 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -40,6 +40,7 @@  enum adf_event {
 	ADF_EVENT_SHUTDOWN,
 	ADF_EVENT_RESTARTING,
 	ADF_EVENT_RESTARTED,
+	ADF_EVENT_FATAL_ERROR,
 };
 
 struct service_hndl {
@@ -60,6 +61,8 @@  int adf_dev_restart(struct adf_accel_dev *accel_dev);
 
 void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
 void adf_clean_vf_map(bool);
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+void adf_error_notifier(struct adf_accel_dev *accel_dev);
 int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
 		       struct adf_accel_dev *pf);
 void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index f43ae9111553..74f0818c0703 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -433,6 +433,18 @@  int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
 	return 0;
 }
 
+void adf_error_notifier(struct adf_accel_dev *accel_dev)
+{
+	struct service_hndl *service;
+
+	list_for_each_entry(service, &service_table, list) {
+		if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR))
+			dev_err(&GET_DEV(accel_dev),
+				"Failed to send error event to %s.\n",
+				service->name);
+	}
+}
+
 static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev)
 {
 	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};