@@ -63,6 +63,11 @@
/* Max size of area it should be safe to request */
#define NFP_CPP_SAFE_AREA_SIZE SZ_2M
+/* NFP_MUTEX_WAIT_* are timeouts in seconds when waiting for a mutex */
+#define NFP_MUTEX_WAIT_FIRST_WARN 15
+#define NFP_MUTEX_WAIT_NEXT_WARN 5
+#define NFP_MUTEX_WAIT_ERROR 60
+
struct device;
struct nfp_cpp_area;
@@ -195,7 +195,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex)
*/
int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex)
{
- unsigned long warn_at = jiffies + 15 * HZ;
+ unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ;
+ unsigned long err_at = jiffies + NFP_MUTEX_WAIT_ERROR * HZ;
unsigned int timeout_ms = 1;
int err;
@@ -214,12 +215,16 @@ int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex)
return -ERESTARTSYS;
if (time_is_before_eq_jiffies(warn_at)) {
- warn_at = jiffies + 60 * HZ;
+ warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ;
nfp_warn(mutex->cpp,
"Warning: waiting for NFP mutex [depth:%hd target:%d addr:%llx key:%08x]\n",
mutex->depth,
mutex->target, mutex->address, mutex->key);
}
+ if (time_is_before_eq_jiffies(err_at)) {
+ nfp_err(mutex->cpp, "Error: mutex wait timed out\n");
+ return -EBUSY;
+ }
}
return err;
@@ -181,7 +181,8 @@ nfp_resource_try_acquire(struct nfp_cpp *cpp, struct nfp_resource *res,
struct nfp_resource *
nfp_resource_acquire(struct nfp_cpp *cpp, const char *name)
{
- unsigned long warn_at = jiffies + 15 * HZ;
+ unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ;
+ unsigned long err_at = jiffies + NFP_MUTEX_WAIT_ERROR * HZ;
struct nfp_cpp_mutex *dev_mutex;
struct nfp_resource *res;
int err;
@@ -214,10 +215,15 @@ nfp_resource_acquire(struct nfp_cpp *cpp, const char *name)
}
if (time_is_before_eq_jiffies(warn_at)) {
- warn_at = jiffies + 60 * HZ;
+ warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ;
nfp_warn(cpp, "Warning: waiting for NFP resource %s\n",
name);
}
+ if (time_is_before_eq_jiffies(err_at)) {
+ nfp_err(cpp, "Error: resource %s timed out\n", name);
+ err = -EBUSY;
+ goto err_free;
+ }
}
nfp_cpp_mutex_free(dev_mutex);
There is currently no timeout to the resource and lock acquiring loops. We printed warnings and depended on user sending a signal to the waiting process to stop the waiting. This doesn't work very well when wait happens out of a work queue. The simplest example of that is PCI probe. When user loads the module and card is in a broken state modprobe will wait forever and signals sent to it will not actually reach the probing thread. Make sure all wait loops have a time out. Set the upper wait time to 60 seconds to stay on the safe side. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 5 +++++ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c | 9 +++++++-- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c | 10 ++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-)