[v3,2/2] core/lock: Add lock timeout warnings

Submitted by Matt Brown on July 7, 2017, 1:52 a.m.

Details

Message ID 20170707015244.9260-2-matthew.brown.dev@gmail.com
State New
Headers show

Commit Message

Matt Brown July 7, 2017, 1:52 a.m.
There are currently no timeout warnings for locks in skiboot. We assume
that the lock will eventually become free, which may not always be the
case.

This patch adds timeout warnings for locks. Any lock which spins for more
than 5 seconds will throw a warning and stacktrace for that thread. This is
useful for debugging siturations where a lock which hang, waiting for the
lock to be freed.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
 core/lock.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

Comments

Cyril Bur Aug. 10, 2017, 7:35 a.m.
On Fri, 2017-07-07 at 11:52 +1000, Matt Brown wrote:
> There are currently no timeout warnings for locks in skiboot. We assume
> that the lock will eventually become free, which may not always be the
> case.
> 
> This patch adds timeout warnings for locks. Any lock which spins for more
> than 5 seconds will throw a warning and stacktrace for that thread. This is
> useful for debugging siturations where a lock which hang, waiting for the
> lock to be freed.
> 
> Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>

Apart from the fact that 10 seconds (as per the #define later) vs ... 5
seconds (in the commit message)... eh

Reviewed-by: Cyril Bur <cyril.bur@au1.ibm.com>

> ---
>  core/lock.c | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
> 
> diff --git a/core/lock.c b/core/lock.c
> index a9d1894..50aa26f 100644
> --- a/core/lock.c
> +++ b/core/lock.c
> @@ -20,6 +20,7 @@
>  #include <processor.h>
>  #include <cpu.h>
>  #include <console.h>
> +#include <timebase.h>
>  
>  /* Set to bust locks. Note, this is initialized to true because our
>   * lock debugging code is not going to work until we have the per
> @@ -132,11 +133,27 @@ static void remove_lock_request(void)
>  	this_cpu()->requested_lock = NULL;
>  }
>  
> +#define LOCK_TIMEOUT_MS 10000
> +static inline bool lock_timeout(unsigned long start)
> +{
> +	/* Print warning if lock has been spinning for more than TIMEOUT_MS */
> +	unsigned long wait = tb_to_msecs(mftb());
> +
> +	if (wait - start > LOCK_TIMEOUT_MS) {
> +		prlog(PR_WARNING, "WARNING: Lock has been "\
> +		      "spinning for %lums\n", wait - start);
> +		backtrace();
> +		return true;
> +	}
> +
> +	return false;
> +}
>  #else
>  static inline void lock_check(struct lock *l) { };
>  static inline void unlock_check(struct lock *l) { };
>  static inline void add_lock_request(struct lock *l) { };
>  static inline void remove_lock_request(void) { };
> +static inline bool lock_timeout(unsigned long s) { return false; }
>  #endif /* DEBUG_LOCKS */
>  
>  bool lock_held_by_me(struct lock *l)
> @@ -157,8 +174,12 @@ bool try_lock(struct lock *l)
>  	return false;
>  }
>  
> +
>  void lock(struct lock *l)
>  {
> +	bool timeout_warn = false;
> +	unsigned long start;
> +
>  	if (bust_locks)
>  		return;
>  
> @@ -168,6 +189,10 @@ void lock(struct lock *l)
>  		return;
>  	add_lock_request(l);
>  
> +#ifdef DEBUG_LOCKS
> +	start = tb_to_msecs(mftb());
> +#endif
> +
>  	for (;;) {
>  		if (try_lock(l))
>  			break;
> @@ -175,6 +200,9 @@ void lock(struct lock *l)
>  		while (l->lock_val)
>  			barrier();
>  		smt_medium();
> +
> +		if (!timeout_warn)
> +			timeout_warn = lock_timeout(start);
>  	}
>  
>  	remove_lock_request();

Patch hide | download patch | download mbox

diff --git a/core/lock.c b/core/lock.c
index a9d1894..50aa26f 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -20,6 +20,7 @@ 
 #include <processor.h>
 #include <cpu.h>
 #include <console.h>
+#include <timebase.h>
 
 /* Set to bust locks. Note, this is initialized to true because our
  * lock debugging code is not going to work until we have the per
@@ -132,11 +133,27 @@  static void remove_lock_request(void)
 	this_cpu()->requested_lock = NULL;
 }
 
+#define LOCK_TIMEOUT_MS 10000
+static inline bool lock_timeout(unsigned long start)
+{
+	/* Print warning if lock has been spinning for more than TIMEOUT_MS */
+	unsigned long wait = tb_to_msecs(mftb());
+
+	if (wait - start > LOCK_TIMEOUT_MS) {
+		prlog(PR_WARNING, "WARNING: Lock has been "\
+		      "spinning for %lums\n", wait - start);
+		backtrace();
+		return true;
+	}
+
+	return false;
+}
 #else
 static inline void lock_check(struct lock *l) { };
 static inline void unlock_check(struct lock *l) { };
 static inline void add_lock_request(struct lock *l) { };
 static inline void remove_lock_request(void) { };
+static inline bool lock_timeout(unsigned long s) { return false; }
 #endif /* DEBUG_LOCKS */
 
 bool lock_held_by_me(struct lock *l)
@@ -157,8 +174,12 @@  bool try_lock(struct lock *l)
 	return false;
 }
 
+
 void lock(struct lock *l)
 {
+	bool timeout_warn = false;
+	unsigned long start;
+
 	if (bust_locks)
 		return;
 
@@ -168,6 +189,10 @@  void lock(struct lock *l)
 		return;
 	add_lock_request(l);
 
+#ifdef DEBUG_LOCKS
+	start = tb_to_msecs(mftb());
+#endif
+
 	for (;;) {
 		if (try_lock(l))
 			break;
@@ -175,6 +200,9 @@  void lock(struct lock *l)
 		while (l->lock_val)
 			barrier();
 		smt_medium();
+
+		if (!timeout_warn)
+			timeout_warn = lock_timeout(start);
 	}
 
 	remove_lock_request();