core/lock: Add deadlock detection

Message ID 20170623043506.9653-1-matthew.brown.dev@gmail.com
State New
Headers show

Commit Message

Matt Brown June 23, 2017, 4:35 a.m.
This adds simple deadlock detection.
The detection looks for circular dependencies in the lock requests.
It will abort and display a stack trace when a deadlock occurs.
The detection will reduce performance so only use for debugging.
To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
 core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/config.h |  3 ++
 include/lock.h   |  5 ++++
 3 files changed, 90 insertions(+), 1 deletion(-)

Comments

Andrew Donnellan June 23, 2017, 6:43 a.m. | #1
On 23/06/17 14:35, Matt Brown wrote:
> This adds simple deadlock detection.
> The detection looks for circular dependencies in the lock requests.
> It will abort and display a stack trace when a deadlock occurs.
> The detection will reduce performance so only use for debugging.
> To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).
>
> Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
> ---
>  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/config.h |  3 ++
>  include/lock.h   |  5 ++++
>  3 files changed, 90 insertions(+), 1 deletion(-)
>

This looks like it might need a respin on top of master.

> diff --git a/core/lock.c b/core/lock.c
> index e82048b..0379fb0 100644
> --- a/core/lock.c
> +++ b/core/lock.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2013-2014 IBM Corp.
> +/* Copyright 2013-2017 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -66,6 +66,73 @@ static inline void lock_check(struct lock *l) { };
>  static inline void unlock_check(struct lock *l) { };
>  #endif /* DEBUG_LOCKS */
>
> +#ifdef DEBUG_DEADLOCKS
> +
> +#define MAX_THREADS 2048
> +static struct lock *lock_table[MAX_THREADS];
> +
> +/* Find circular dependencies in the lock requests. */
> +static bool check_deadlock(void)
> +{
> +	int lock_owner, i, start;
> +	struct lock *next;
> +
> +	start = this_cpu()->pir;
> +	next = lock_table[start];
> +	i = 0;
> +
> +	while (i < MAX_THREADS) {
> +
> +		if (!next)
> +			return false;
> +
> +		if (!(next->lock_val & 1) || next->in_con_path)
> +			return false;
> +
> +		lock_owner = next->lock_val >> 32;
> +
> +		if (lock_owner >= MAX_THREADS)
> +			return false;
> +
> +		if (lock_owner == start && i > 0)
> +			return true;
> +
> +		next = lock_table[lock_owner];
> +		i++;
> +	}
> +
> +	return false;
> +}
> +
> +void add_lock_request(struct lock *l)

Can this + remove_lock_request() be static and get rid of the definition 
in the headers too?

> +{
> +	if (this_cpu()->state == cpu_state_active ||
> +	    this_cpu()->state == cpu_state_os) {
> +
> +		if (this_cpu()->pir >= MAX_THREADS)
> +			return;
> +
> +		lock_table[this_cpu()->pir] = l;
> +
> +		if (check_deadlock() && check_deadlock()) {
> +#ifdef DEBUG_LOCKS
> +			lock_error(l, "Deadlock detected", 0);
> +#else
> +			prlog(PR_EMERG, "LOCK: Deadlock detected\n");
> +			backtrace();
> +			abort();
> +#endif /* DEBUG_LOCKS */
> +		}
> +	}
> +}
> +
> +void remove_lock_request(void)
> +{
> +	if (this_cpu()->pir < MAX_THREADS)
> +		lock_table[this_cpu()->pir] = NULL;
> +}
> +#endif /* DEBUG_DEADLOCKS */
> +
>  bool lock_held_by_me(struct lock *l)
>  {
>  	uint64_t pir64 = this_cpu()->pir;
> @@ -86,15 +153,29 @@ bool try_lock(struct lock *l)
>
>  void lock(struct lock *l)
>  {
> +
>  	if (bust_locks)
>  		return;
>
>  	lock_check(l);
> +
> +#ifdef DEBUG_DEADLOCKS
> +	if (try_lock(l))
> +		return;
> +
> +	add_lock_request(l);
> +#endif
> +
>  	for (;;) {
>  		if (try_lock(l))
>  			break;
>  		cpu_relax();
>  	}
> +
> +#ifdef DEBUG_DEADLOCKS
> +	remove_lock_request();
> +#endif
> +
>  }
>
>  void unlock(struct lock *l)
> diff --git a/include/config.h b/include/config.h
> index cd8a0a6..c2f80e9 100644
> --- a/include/config.h
> +++ b/include/config.h
> @@ -39,6 +39,9 @@
>  /* Enable lock debugging */
>  #define DEBUG_LOCKS		1
>
> +/* Enable deadlock detection */
> +//#define DEBUG_DEADLOCKS	1
> +
>  /* Enable malloc debugging */
>  #define DEBUG_MALLOC		1
>
> diff --git a/include/lock.h b/include/lock.h
> index 0ac943d..27bedc7 100644
> --- a/include/lock.h
> +++ b/include/lock.h
> @@ -81,4 +81,9 @@ extern bool lock_recursive(struct lock *l);
>  /* Called after per-cpu data structures are available */
>  extern void init_locks(void);
>
> +#ifdef DEBUG_DEADLOCKS
> +extern void add_lock_request(struct lock *l);
> +extern void remove_lock_request(void);
> +#endif /* DEBUG_DEADLOCKS*/
> +
>  #endif /* __LOCK_H */
>
Oliver June 23, 2017, 7:14 a.m. | #2
Hey nick, you're a concurrency wizard can you review this?


On Fri, Jun 23, 2017 at 2:35 PM, Matt Brown <matthew.brown.dev@gmail.com> wrote:
> This adds simple deadlock detection.
> The detection looks for circular dependencies in the lock requests.
> It will abort and display a stack trace when a deadlock occurs.
> The detection will reduce performance so only use for debugging.
> To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).

It might be useful if we can get stack traces for every thread
involved in the deadlock. You might be able
to use the NMI injection that nick was working on to wake up
individual threads to get them to print the stack
trace.

>
> Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
> ---
>  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/config.h |  3 ++
>  include/lock.h   |  5 ++++
>  3 files changed, 90 insertions(+), 1 deletion(-)
>
> diff --git a/core/lock.c b/core/lock.c
> index e82048b..0379fb0 100644
> --- a/core/lock.c
> +++ b/core/lock.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2013-2014 IBM Corp.
> +/* Copyright 2013-2017 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -66,6 +66,73 @@ static inline void lock_check(struct lock *l) { };
>  static inline void unlock_check(struct lock *l) { };
>  #endif /* DEBUG_LOCKS */
>
> +#ifdef DEBUG_DEADLOCKS
> +
> +#define MAX_THREADS 2048
> +static struct lock *lock_table[MAX_THREADS];

Rename this to target_lock_table or something. You could probably
remove the array and track this information as a field in the
cpu_thread structure too since skiboot is effectively single thread
until we call cpu_bringup().

> +
> +/* Find circular dependencies in the lock requests. */
> +static bool check_deadlock(void)

I'd rather you passed the pir/lock that you want to check as function
arguments rather than wacking globals at random, but that's just me.

> +{
> +       int lock_owner, i, start;
> +       struct lock *next;
> +
> +       start = this_cpu()->pir;
> +       next = lock_table[start];
> +       i = 0;

I think you can remove the counter. It's being used for is to make
sure the current lock isn't the one we started from and you can do
that check in a more straight forward manner.

> +
> +       while (i < MAX_THREADS) {
> +
> +               if (!next)
> +                       return false;
> +

> +               if (!(next->lock_val & 1) || next->in_con_path)
> +                       return false;
Why is being in the console path important?

> +
> +               lock_owner = next->lock_val >> 32;


> +               if (lock_owner >= MAX_THREADS)
> +                       return false;

This should be impossible, maybe make it an assert() because if it
happens something is very broken.

> +
> +               if (lock_owner == start && i > 0)
> +                       return true;
> +
> +               next = lock_table[lock_owner];
> +               i++;
> +       }
> +
> +       return false;
> +}
> +

> +void add_lock_request(struct lock *l)
> +{
> +       if (this_cpu()->state == cpu_state_active ||
> +           this_cpu()->state == cpu_state_os) {

If we're taking locks inside skiboot is the thread ever going to be in
a different state?

> +
> +               if (this_cpu()->pir >= MAX_THREADS)
> +                       return;
Again, this should never happen so either make it a assert() or remove
the check.

> +
> +               lock_table[this_cpu()->pir] = l;
> +
> +               if (check_deadlock() && check_deadlock()) {

Why twice?

> +#ifdef DEBUG_LOCKS
> +                       lock_error(l, "Deadlock detected", 0);
> +#else
> +                       prlog(PR_EMERG, "LOCK: Deadlock detected\n");
> +                       backtrace();
> +                       abort();
> +#endif /* DEBUG_LOCKS */
> +               }
> +       }
> +}
> +
> +void remove_lock_request(void)
> +{
> +       if (this_cpu()->pir < MAX_THREADS)
> +               lock_table[this_cpu()->pir] = NULL;
> +}
> +#endif /* DEBUG_DEADLOCKS */
> +
>  bool lock_held_by_me(struct lock *l)
>  {
>         uint64_t pir64 = this_cpu()->pir;
> @@ -86,15 +153,29 @@ bool try_lock(struct lock *l)
>
>  void lock(struct lock *l)
>  {
> +
>         if (bust_locks)
>                 return;
>
>         lock_check(l);
> +
> +#ifdef DEBUG_DEADLOCKS
> +       if (try_lock(l))
> +               return;
> +
> +       add_lock_request(l);
> +#endif
> +
>         for (;;) {
>                 if (try_lock(l))
>                         break;
>                 cpu_relax();
>         }
> +
> +#ifdef DEBUG_DEADLOCKS
> +       remove_lock_request();
> +#endif
> +
>  }
>
>  void unlock(struct lock *l)
> diff --git a/include/config.h b/include/config.h
> index cd8a0a6..c2f80e9 100644
> --- a/include/config.h
> +++ b/include/config.h
> @@ -39,6 +39,9 @@
>  /* Enable lock debugging */
>  #define DEBUG_LOCKS            1
>
> +/* Enable deadlock detection */
> +//#define DEBUG_DEADLOCKS      1
> +
>  /* Enable malloc debugging */
>  #define DEBUG_MALLOC           1
>
> diff --git a/include/lock.h b/include/lock.h
> index 0ac943d..27bedc7 100644
> --- a/include/lock.h
> +++ b/include/lock.h
> @@ -81,4 +81,9 @@ extern bool lock_recursive(struct lock *l);
>  /* Called after per-cpu data structures are available */
>  extern void init_locks(void);
>
> +#ifdef DEBUG_DEADLOCKS
> +extern void add_lock_request(struct lock *l);
> +extern void remove_lock_request(void);
> +#endif /* DEBUG_DEADLOCKS*/
> +
>  #endif /* __LOCK_H */
> --
> 2.9.3
>
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Nicholas Piggin June 23, 2017, 8:24 a.m. | #3
On Fri, 23 Jun 2017 17:14:34 +1000
Oliver <oohall@gmail.com> wrote:

> Hey nick, you're a concurrency wizard can you review this?

I could try.

> On Fri, Jun 23, 2017 at 2:35 PM, Matt Brown <matthew.brown.dev@gmail.com> wrote:
> > This adds simple deadlock detection.
> > The detection looks for circular dependencies in the lock requests.
> > It will abort and display a stack trace when a deadlock occurs.
> > The detection will reduce performance so only use for debugging.
> > To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).  
> 
> It might be useful if we can get stack traces for every thread
> involved in the deadlock. You might be able
> to use the NMI injection that nick was working on to wake up
> individual threads to get them to print the stack
> trace.

Would probably be a good idea. We'll have to somehow bribe or
Alistair to do the NMI injection firmware though :)

> > Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
> > ---
> >  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  include/config.h |  3 ++
> >  include/lock.h   |  5 ++++
> >  3 files changed, 90 insertions(+), 1 deletion(-)

Hmm, okay so the algorithm is follows, if a CPU can't get a lock
immediately, it registers that it is waiting for the lock in
lock_table, and then checks if the lock owner of that lock is waiting
for another lock, etc. and if it reaches back to us it's a deadlock.

Seems like a reasonable algorithm.

I think the deadlock detection algorithm itself needs to be
protected from concurrency. You could have a lock owner release
its lock and later try to grab the one you hold while the check
is running, I think?

An issue with this type of detection is that it doesn't catch a lot
in testing. I mean only actual deadlocks. Probably better than
nothing, but what would be really cool is to dynamically build the
observed relationship between locks (aka lockdep aka witness).
Then you can complain if you have ever seen locks being taken in
the wrong order.

But I don't want to say "don't do this because we can spend a lot
more time and work to do something else" :) This could make a good
start for lock debugging. A basic timeout warning for acquiring a
lock wouldn't be a bad idea either (and may catch some different
bugs).

Thanks,
Nick
Alistair Popple June 26, 2017, 7:22 a.m. | #4
On Fri, 23 Jun 2017 06:24:12 PM Nicholas Piggin wrote:
> On Fri, 23 Jun 2017 17:14:34 +1000
> Oliver <oohall@gmail.com> wrote:
> 
> > Hey nick, you're a concurrency wizard can you review this?
> 
> I could try.
> 
> > On Fri, Jun 23, 2017 at 2:35 PM, Matt Brown <matthew.brown.dev@gmail.com> wrote:
> > > This adds simple deadlock detection.
> > > The detection looks for circular dependencies in the lock requests.
> > > It will abort and display a stack trace when a deadlock occurs.
> > > The detection will reduce performance so only use for debugging.
> > > To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).  
> > 
> > It might be useful if we can get stack traces for every thread
> > involved in the deadlock. You might be able
> > to use the NMI injection that nick was working on to wake up
> > individual threads to get them to print the stack
> > trace.
> 
> Would probably be a good idea. We'll have to somehow bribe or
> Alistair to do the NMI injection firmware though :)

Bribes are welcome but not necessary :-)

It's on my list of things to come back to.

- Alistair

> > > Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
> > > ---
> > >  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > >  include/config.h |  3 ++
> > >  include/lock.h   |  5 ++++
> > >  3 files changed, 90 insertions(+), 1 deletion(-)
> 
> Hmm, okay so the algorithm is follows, if a CPU can't get a lock
> immediately, it registers that it is waiting for the lock in
> lock_table, and then checks if the lock owner of that lock is waiting
> for another lock, etc. and if it reaches back to us it's a deadlock.
> 
> Seems like a reasonable algorithm.
> 
> I think the deadlock detection algorithm itself needs to be
> protected from concurrency. You could have a lock owner release
> its lock and later try to grab the one you hold while the check
> is running, I think?
> 
> An issue with this type of detection is that it doesn't catch a lot
> in testing. I mean only actual deadlocks. Probably better than
> nothing, but what would be really cool is to dynamically build the
> observed relationship between locks (aka lockdep aka witness).
> Then you can complain if you have ever seen locks being taken in
> the wrong order.
> 
> But I don't want to say "don't do this because we can spend a lot
> more time and work to do something else" :) This could make a good
> start for lock debugging. A basic timeout warning for acquiring a
> lock wouldn't be a bad idea either (and may catch some different
> bugs).
> 
> Thanks,
> Nick
>
Matt Brown July 7, 2017, 3:01 a.m. | #5
On Fri, Jun 23, 2017 at 6:24 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> On Fri, 23 Jun 2017 17:14:34 +1000
> Oliver <oohall@gmail.com> wrote:
>
>> Hey nick, you're a concurrency wizard can you review this?
>
> I could try.
>
>> On Fri, Jun 23, 2017 at 2:35 PM, Matt Brown <matthew.brown.dev@gmail.com> wrote:
>> > This adds simple deadlock detection.
>> > The detection looks for circular dependencies in the lock requests.
>> > It will abort and display a stack trace when a deadlock occurs.
>> > The detection will reduce performance so only use for debugging.
>> > To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).
>>
>> It might be useful if we can get stack traces for every thread
>> involved in the deadlock. You might be able
>> to use the NMI injection that nick was working on to wake up
>> individual threads to get them to print the stack
>> trace.
>
> Would probably be a good idea. We'll have to somehow bribe or
> Alistair to do the NMI injection firmware though :)
>
>> > Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
>> > ---
>> >  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> >  include/config.h |  3 ++
>> >  include/lock.h   |  5 ++++
>> >  3 files changed, 90 insertions(+), 1 deletion(-)
>
> Hmm, okay so the algorithm is follows, if a CPU can't get a lock
> immediately, it registers that it is waiting for the lock in
> lock_table, and then checks if the lock owner of that lock is waiting
> for another lock, etc. and if it reaches back to us it's a deadlock.
>
> Seems like a reasonable algorithm.
>
> I think the deadlock detection algorithm itself needs to be
> protected from concurrency. You could have a lock owner release
> its lock and later try to grab the one you hold while the check
> is running, I think?
>
Yeah you're right. I just submitted a v3 which stops other cpu's from
getting locks while you are checking for the deadlock. (forgot to add
this to v2)
There isn't any significant overhead from the mutual exclusion, which
was my main concern.


> An issue with this type of detection is that it doesn't catch a lot
> in testing. I mean only actual deadlocks. Probably better than
> nothing, but what would be really cool is to dynamically build the
> observed relationship between locks (aka lockdep aka witness).
> Then you can complain if you have ever seen locks being taken in
> the wrong order.
>
> But I don't want to say "don't do this because we can spend a lot
> more time and work to do something else" :) This could make a good
> start for lock debugging. A basic timeout warning for acquiring a
> lock wouldn't be a bad idea either (and may catch some different
> bugs).
>
Also added a simple patch to print a timeout warning and backtrace on
locks which wait for > 10 secs (pretty arbitrary value, but the xscom
and uart reads can take 6-8 seconds in my testing).

Thanks,
Matt

> Thanks,
> Nick

Patch

diff --git a/core/lock.c b/core/lock.c
index e82048b..0379fb0 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -1,4 +1,4 @@ 
-/* Copyright 2013-2014 IBM Corp.
+/* Copyright 2013-2017 IBM Corp.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,6 +66,73 @@  static inline void lock_check(struct lock *l) { };
 static inline void unlock_check(struct lock *l) { };
 #endif /* DEBUG_LOCKS */
 
+#ifdef DEBUG_DEADLOCKS
+
+#define MAX_THREADS 2048
+static struct lock *lock_table[MAX_THREADS];
+
+/* Find circular dependencies in the lock requests. */
+static bool check_deadlock(void)
+{
+	int lock_owner, i, start;
+	struct lock *next;
+
+	start = this_cpu()->pir;
+	next = lock_table[start];
+	i = 0;
+
+	while (i < MAX_THREADS) {
+
+		if (!next)
+			return false;
+
+		if (!(next->lock_val & 1) || next->in_con_path)
+			return false;
+
+		lock_owner = next->lock_val >> 32;
+
+		if (lock_owner >= MAX_THREADS)
+			return false;
+
+		if (lock_owner == start && i > 0)
+			return true;
+
+		next = lock_table[lock_owner];
+		i++;
+	}
+
+	return false;
+}
+
+void add_lock_request(struct lock *l)
+{
+	if (this_cpu()->state == cpu_state_active ||
+	    this_cpu()->state == cpu_state_os) {
+
+		if (this_cpu()->pir >= MAX_THREADS)
+			return;
+
+		lock_table[this_cpu()->pir] = l;
+
+		if (check_deadlock() && check_deadlock()) {
+#ifdef DEBUG_LOCKS
+			lock_error(l, "Deadlock detected", 0);
+#else
+			prlog(PR_EMERG, "LOCK: Deadlock detected\n");
+			backtrace();
+			abort();
+#endif /* DEBUG_LOCKS */
+		}
+	}
+}
+
+void remove_lock_request(void)
+{
+	if (this_cpu()->pir < MAX_THREADS)
+		lock_table[this_cpu()->pir] = NULL;
+}
+#endif /* DEBUG_DEADLOCKS */
+
 bool lock_held_by_me(struct lock *l)
 {
 	uint64_t pir64 = this_cpu()->pir;
@@ -86,15 +153,29 @@  bool try_lock(struct lock *l)
 
 void lock(struct lock *l)
 {
+
 	if (bust_locks)
 		return;
 
 	lock_check(l);
+
+#ifdef DEBUG_DEADLOCKS
+	if (try_lock(l))
+		return;
+
+	add_lock_request(l);
+#endif
+
 	for (;;) {
 		if (try_lock(l))
 			break;
 		cpu_relax();
 	}
+
+#ifdef DEBUG_DEADLOCKS
+	remove_lock_request();
+#endif
+
 }
 
 void unlock(struct lock *l)
diff --git a/include/config.h b/include/config.h
index cd8a0a6..c2f80e9 100644
--- a/include/config.h
+++ b/include/config.h
@@ -39,6 +39,9 @@ 
 /* Enable lock debugging */
 #define DEBUG_LOCKS		1
 
+/* Enable deadlock detection */
+//#define DEBUG_DEADLOCKS	1
+
 /* Enable malloc debugging */
 #define DEBUG_MALLOC		1
 
diff --git a/include/lock.h b/include/lock.h
index 0ac943d..27bedc7 100644
--- a/include/lock.h
+++ b/include/lock.h
@@ -81,4 +81,9 @@  extern bool lock_recursive(struct lock *l);
 /* Called after per-cpu data structures are available */
 extern void init_locks(void);
 
+#ifdef DEBUG_DEADLOCKS
+extern void add_lock_request(struct lock *l);
+extern void remove_lock_request(void);
+#endif /* DEBUG_DEADLOCKS*/
+
 #endif /* __LOCK_H */