diff mbox series

[v2] core/lock: Add debug options to store backtrace of where lock was taken

Message ID 20190418052106.32022-1-andrew.donnellan@au1.ibm.com
State Accepted
Headers show
Series [v2] core/lock: Add debug options to store backtrace of where lock was taken | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (d318cdb3863fcf92288528bfed3b6e435cf6f0ef)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Andrew Donnellan April 18, 2019, 5:21 a.m. UTC
Contrary to popular belief, skiboot developers are imperfect and
occasionally write locking bugs. When we exit skiboot, we check if we're
still holding any locks, and if so, we print an error with a list of the
locks currently held and the locations where they were taken.

However, this only tells us the location where lock() was called, which may
not be enough to work out what's going on. To give us more to go on, we can
store backtrace data in the lock and print that out when we unexpectedly
still hold locks.

Because the backtrace data is rather big, we only enable this if
DEBUG_LOCKS_BACKTRACE is defined, which in turn is switched on when
DEBUG=1.

(We disable DEBUG_LOCKS_BACKTRACE in some of the memory allocation tests
because the locks used by the memory allocator take up too much room in the
fake skiboot heap.)

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

---

RFC->v1:
- Switch this on when DEBUG=1 (Stewart)
- Rebase on top of some backtrace API changes

v1->v2:
- Disable lock backtraces in the mem_region and mem_range_is_reserved
tests. This is because region allocation requires space in the fake skiboot
heap, including a lock, and the backtrace records make locks a lot bigger.
Maybe coupling DEBUG=1 to DEBUG_LOCKS_BACKTRACE isn't that great an idea?
---
 core/lock.c                           | 19 +++++++++++++++++--
 core/test/run-mem_range_is_reserved.c |  3 +++
 core/test/run-mem_region.c            |  3 +++
 include/config.h                      |  7 +++++++
 include/lock.h                        | 11 +++++++++++
 5 files changed, 41 insertions(+), 2 deletions(-)

Comments

Stewart Smith May 21, 2019, 3:07 a.m. UTC | #1
Andrew Donnellan <andrew.donnellan@au1.ibm.com> writes:
> Contrary to popular belief, skiboot developers are imperfect and
> occasionally write locking bugs. When we exit skiboot, we check if we're
> still holding any locks, and if so, we print an error with a list of the
> locks currently held and the locations where they were taken.
>
> However, this only tells us the location where lock() was called, which may
> not be enough to work out what's going on. To give us more to go on with,
> we can store backtrace data in the lock and print that out when we
> unexpectedly still hold locks.
>
> Because the backtrace data is rather big, we only enable this if
> DEBUG_LOCKS_BACKTRACE is defined, which in turn is switched on when
> DEBUG=1.
>
> (We disable DEBUG_LOCKS_BACKTRACE in some of the memory allocation tests
> because the locks used by the memory allocator take up too much room in the
> fake skiboot heap.)
>
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>
> ---
>
> RFC->v1:
> - Switch this on when DEBUG=1 (Stewart)
> - Rebase on top of some backtrace API changes
>
> v1->v2:
> - Disable lock backtraces in the mem_region and mem_range_is_reserved
> tests. This is because region allocation requires space in the fake skiboot
> heap, including a lock, and the backtrace records make locks a lot bigger.
> Maybe coupling DEBUG=1 to DEBUG_LOCKS_BACKTRACE isn't that great an idea?
> ---
>  core/lock.c                           | 19 +++++++++++++++++--
>  core/test/run-mem_range_is_reserved.c |  3 +++
>  core/test/run-mem_region.c            |  3 +++
>  include/config.h                      |  7 +++++++
>  include/lock.h                        | 11 +++++++++++
>  5 files changed, 41 insertions(+), 2 deletions(-)

Past me would have loved this a few times. Merged to master as of 1706e0311c47b2bfda3db1ef6168628072e7878b
diff mbox series

Patch

diff --git a/core/lock.c b/core/lock.c
index fbf0b21ed5c6..6fbb5eb9bf5e 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -208,6 +208,12 @@  bool try_lock_caller(struct lock *l, const char *owner)
 		cpu->con_suspend++;
 	if (__try_lock(cpu, l)) {
 		l->owner = owner;
+
+#ifdef DEBUG_LOCKS_BACKTRACE
+		backtrace_create(l->bt_buf, LOCKS_BACKTRACE_MAX_ENTS,
+				 &l->bt_metadata);
+#endif
+
 		list_add(&cpu->locks_held, &l->list);
 		return true;
 	}
@@ -312,8 +318,12 @@  void dump_locks_list(void)
 	struct lock *l;
 
 	prlog(PR_ERR, "Locks held:\n");
-	list_for_each(&this_cpu()->locks_held, l, list)
+	list_for_each(&this_cpu()->locks_held, l, list) {
 		prlog(PR_ERR, "  %s\n", l->owner);
+#ifdef DEBUG_LOCKS_BACKTRACE
+		backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL, true);
+#endif
+	}
 }
 
 void drop_my_locks(bool warn)
@@ -322,8 +332,13 @@  void drop_my_locks(bool warn)
 
 	disable_fast_reboot("Lock corruption");
 	while((l = list_top(&this_cpu()->locks_held, struct lock, list)) != NULL) {
-		if (warn)
+		if (warn) {
 			prlog(PR_ERR, "  %s\n", l->owner);
+#ifdef DEBUG_LOCKS_BACKTRACE
+			backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL,
+					true);
+#endif
+		}
 		unlock(l);
 	}
 }
diff --git a/core/test/run-mem_range_is_reserved.c b/core/test/run-mem_range_is_reserved.c
index 15dc6cbc42eb..8d70a2404346 100644
--- a/core/test/run-mem_range_is_reserved.c
+++ b/core/test/run-mem_range_is_reserved.c
@@ -16,6 +16,9 @@ 
 
 #include <config.h>
 
+/* The lock backtrace structures consume too much room on the skiboot heap */
+#undef DEBUG_LOCKS_BACKTRACE
+
 #define BITS_PER_LONG (sizeof(long) * 8)
 
 #include "dummy-cpu.h"
diff --git a/core/test/run-mem_region.c b/core/test/run-mem_region.c
index 4b3aff74e4a9..d48f5523d281 100644
--- a/core/test/run-mem_region.c
+++ b/core/test/run-mem_region.c
@@ -18,6 +18,9 @@ 
 #include <stdbool.h>
 #include <stdint.h>
 
+/* The lock backtrace structures consume too much room on the skiboot heap */
+#undef DEBUG_LOCKS_BACKTRACE
+
 #define BITS_PER_LONG (sizeof(long) * 8)
 
 #include "dummy-cpu.h"
diff --git a/include/config.h b/include/config.h
index 438f1b0f3373..7059e39ddb91 100644
--- a/include/config.h
+++ b/include/config.h
@@ -39,6 +39,13 @@ 
 /* Enable lock debugging */
 #define DEBUG_LOCKS		1
 
+/* Enable printing of backtraces when locks not released */
+#ifdef DEBUG
+#define DEBUG_LOCKS_BACKTRACE  1
+#else
+//#define DEBUG_LOCKS_BACKTRACE	1
+#endif
+
 /* Enable lock dependency checker */
 #define DEADLOCK_CHECKER	1
 
diff --git a/include/lock.h b/include/lock.h
index b18757394dbb..e08ec08fcdfb 100644
--- a/include/lock.h
+++ b/include/lock.h
@@ -23,6 +23,12 @@ 
 #include <ccan/list/list.h>
 #include <ccan/str/str.h>
 
+#ifdef DEBUG_LOCKS_BACKTRACE
+#include <stack.h>
+
+#define LOCKS_BACKTRACE_MAX_ENTS	60
+#endif
+
 struct lock {
 	/* Lock value has bit 63 as lock bit and the PIR of the owner
 	 * in the top 32-bit
@@ -38,6 +44,11 @@  struct lock {
 	/* file/line of lock owner */
 	const char *owner;
 
+#ifdef DEBUG_LOCKS_BACKTRACE
+	struct bt_entry bt_buf[LOCKS_BACKTRACE_MAX_ENTS];
+	struct bt_metadata bt_metadata;
+#endif
+
 	/* linkage in per-cpu list of owned locks */
 	struct list_node list;
 };