[v3,1/2] core/lock: Add deadlock detection

Message ID 20170707015244.9260-1-matthew.brown.dev@gmail.com
State New
Headers show

Commit Message

Matt Brown July 7, 2017, 1:52 a.m.
This adds simple deadlock detection. The detection looks for circular
dependencies in the lock requests. It will abort and display a stack trace
when a deadlock occurs.
The detection is enabled by DEBUG_LOCKS (enabled by default).
While the detection may have a slight performance overhead, as there are
not a huge number of locks in skiboot this overhead isn't significant.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
Changelog:
v3: 
	- added mutual exclusion for deadlock check
	- updated commit message
v2:
	- changed global lock table to be field in the respective 
	  cpu_thread struct
	- tidied the lock function
	
---
 core/cpu.c    |  1 +
 core/lock.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/cpu.h |  5 ++++
 3 files changed, 86 insertions(+)

Patch

diff --git a/core/cpu.c b/core/cpu.c
index 75c7008..8498011 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -855,6 +855,7 @@  void init_all_cpus(void)
 		t->node = cpu;
 		t->chip_id = chip_id;
 		t->icp_regs = NULL; /* Will be set later */
+		t->requested_lock = NULL;
 		t->core_hmi_state = 0;
 		t->core_hmi_state_ptr = &t->core_hmi_state;
 		t->thread_mask = 1;
diff --git a/core/lock.c b/core/lock.c
index 0868f2b..a9d1894 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -28,6 +28,7 @@ 
 bool bust_locks = true;
 
 #ifdef DEBUG_LOCKS
+static struct lock dl_lock = LOCK_UNLOCKED;
 
 static void lock_error(struct lock *l, const char *reason, uint16_t err)
 {
@@ -61,9 +62,81 @@  static void unlock_check(struct lock *l)
 		lock_error(l, "Releasing lock with 0 depth", 4);
 }
 
+/* Find circular dependencies in the lock requests. */
+static bool check_deadlock(void)
+{
+	uint32_t lock_owner, start, i;
+	struct cpu_thread *next_cpu;
+	struct lock *next;
+
+	next  = this_cpu()->requested_lock;
+	start = this_cpu()->pir;
+	i = 0;
+
+	while (i < cpu_max_pir) {
+
+		if (!next)
+			return false;
+
+		if (!(next->lock_val & 1) || next->in_con_path)
+			return false;
+
+		lock_owner = next->lock_val >> 32;
+
+		if (lock_owner == start)
+			return true;
+
+		next_cpu = find_cpu_by_pir(lock_owner);
+
+		if (!next_cpu)
+			return false;
+
+		next = next_cpu->requested_lock;
+		i++;
+	}
+
+	return false;
+}
+
+static void add_lock_request(struct lock *l)
+{
+	struct cpu_thread *curr = this_cpu();
+
+	if (curr->state != cpu_state_active &&
+	    curr->state != cpu_state_os)
+		return;
+
+	/*
+	 * For deadlock detection we must keep the lock states constant
+	 * while doing the deadlock check.
+	 */
+	for (;;) {
+		if (try_lock(&dl_lock))
+			break;
+		smt_lowest();
+		while (dl_lock.lock_val)
+			barrier();
+		smt_medium();
+	}
+
+	curr->requested_lock = l;
+
+	if (check_deadlock())
+		lock_error(l, "Deadlock detected", 0);
+
+	unlock(&dl_lock);
+}
+
+static void remove_lock_request(void)
+{
+	this_cpu()->requested_lock = NULL;
+}
+
 #else
 static inline void lock_check(struct lock *l) { };
 static inline void unlock_check(struct lock *l) { };
+static inline void add_lock_request(struct lock *l) { };
+static inline void remove_lock_request(void) { };
 #endif /* DEBUG_LOCKS */
 
 bool lock_held_by_me(struct lock *l)
@@ -90,6 +163,11 @@  void lock(struct lock *l)
 		return;
 
 	lock_check(l);
+
+	if (try_lock(l))
+		return;
+	add_lock_request(l);
+
 	for (;;) {
 		if (try_lock(l))
 			break;
@@ -98,6 +176,8 @@  void lock(struct lock *l)
 			barrier();
 		smt_medium();
 	}
+
+	remove_lock_request();
 }
 
 void unlock(struct lock *l)
diff --git a/include/cpu.h b/include/cpu.h
index fd3acf7..ab4e4fe 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -95,6 +95,11 @@  struct cpu_thread {
 
 	/* For use by XICS emulation on XIVE */
 	struct xive_cpu_state		*xstate;
+
+#ifdef DEBUG_LOCKS
+	/* The lock requested by this cpu, used for deadlock detection */
+	struct lock			*requested_lock;
+#endif
 };
 
 /* This global is set to 1 to allow secondaries to callin,