diff mbox

[1/7] Add die_spin_lock_{irqsave,irqrestore}

Message ID 1424748634-9153-2-git-send-email-anton@samba.org (mailing list archive)
State Not Applicable
Headers show

Commit Message

Anton Blanchard Feb. 24, 2015, 3:30 a.m. UTC
Many architectures have their own oops locking code that allows
the lock to be taken recursively. Create a common version.

Avoid creating generic locking functions, so they can't be
abused in other parts of the kernel.

Signed-off-by: Anton Blanchard <anton@samba.org>
---
 include/linux/die_lock.h | 23 +++++++++++++++++++++++
 lib/Makefile             |  1 +
 lib/die_lock.c           | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 include/linux/die_lock.h
 create mode 100644 lib/die_lock.c

Comments

Ingo Molnar Feb. 24, 2015, 6:41 a.m. UTC | #1
* Anton Blanchard <anton@samba.org> wrote:

> +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +static int die_owner = -1;
> +static unsigned int die_nest_count;
> +
> +unsigned long __die_spin_lock_irqsave(void)
> +{
> +	unsigned long flags;
> +	int cpu;
> +
> +	/* racy, but better than risking deadlock. */
> +	raw_local_irq_save(flags);
> +
> +	cpu = smp_processor_id();
> +	if (!arch_spin_trylock(&die_lock)) {
> +		if (cpu != die_owner)
> +			arch_spin_lock(&die_lock);

So why not trylock and time out here after a few seconds, 
instead of indefinitely supressing some potentially vital 
output due to some other CPU crashing/locking with the lock 
held?

> +	}
> +	die_nest_count++;
> +	die_owner = cpu;
> +
> +	return flags;

I suspect this would work in most cases.

If we fix the deadlock potential, and get a true global 
ordering of various oopses/warnings as they triggered (or 
at least timestamping them), then I'm sold on this I guess, 
it will likely improve things.

Thanks,

	Ingo
Ingo Molnar Feb. 24, 2015, 7:27 a.m. UTC | #2
* Ingo Molnar <mingo@kernel.org> wrote:

> 
> * Anton Blanchard <anton@samba.org> wrote:
> 
> > +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> > +static int die_owner = -1;
> > +static unsigned int die_nest_count;
> > +
> > +unsigned long __die_spin_lock_irqsave(void)
> > +{
> > +	unsigned long flags;
> > +	int cpu;
> > +
> > +	/* racy, but better than risking deadlock. */
> > +	raw_local_irq_save(flags);
> > +
> > +	cpu = smp_processor_id();
> > +	if (!arch_spin_trylock(&die_lock)) {
> > +		if (cpu != die_owner)
> > +			arch_spin_lock(&die_lock);
> 
> So why not trylock and time out here after a few seconds, 
> instead of indefinitely supressing some potentially vital 
> output due to some other CPU crashing/locking with the lock 
> held?

[...]

> If we fix the deadlock potential, and get a true global 
> ordering of various oopses/warnings as they triggered (or 
> at least timestamping them), [...]

If we had a global 'trouble counter' we could use that to 
refine the spin-looping timeout: instead of using a pure 
timeout of a few seconds, we could say 'a timeout of a few 
seconds while the counter does not increase'.

I.e. only override the locking/ordering if the owner CPU 
does not seem to be able to make progress with printing the 
oops/warning.

Thanks,

	Ingo
David Laight Feb. 24, 2015, 9:56 a.m. UTC | #3
From: Ingo Molnar

...
> So why not trylock and time out here after a few seconds,

> instead of indefinitely supressing some potentially vital

> output due to some other CPU crashing/locking with the lock

> held?


I've used that for status requests that usually lock a table
to get a consistent view.
If trylock times out assume that the data is actually stable
and access it anyway. Remember the pid doing the access and
next time it tries to acquire the same lock do a trylock with
no timeout.
That way if (when) there is a locking fubar (or a driver oops
with a lock held) at least some of the relevant status commands
will work.

	David
diff mbox

Patch

diff --git a/include/linux/die_lock.h b/include/linux/die_lock.h
new file mode 100644
index 0000000..540d09d
--- /dev/null
+++ b/include/linux/die_lock.h
@@ -0,0 +1,23 @@ 
+#ifndef __LINUX_DIE_LOCK_H
+#define __LINUX_DIE_LOCK_H
+
+#include <linux/typecheck.h>
+
+/**
+ * die_spin_lock_irqsave - lock die spinlock
+ * @flags: interrupt state is saved here
+ *
+ * The die spinlock is used to serialise output during oopses, BUGs and
+ * WARNs. It can be taken recursively so that nested oopses will not
+ * lock up.
+ */
+unsigned long __die_spin_lock_irqsave(void);
+#define die_spin_lock_irqsave(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = __die_spin_lock_irqsave();	\
+	} while (0)
+
+void die_spin_unlock_irqrestore(unsigned long flags);
+
+#endif /* __LINUX_DIE_LOCK_H */
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b..7d87a80 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,6 +28,7 @@  obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
+obj-y += die_lock.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
diff --git a/lib/die_lock.c b/lib/die_lock.c
new file mode 100644
index 0000000..5d2de2e
--- /dev/null
+++ b/lib/die_lock.c
@@ -0,0 +1,43 @@ 
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned long __die_spin_lock_irqsave(void)
+{
+	unsigned long flags;
+	int cpu;
+
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	if (!arch_spin_trylock(&die_lock)) {
+		if (cpu != die_owner)
+			arch_spin_lock(&die_lock);
+	}
+	die_nest_count++;
+	die_owner = cpu;
+
+	return flags;
+}
+
+/**
+ * die_spin_unlock_irqrestore - Unlock die spinlock
+ * @flags: interrupt state to restore
+ *
+ * Unlock die spinlock and restore interrupt state. This must be
+ * paired with die_spin_lock_irqsave.
+ */
+void die_spin_unlock_irqrestore(unsigned long flags)
+{
+	die_nest_count--;
+	if (!die_nest_count) {
+		die_owner = -1;
+		/* Nest count reaches zero, release the lock. */
+		arch_spin_unlock(&die_lock);
+	}
+	raw_local_irq_restore(flags);
+}