diff mbox series

[v4] stack: guess endian for stack frame walking

Message ID 20181030235803.2048-1-npiggin@gmail.com
State Accepted
Headers show
Series [v4] stack: guess endian for stack frame walking | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied
snowpatch_ozlabs/build-multiarch success Test build-multiarch on branch master

Commit Message

Nicholas Piggin Oct. 30, 2018, 11:58 p.m. UTC
The stack unwinder currently does not do any endian conversion, which
means it won't work correctly if the stack does not match pdbg endian.

This patch attempts an endian flip if the stack looks wrong, and goes
with that if it's an improvement. It also has some magic hackery to
take OPAL->Linux into account. Unfortunately this is not a "clean"
fully general solution, but works reasonably well in practice.

This is the regs --backtrace output for a test that has a CPU hang in
an OPAL call from Linux:

STACK:           SP                NIA
 0x0000000031c43cb0 0x000000003002b324 (big-endian)
 0x0000000031c43d20 0x00000000300051e4 (big-endian)
 0xc000200006283b60 0xc00000000008f1c8 (little-endian)
 0xc000200006283c40 0xc00000000002af18 (little-endian)
 0xc000200006283c70 0xc000000000114064 (little-endian)
 0xc000200006283ce0 0xc0000000001144d0 (little-endian)
 0xc000200006283e30 0xc00000000000b288 (little-endian)
 0x00007fffe28d0cb0

We can see the stack unwind from OPAL to Linux to userspace (which
does not get decoded -- yet).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v4: Fix missed last frame and tidied up output

 src/thread.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 85 insertions(+), 10 deletions(-)

Comments

Alistair Popple Nov. 2, 2018, 12:16 a.m. UTC | #1
Thanks Nick!

On Wednesday, 31 October 2018 9:58:03 AM AEDT Nicholas Piggin wrote:
> +		/*
> +		 * Basic endian detection.
> +		 * Stack grows down, so as we unwind it we expect to see
> +		 * increasing addresses without huge jumps.  The stack may
> +		 * switch endian-ness across frames in some cases (e.g., LE
> +		 * kernel calling BE OPAL).
> +		 */
> +
> +		/* Check for OPAL stack -> Linux stack */
> +		if ((sp >= 0x30000000UL && sp < 0x40000000UL) &&
> +				!(tmp >= 0x30000000UL && tmp < 0x40000000UL)) {
> +			if (tmp >> 60 == 0xc)
> +				goto no_flip;
> +			if (tmp2 >> 60 == 0xc)
> +				goto do_flip;
> +		}
> +
> +		/* Check for Linux -> userspace */
> +		if ((sp >> 60 == 0xc) && !(tmp >> 60 == 0xc)) {
> +			finished = true; /* Don't decode userspace */
> +			if (tmp >> 60 == 0)
> +				goto no_flip;
> +			if (tmp2 >> 60 == 0)
> +				goto do_flip;
> +		}
> +
> +		/* Otherwise try to ensure sane stack */
> +		if (tmp < sp || (tmp - sp > 0xffffffffUL)) {
> +			if (tmp2 < sp || (tmp2 - sp > 0xffffffffUL)) {
> +				finished = true;
> +				goto no_flip;
> +			}
> +do_flip:
> +			next_sp = tmp2;
> +			flip = true;
> +			be = !be;
> +		} else {
> +no_flip:
> +			next_sp = tmp;

I'd struggle to call this particular bit of code elegant but it seems to get 
the job done :-)

I'm not sure it's possible but I wouldn't be upset if someone submitted a 
clean-up patch in future to simplify the control flow a little bit. However 
it's quite a useful feature and I had to do a custom build last week for 
someone wanting to use it so I'll merge it now.

Reviewed-by: Alistair Popple <alistair@popple.id.au>

> +		}
> +
> +		if (flip)
> +			pc = flip_endian(pc);
> +
> +		printf(" 0x%016" PRIx64 " 0x%016" PRIx64 " (%s)\n",
> +			sp, pc, be ? "big-endian" : "little-endian");
>  	}
> +	printf(" 0x%016" PRIx64 "\n", next_sp);
> 
>  	return 0;
>  }
diff mbox series

Patch

diff --git a/src/thread.c b/src/thread.c
index e755620..4401384 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -105,11 +105,21 @@  static int load8(struct pdbg_target *target, uint64_t addr, uint64_t *value)
 	return 1;
 }
 
+uint64_t flip_endian(uint64_t v)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return be64toh(v);
+#else
+	return le64toh(v);
+#endif
+}
+
 static int dump_stack(struct thread_regs *regs)
 {
 	struct pdbg_target *target;
-	uint64_t sp = regs->gprs[1];
+	uint64_t next_sp = regs->gprs[1];
 	uint64_t pc;
+	bool finished = false;
 
 	pdbg_for_each_class_target("adu", target) {
 		if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED)
@@ -117,23 +127,88 @@  static int dump_stack(struct thread_regs *regs)
 		break;
 	}
 
-	printf("STACK:\n");
+	printf("STACK:           SP                NIA\n");
 	if (!target)
 		pdbg_log(PDBG_ERROR, "Unable to read memory (no ADU found)\n");
 
-	if (sp && is_real_address(regs, sp)) {
-		if (!load8(target, sp, &sp))
+	if (!(next_sp && is_real_address(regs, next_sp))) {
+		printf("SP:0x%016" PRIx64 " does not appear to be a stack\n", next_sp);
+		return 0;
+	}
+
+	while (!finished) {
+		uint64_t sp = next_sp;
+		uint64_t tmp, tmp2;
+		bool flip = false;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+		bool be = false;
+#else
+		bool be = true;
+#endif
+
+		if (!is_real_address(regs, sp))
+			break;
+
+		if (!load8(target, sp, &tmp))
+			return 1;
+		if (!load8(target, sp + 16, &pc))
 			return 1;
-		while (sp && is_real_address(regs, sp)) {
-			if (!load8(target, sp + 16, &pc))
-				return 1;
 
-			printf(" 0x%016" PRIx64 " 0x%16" PRIx64 "\n", sp, pc);
+		tmp2 = flip_endian(tmp);
 
-			if (!load8(target, sp, &sp))
-				return 1;
+		if (!tmp) {
+			finished = true;
+			goto no_flip;
 		}
+
+		/*
+		 * Basic endian detection.
+		 * Stack grows down, so as we unwind it we expect to see
+		 * increasing addresses without huge jumps.  The stack may
+		 * switch endian-ness across frames in some cases (e.g., LE
+		 * kernel calling BE OPAL).
+		 */
+
+		/* Check for OPAL stack -> Linux stack */
+		if ((sp >= 0x30000000UL && sp < 0x40000000UL) &&
+				!(tmp >= 0x30000000UL && tmp < 0x40000000UL)) {
+			if (tmp >> 60 == 0xc)
+				goto no_flip;
+			if (tmp2 >> 60 == 0xc)
+				goto do_flip;
+		}
+
+		/* Check for Linux -> userspace */
+		if ((sp >> 60 == 0xc) && !(tmp >> 60 == 0xc)) {
+			finished = true; /* Don't decode userspace */
+			if (tmp >> 60 == 0)
+				goto no_flip;
+			if (tmp2 >> 60 == 0)
+				goto do_flip;
+		}
+
+		/* Otherwise try to ensure sane stack */
+		if (tmp < sp || (tmp - sp > 0xffffffffUL)) {
+			if (tmp2 < sp || (tmp2 - sp > 0xffffffffUL)) {
+				finished = true;
+				goto no_flip;
+			}
+do_flip:
+			next_sp = tmp2;
+			flip = true;
+			be = !be;
+		} else {
+no_flip:
+			next_sp = tmp;
+		}
+
+		if (flip)
+			pc = flip_endian(pc);
+
+		printf(" 0x%016" PRIx64 " 0x%016" PRIx64 " (%s)\n",
+			sp, pc, be ? "big-endian" : "little-endian");
 	}
+	printf(" 0x%016" PRIx64 "\n", next_sp);
 
 	return 0;
 }