@@ -105,10 +105,19 @@ static int load8(struct pdbg_target *target, uint64_t addr, uint64_t *value)
return 1;
}
+uint64_t flip_endian(uint64_t v) /* Return v with its byte order reversed, whatever the host endianness. */
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return be64toh(v); /* little-endian host: big-endian -> host conversion swaps the bytes */
+#else
+ return le64toh(v); /* otherwise: little-endian -> host conversion swaps the bytes */
+#endif
+}
+
static int dump_stack(struct thread_regs *regs)
{
struct pdbg_target *target;
- uint64_t sp = regs->gprs[1];
+ uint64_t next_sp = regs->gprs[1];
uint64_t pc;
pdbg_for_each_class_target("adu", target) {
@@ -117,22 +126,73 @@ static int dump_stack(struct thread_regs *regs)
break;
}
- printf("STACK:\n");
+ printf("STACK: SP NIA\n");
if (!target)
pdbg_log(PDBG_ERROR, "Unable to read memory (no ADU found)\n");
- if (sp && is_real_address(regs, sp)) {
- if (!load8(target, sp, &sp))
+ if (!(next_sp && is_real_address(regs, next_sp))) {
+ printf("SP:0x%016" PRIx64 " does not appear to be a stack\n", next_sp);
+ return 0;
+ }
+
+ for (;;) {
+ uint64_t sp = next_sp;
+ uint64_t tmp, tmp2;
+ bool flip = false;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ bool be = false;
+#else
+ bool be = true;
+#endif
+
+ if (!load8(target, sp, &tmp))
+ return 1;
+ if (!load8(target, sp + 16, &pc))
return 1;
- while (sp && is_real_address(regs, sp)) {
- if (!load8(target, sp + 16, &pc))
- return 1;
- printf(" 0x%016" PRIx64 " 0x%16" PRIx64 "\n", sp, pc);
+ if (!tmp) {
+badstack:
+ printf("SP:0x%016" PRIx64 " points to 0x%016" PRIx64 ", not unwinding\n", sp, tmp);
+ return 0;
+ }
+
+ tmp2 = flip_endian(tmp);
+
+ /*
+ * Basic endian detection.
+ * Stack grows down, so as we unwind it we expect to see
+ * increasing addresses without huge jumps. The stack may
+ * switch endian-ness across frames in some cases (e.g., LE
+ * kernel calling BE OPAL).
+ */
+ if (sp >= 0x30000000UL && sp < 0x40000000UL) {
+ /* Check for OPAL stack -> Linux stack */
+ if (tmp >> 60 == 0xc)
+ goto no_flip;
+ else if (tmp2 >> 60 == 0xc)
+ goto do_flip;
+ }
- if (!load8(target, sp, &sp))
- return 1;
+ if (tmp < sp || (tmp - sp > 0xffffffffUL)) {
+ if (tmp2 < sp || (tmp2 - sp > 0xffffffffUL))
+ goto badstack;
+do_flip:
+ next_sp = tmp2;
+ flip = true;
+ be = !be;
+ } else {
+no_flip:
+ next_sp = tmp;
}
+
+ if (!is_real_address(regs, sp))
+ break;
+
+ if (flip)
+ pc = flip_endian(pc);
+
+ printf(" 0x%016" PRIx64 " 0x%016" PRIx64 " (%s)\n",
+ sp, pc, be ? "big-endian" : "little-endian");
}
return 0;
 The stack unwinder currently does not do any endian conversion, which means it won't work correctly if the stack does not match the endianness of pdbg. This patch attempts an endian flip if the stack looks wrong, and goes with that if it's an improvement. It also has some magic hackery to take OPAL->Linux into account. Unfortunately this is not a "clean" fully general solution, but works reasonably well in practice. This is the regs --backtrace output for a test that has a CPU hang in an OPAL call from Linux: STACK: SP NIA 0x0000000031c43cb0 0x000000003002b324 (big-endian) 0x0000000031c43d20 0x00000000300051e4 (big-endian) 0xc000200006283b60 0xc00000000008f1c8 (little-endian) 0xc000200006283c40 0xc00000000002af18 (little-endian) 0xc000200006283c70 0xc000000000114064 (little-endian) 0xc000200006283ce0 0xc0000000001144d0 (little-endian) SP:0xc000200006283e30 points to 0x00007fffe28d0cb0, not unwinding We can see the stack unwind from OPAL to Linux to userspace (which does not get decoded -- yet). Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- v3: improve detection a bit more to cross OPAL/Linux boundary (suggested by mpe) src/thread.c | 80 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 10 deletions(-)