diff mbox series

[RFC,2/6] powerpc/trace: Add support for stack tracer

Message ID 6ed4941e8ff48729a14b24c8e0d0f876fe8f22e0.1621577151.git.naveen.n.rao@linux.vnet.ibm.com (mailing list archive)
State RFC
Headers show
Series powerpc: Stack tracer fixes | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (258eb1f3aaa9face35e613c229c1337263491ea0)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 3 warnings, 1 checks, 103 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Naveen N. Rao May 21, 2021, 6:48 a.m. UTC
With -mprofile-kernel and ppc32, we call into ftrace at function entry
before the function can establish its own stack frame. This breaks the
ABI since functions are expected to set up a stack frame before calling
into another function. As a consequence of this, when walking the stack,
the ftraced function does not show up in the stack trace.

Fix this by checking for ftrace entry addresses (ftrace_[regs_]call+4) in
the stack trace and looking up the nip stored in the pt_regs saved in the
ftrace stack frame.
Use the back chain from the stack frame headers to accurately determine
the stack frame sizes, except for the ftraced function on
-mprofile-kernel and ppc32 where we set the frame size to 0.

The max stack tracer ftrace selftest (ftrace/func_stack_tracer.tc)
passes on -mprofile-kernel with this patch.

Before this patch, top of a stack trace with the stack tracer:
        Depth    Size   Location    (44 entries)
        -----    ----   --------
  0)     7616     496   ftrace_call+0x4/0x44
  1)     7120      64   __mod_lruvec_page_state+0x90/0x110
  2)     7056      96   test_clear_page_writeback+0xe4/0x480
  3)     6960      48   end_page_writeback+0xa0/0x1c0
  4)     6912     256   ext4_finish_bio+0x2c0/0x350
  5)     6656     176   ext4_end_bio+0x74/0x280
  6)     6480      64   bio_endio+0x1cc/0x240
  7)     6416     176   blk_update_request+0x2b8/0x640
  8)     6240      64   blk_mq_end_request+0x3c/0x1e0
  9)     6176      48   virtblk_request_done+0x48/0xd0
 10)     6128      48   blk_complete_reqs+0x80/0xa0
 11)     6080     240   __do_softirq+0x150/0x408
 12)     5840      32   irq_exit+0x144/0x150
 13)     5808      80   do_IRQ+0xc8/0x140
 14)     5728      32   hardware_interrupt_common_virt+0x1a4/0x1b0
 15)     5696      64   0x0
 16)     5632     768   virtqueue_notify+0x40/0x80
 17)     4864     240   virtio_queue_rq+0x568/0x610
 18)     4624     256   blk_mq_dispatch_rq_list+0x190/0xbc0
 19)     4368     160   __blk_mq_do_dispatch_sched+0x1f0/0x3d0
 20)     4208      96   __blk_mq_sched_dispatch_requests+0x238/0x2c0
 ...

After this patch:
        Depth    Size   Location    (44 entries)
        -----    ----   --------
  0)     7136       0   rcu_read_unlock_strict+0x8/0x10
  1)     7136      64   __mod_lruvec_page_state+0x90/0x110
  2)     7072      96   test_clear_page_writeback+0xe4/0x480
  3)     6976      48   end_page_writeback+0xa0/0x1c0
  4)     6928     256   ext4_finish_bio+0x2c0/0x350
  5)     6672     176   ext4_end_bio+0x74/0x280
  6)     6496      64   bio_endio+0x1cc/0x240
  7)     6432     176   blk_update_request+0x2b8/0x640
  8)     6256      64   blk_mq_end_request+0x3c/0x1e0
  9)     6192      48   virtblk_request_done+0x48/0xd0
 10)     6144      48   blk_complete_reqs+0x80/0xa0
 11)     6096     240   __do_softirq+0x150/0x408
 12)     5856      32   irq_exit+0x144/0x150
 13)     5824      80   do_IRQ+0xc8/0x140
 14)     5744     784   hardware_interrupt_common_virt+0x1a4/0x1b0
 15)     4960      32   0x0
 16)     4928      48   virtqueue_notify+0x40/0x80
 17)     4880     240   virtio_queue_rq+0x568/0x610
 18)     4640     256   blk_mq_dispatch_rq_list+0x190/0xbc0
 19)     4384     160   __blk_mq_do_dispatch_sched+0x1f0/0x3d0
 20)     4224      96   __blk_mq_sched_dispatch_requests+0x238/0x2c0
 ...

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ftrace.h  | 18 ++++++++
 arch/powerpc/kernel/trace/ftrace.c | 70 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

Comments

Naveen N. Rao June 1, 2021, 1:51 p.m. UTC | #1
Naveen N. Rao wrote:
> +
> +unsigned long ftrace_get_traced_func_if_no_stackframe(unsigned long ip, unsigned long *stack)
> +{
> +	if (!is_ftrace_entry(ip))
> +		return 0;
> +
> +	if (IS_ENABLED(CONFIG_PPC32))
> +		return stack[11]; /* see MCOUNT_SAVE_FRAME */
> +
> +	if (!IS_ENABLED(CONFIG_MPROFILE_KERNEL))
> +		return 0;
> +
> +	return stack[(STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, nip)) / sizeof(unsigned long)];

Looking at Daniel's patch to address KASAN errors with our stack walk 
code in show_stack() [*], I realized that I am not validating the stack 
pointer here for the above accesses...

[*] http://lkml.kernel.org/r/20210528074806.1311297-1-dja@axtens.net

> +}
> +
> +#ifdef CONFIG_STACK_TRACER
> +void stack_get_trace(unsigned long traced_ip,
> +		     unsigned long *stack_ref __maybe_unused,
> +		     unsigned long stack_size __maybe_unused,
> +		     int *tracer_frame)
> +{
> +	unsigned long sp, newsp, top, ip;
> +	int ftrace_call_found = 0;
> +	unsigned long *stack;
> +	int i = 0;
> +
> +	sp = current_stack_frame();
> +	top = (unsigned long)task_stack_page(current) + THREAD_SIZE;
> +
> +	while (validate_sp(sp, current, STACK_FRAME_OVERHEAD) && i < STACK_TRACE_ENTRIES) {
> +		stack = (unsigned long *) sp;
> +		newsp = stack[0];
> +		ip = stack[STACK_FRAME_LR_SAVE];
> +
> +		if (ftrace_call_found) {
> +			stack_dump_trace[i] = ip;
> +			stack_trace_index[i++] = top - sp;
> +		}

And I need to make the above accesses bypass KASAN as well.


- Naveen
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index debe8c4f706260..392296df70e96c 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -126,6 +126,24 @@  static inline void this_cpu_enable_ftrace(void) { }
 static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
 static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
 #endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_FUNCTION_TRACER
+/*
+ * With ppc64 -mprofile-kernel and ppc32, mcount call is made before a function
+ * establishes its own stack frame. While unwinding the stack, such functions
+ * do not appear in the trace. This helper returns the traced function if ip in
+ * the stack frame points to ftrace_[regs_]call.
+ *
+ * In ppc64 ELFv1, mcount call is after a function establishes its own
+ * stackframe. So, this always returns 0.
+ */
+unsigned long ftrace_get_traced_func_if_no_stackframe(unsigned long ip, unsigned long *stack);
+#else
+static inline unsigned long ftrace_get_traced_func_if_no_stackframe(unsigned long ip, unsigned long *stack)
+{
+	return 0;
+}
+#endif /* FUNCTION_TRACER */
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_FTRACE */
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index ffe9537195aa33..ec1072d9a858d0 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -21,6 +21,7 @@ 
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/asm-prototypes.h>
 #include <asm/cacheflush.h>
@@ -987,3 +988,72 @@  char *arch_ftrace_match_adjust(char *str, const char *search)
 		return str;
 }
 #endif /* PPC64_ELF_ABI_v1 */
+
+static int is_ftrace_entry(unsigned long ip)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	if (ip == (unsigned long)&ftrace_call + 4 || ip == (unsigned long)&ftrace_regs_call + 4)
+#else
+	if (ip == (unsigned long)&ftrace_call + 4)
+#endif
+		return 1;
+
+	return 0;
+}
+
+unsigned long ftrace_get_traced_func_if_no_stackframe(unsigned long ip, unsigned long *stack)
+{
+	if (!is_ftrace_entry(ip))
+		return 0;
+
+	if (IS_ENABLED(CONFIG_PPC32))
+		return stack[11]; /* see MCOUNT_SAVE_FRAME */
+
+	if (!IS_ENABLED(CONFIG_MPROFILE_KERNEL))
+		return 0;
+
+	return stack[(STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, nip)) / sizeof(unsigned long)];
+}
+
+#ifdef CONFIG_STACK_TRACER
+void stack_get_trace(unsigned long traced_ip,
+		     unsigned long *stack_ref __maybe_unused,
+		     unsigned long stack_size __maybe_unused,
+		     int *tracer_frame)
+{
+	unsigned long sp, newsp, top, ip;
+	int ftrace_call_found = 0;
+	unsigned long *stack;
+	int i = 0;
+
+	sp = current_stack_frame();
+	top = (unsigned long)task_stack_page(current) + THREAD_SIZE;
+
+	while (validate_sp(sp, current, STACK_FRAME_OVERHEAD) && i < STACK_TRACE_ENTRIES) {
+		stack = (unsigned long *) sp;
+		newsp = stack[0];
+		ip = stack[STACK_FRAME_LR_SAVE];
+
+		if (ftrace_call_found) {
+			stack_dump_trace[i] = ip;
+			stack_trace_index[i++] = top - sp;
+		}
+
+		if (is_ftrace_entry(ip)) {
+			if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) {
+				stack_dump_trace[i] = ftrace_get_traced_func_if_no_stackframe(ip, stack);
+				stack_trace_index[i++] = top - newsp;
+			}
+			if (unlikely(!*tracer_frame)) {
+				*tracer_frame = newsp - (unsigned long)stack_ref;
+				stack_trace_max_size -= *tracer_frame;
+			}
+			ftrace_call_found = 1;
+		}
+
+		sp = newsp;
+	}
+
+	stack_trace_nr_entries = i;
+}
+#endif