diff mbox

[U-Boot,v2,07/21] Add trace library

Message ID 1370974493-21822-8-git-send-email-sjg@chromium.org
State Accepted
Delegated to: Tom Rini
Headers show

Commit Message

Simon Glass June 11, 2013, 6:14 p.m. UTC
Add a library which supports tracing of execution using built-in gcc
features and a microsecond timer. This can be used to record a list of
function which are executed, along with a timestamp for each. Later
this information can be sent to the host for processing.

Signed-off-by: Simon Glass <sjg@chromium.org>
---
Changes in v2:
- Adjust printf() type to avoid warning on x86

 doc/README.trace | 361 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/common.h |   4 +
 include/trace.h  | 125 ++++++++++++++++++
 lib/Makefile     |   1 +
 lib/trace.c      | 379 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 870 insertions(+)
 create mode 100644 doc/README.trace
 create mode 100644 include/trace.h
 create mode 100644 lib/trace.c
diff mbox

Patch

diff --git a/doc/README.trace b/doc/README.trace
new file mode 100644
index 0000000..b535c06
--- /dev/null
+++ b/doc/README.trace
@@ -0,0 +1,361 @@ 
+#
+# Copyright (c) 2013 The Chromium OS Authors.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundatio; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+Tracing in U-Boot
+=================
+
+U-Boot supports a simple tracing feature which allows a record of excecution
+to be collected and sent to a host machine for analysis. At present the
+main use for this is to profile boot time.
+
+
+Overview
+--------
+
+The trace feature uses GCC's instrument-functions feature to trace all
+function entry/exit points. These are then recorded in a memory buffer.
+The memory buffer can be saved to the host over a network link using
+tftpput or by writing to an attached memory device such as MMC.
+
+On the host, the file is first converted with a tool called 'proftool',
+which extracts useful information from it. The resulting trace output
+resembles that emitted by Linux's ftrace feature, so can be visually
+displayed by pytimechart.
+
+
+Quick-start using Sandbox
+-------------------------
+
+Sandbox is a build of U-Boot that can run under Linux so it is a convenient
+way of trying out tracing before you use it on your actual board. To do
+this, follow these steps:
+
+Add the following to include/configs/sandbox.h (if not already there)
+
+#define CONFIG_TRACE
+#define CONFIG_CMD_TRACE
+#define CONFIG_TRACE_BUFFER_SIZE		(16 << 20)
+#define CONFIG_TRACE_EARLY_SIZE		(8 << 20)
+#define CONFIG_TRACE_EARLY
+#define CONFIG_TRACE_EARLY_ADDR		0x00100000
+
+Build sandbox U-Boot with tracing enabled:
+
+$ make FTRACE=1 O=sandbox sandbox_config
+$ make FTRACE=1 O=sandbox
+
+Run sandbox, wait for a bit of trace information to appear, and then capture
+a trace:
+
+$ ./sandbox/u-boot
+
+
+U-Boot 2013.04-rc2-00100-ga72fcef (Apr 17 2013 - 19:25:24)
+
+DRAM:  128 MiB
+trace: enabled
+Using default environment
+
+In:    serial
+Out:   serial
+Err:   serial
+=>trace stats
+        671,406 function sites
+         69,712 function calls
+              0 untracked function calls
+         73,373 traced function calls
+             16 maximum observed call depth
+             15 call depth limit
+         66,491 calls not traced due to depth
+=>trace stats
+        671,406 function sites
+      1,279,450 function calls
+              0 untracked function calls
+        950,490 traced function calls (333217 dropped due to overflow)
+             16 maximum observed call depth
+             15 call depth limit
+      1,275,767 calls not traced due to depth
+=>trace calls 0 e00000
+Call list dumped to 00000000, size 0xae0a40
+=>print
+baudrate=115200
+profbase=0
+profoffset=ae0a40
+profsize=e00000
+stderr=serial
+stdin=serial
+stdout=serial
+
+Environment size: 117/8188 bytes
+=>sb save host 0 trace 0 ${profoffset}
+11405888 bytes written in 10 ms (1.1 GiB/s)
+=>reset
+
+
+Then run proftool to convert the trace information to ftrace format.
+
+$ ./sandbox/tools/proftool -m sandbox/System.map -p trace dump-ftrace >trace.txt
+
+Finally run pytimechart to display it:
+
+$ pytimechart trace.txt
+
+Using this tool you can zoom and pan across the trace, with the function
+calls on the left and little marks representing the start and end of each
+function.
+
+
+CONFIG Options
+--------------
+
+- CONFIG_TRACE
+		Enables the trace feature in U-Boot.
+
+- CONFIG_CMD_TRACE
+		Enables the trace command.
+
+- CONFIG_TRACE_BUFFER_SIZE
+		Size of trace buffer to allocate for U-Boot. This buffer is
+		used after relocation, as a place to put function tracing
+		information. The address of the buffer is determined by
+		the relocation code.
+
+- CONFIG_TRACE_EARLY
+		Define this to start tracing early, before relocation.
+
+- CONFIG_TRACE_EARLY_SIZE
+		Size of 'early' trace buffer. Before U-Boot has relocated
+		it doesn't have a proper trace buffer. On many boards
+		you can define an area of memory to use for the trace
+		buffer until the 'real' trace buffer is available after
+		relocation. The contents of this buffer are then copied to
+		the real buffer.
+
+- CONFIG_TRACE_EARLY_ADDR
+		Address of early trace buffer
+
+
+Building U-Boot with Tracing Enabled
+------------------------------------
+
+Pass 'FTRACE=1' to the U-Boot Makefile to actually instrument the code.
+This is kept as a separate option so that it is easy to enable/disable
+instrumenting from the command line instead of having to change board
+config files.
+
+
+Collecting Trace Data
+---------------------
+
+When you run U-Boot on your board it will collect trace data up to the
+limit of the trace buffer size you have specified. Once that is exhausted
+no more data will be collected.
+
+Collecting trace data has an affect on execution time/performance. You
+will notice this particularly with trvial functions - the overhead of
+recording their execution may even exceed their normal execution time.
+In practice this doesn't matter much so long as you are aware of the
+effect. Once you have done your optimisations, turn off tracing before
+doing end-to-end timing.
+
+The best time to start tracing is right at the beginning of U-Boot. The
+best time to stop tracing is right at the end. In practice it is hard
+to achieve these ideals.
+
+This implementation enables tracing early in board_init_f(). This means
+that it captures most of the board init process, missing only the
+early architecture-specific init. However, it also misses the entire
+SPL stage if there is one.
+
+U-Boot typically ends with a 'bootm' command which loads and runs an
+OS. There is useful trace data in the execution of that bootm
+command. Therefore this implementation provides a way to collect trace
+data after bootm has finished processing, but just before it jumps to
+the OS. In practical terms, U-Boot runs the 'fakegocmd' environment
+variable at this point. This variable should have a short script which
+collects the trace data and writes it somewhere.
+
+Trace data collection relies on a microsecond timer, accesed through
+timer_get_us(). So the first think you should do is make sure that
+this produces sensible results for your board. Suitable sources for
+this timer include high resolution timers, PWMs or profile timers if
+available. Most modern SOCs have a suitable timer for this. Make sure
+that you mark this timer (and anything it calls) with
+__attribute__((no_instrument_function)) so that the trace library can
+use it without causing an infinite loop.
+
+
+Commands
+--------
+
+The trace command has variable sub-commands:
+
+- stats
+		Display tracing statistics
+
+- pause
+		Pause tracing
+
+- resume
+		Resume tracing
+
+- funclist [<addr> <size>]
+		Dump a list of functions into the buffer
+
+- calls  [<addr> <size>]
+		Dump function call trace into buffer
+
+If the address and size are not given, these are obtained from environment
+variables (see below). In any case the environment variables are updated
+after the command runs.
+
+
+Environment Variables
+---------------------
+
+The following are used:
+
+- profbase
+		Base address of trace output buffer
+
+- profoffset
+		Offset of first unwritten byte in trace output buffer
+
+- profsize
+		Size of trace output buffer
+
+All of these are set by the 'trace calls' command.
+
+These variables keep track of the amount of data written to the trace
+output buffer by the 'trace' command. The trace commands which write data
+to the output buffer can use these to specify the buffer to write to, and
+update profoffset each time. This allows successive commands to append data
+to the same buffer, for example:
+
+	trace funclist 10000 e00000
+	trace calls
+
+(the latter command appends more data to the buffer).
+
+
+- fakegocmd
+		Specifies commands to run just before booting the OS. This
+		is a useful time to write the trace data to the host for
+		processing.
+
+
+Writing Out Trace Data
+----------------------
+
+Once the trace data is in an output buffer in memory there are various ways
+to transmit it to the host. Notably you can use tftput to send the data
+over a network link:
+
+fakegocmd=trace pause; usb start; set autoload n; bootp;
+	trace calls 10000000 1000000;
+	tftpput ${profbase} ${profoffset} 192.168.1.4:/tftpboot/calls
+
+This starts up USB (to talk to an attached USB Ethernet dongle), writes
+a trace log to address 10000000 and sends it to a host machine using
+TFTP. After this, U-Boot will boot the OS normally, albeit a little
+later.
+
+
+Converting Trace Output Data
+----------------------------
+
+The trace output data is kept in a binary format which is not documented
+here. To convert it into something useful, you can use proftool.
+
+This tool must be given the U-Boot map file and the trace data received
+from running that U-Boot. It produces a text output file.
+
+Options
+	-m <map_file>
+		Specify U-Boot map file
+
+	-p <trace_file>
+		Specifiy profile/trace file
+
+Commands:
+
+- dump-ftrace
+	Write a text dump of the file in Linux ftrace format to stdout
+
+
+Viewing the Trace Data
+----------------------
+
+You can use pytimechart for this (sudo apt-get pytimechart might work on
+your Debian-style machine, and use your favourite search engine to obtain
+documentation). It expects the file to have a .txt extension. The program
+has terse user interface but is very convenient for viewing U-Boot
+profile information.
+
+
+Workflow Suggestions
+--------------------
+
+The following suggestions may be helpful if you are trying to reduce boot
+time:
+
+1. Enable CONFIG_BOOTSTAGE and CONFIG_BOOTSTAGE_REPORT. This should get
+you are helpful overall snapshot of the boot time.
+
+2. Build U-Boot with tracing and run it. Note the difference in boot time
+(it is common for tracing to add 10% to the time)
+
+3. Collect the trace information as descibed above. Use this to find where
+all the time is being spent.
+
+4. Take a look at that code and see if you can optimise it. Perhaps it is
+possible to speed up the initialisation of a device, or remove an unused
+feature.
+
+5. Rebuild, run and collect again. Compare your results.
+
+6. Keep going until you run out of steam, or your boot is fast enough.
+
+
+Configuring Trace
+-----------------
+
+There are a few parameters in the code that you may want to consider.
+There is a function call depth limit (set to 15 by default). When the
+stack depth goes above this then no tracing information is recorded.
+The maximum depth reached is recorded and displayed by the 'trace stats'
+command.
+
+
+Future Work
+-----------
+
+Tracing could be a little tidier in some areas, for example providing
+run-time configuration options for trace.
+
+Some other features that might be useful:
+
+- Trace filter to select which functions are recorded
+- Sample-based profiling using a timer interrupt
+- Better control over trace depth
+- Compression of trace information
+
+
+Simon Glass <sjg@chromium.org>
+April 2013
diff --git a/include/common.h b/include/common.h
index 126891d..e5220cf 100644
--- a/include/common.h
+++ b/include/common.h
@@ -750,6 +750,10 @@  void	irq_install_handler(int, interrupt_handler_t *, void *);
 void	irq_free_handler   (int);
 void	reset_timer	   (void);
 ulong	get_timer	   (ulong base);
+
+/* Return value of monotonic microsecond timer */
+unsigned long timer_get_us(void);
+
 void	enable_interrupts  (void);
 int	disable_interrupts (void);
 
diff --git a/include/trace.h b/include/trace.h
new file mode 100644
index 0000000..8082466
--- /dev/null
+++ b/include/trace.h
@@ -0,0 +1,125 @@ 
+/*
+ * Copyright (c) 2012 The Chromium OS Authors.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __TRACE_H
+#define __TRACE_H
+
+enum {
+	/*
+	 * This affects the granularity of our trace. We can bin function
+	 * entry points into groups on the basis that functions typically
+	 * have a minimum size, so entry points can't appear any closer
+	 * than this to each other.
+	 *
+	 * The value here assumes a minimum instruction size of 4 bytes,
+	 * or that instructions are 2 bytes but there are at least 2 of
+	 * them in every function.
+	 *
+	 * Increasing this value reduces the number of functions we can
+	 * resolve, but reduces the size of the uintptr_t array used for
+	 * our function list, which is the length of the code divided by
+	 * this value.
+	 */
+	FUNC_SITE_SIZE	= 4,	/* distance between function sites */
+};
+
+enum trace_chunk_type {
+	TRACE_CHUNK_FUNCS,
+	TRACE_CHUNK_CALLS,
+};
+
+/* A trace record for a function, as written to the profile output file */
+struct trace_output_func {
+	uint32_t offset;		/* Function offset into code */
+	uint32_t call_count;		/* Number of times called */
+};
+
+/* A header at the start of the trace output buffer */
+struct trace_output_hdr {
+	enum trace_chunk_type type;	/* Record type */
+	uint32_t rec_count;		/* Number of records */
+};
+
+/* Print statistics about traced function calls */
+void trace_print_stats(void);
+
+/**
+ * Dump a list of functions and call counts into a buffer
+ *
+ * Each record in the buffer is a struct trace_func_stats. The 'needed'
+ * parameter returns the number of bytes needed to complete the operation,
+ * which may be more than buff_size if your buffer is too small.
+ *
+ * @param buff		Buffer in which to place data, or NULL to count size
+ * @param buff_size	Size of buffer
+ * @param needed	Returns number of bytes used / needed
+ * @return 0 if ok, -1 on error (buffer exhausted)
+ */
+int trace_list_functions(void *buff, int buff_size, unsigned *needed);
+
+/* Flags for ftrace_record */
+enum ftrace_flags {
+	FUNCF_EXIT		= 0UL << 30,
+	FUNCF_ENTRY		= 1UL << 30,
+	FUNCF_TEXTBASE		= 2UL << 30,
+
+	FUNCF_TIMESTAMP_MASK	= 0x3fffffff,
+};
+
+#define TRACE_CALL_TYPE(call)	((call)->flags & 0xc0000000UL)
+
+/* Information about a single function entry/exit */
+struct trace_call {
+	uint32_t func;		/* Function offset */
+	uint32_t caller;	/* Caller function offset */
+	uint32_t flags;		/* Flags and timestamp */
+};
+
+int trace_list_calls(void *buff, int buff_size, unsigned int *needed);
+
+/**
+ * Turn function tracing on and off
+ *
+ * Don't enable trace if it has not been initialised.
+ *
+ * @param enabled	1 to enable trace, 0 to disable
+ */
+void trace_set_enabled(int enabled);
+
+#ifdef CONFIG_TRACE_EARLY
+int trace_early_init(void);
+#else
+static inline int trace_early_init(void)
+{
+	return 0;
+}
+#endif
+
+/**
+ * Init the trace system
+ *
+ * This should be called after relocation with a suitably large buffer
+ * (typically as large as the U-Boot text area)
+ *
+ * @param buff		Pointer to trace buffer
+ * @param buff_size	Size of trace buffer
+ */
+int trace_init(void *buff, size_t buff_size);
+
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 5d58609..30d576f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -77,6 +77,7 @@  COBJS-y += linux_string.o
 COBJS-$(CONFIG_REGEX) += slre.o
 COBJS-y += string.o
 COBJS-y += time.o
+COBJS-$(CONFIG_TRACE) += trace.o
 COBJS-$(CONFIG_BOOTP_PXE) += uuid.o
 COBJS-y += vsprintf.o
 COBJS-$(CONFIG_RANDOM_MACADDR) += rand.o
diff --git a/lib/trace.c b/lib/trace.c
new file mode 100644
index 0000000..e7455bc
--- /dev/null
+++ b/lib/trace.c
@@ -0,0 +1,379 @@ 
+/*
+ * Copyright (c) 2012 The Chromium OS Authors.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <trace.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static char trace_enabled __attribute__((section(".data")));
+static char trace_inited __attribute__((section(".data")));
+
+/* The header block at the start of the trace memory area */
+struct trace_hdr {
+	int func_count;		/* Total number of function call sites */
+	u64 call_count;		/* Total number of tracked function calls */
+	u64 untracked_count;	/* Total number of untracked function calls */
+	int funcs_used;		/* Total number of functions used */
+
+	/*
+	 * Call count for each function. This is indexed by the word offset
+	 * of the function from gd->relocaddr
+	 */
+	uintptr_t *call_accum;
+
+	/* Function trace list */
+	struct trace_call *ftrace;	/* The function call records */
+	ulong ftrace_size;	/* Num. of ftrace records we have space for */
+	ulong ftrace_count;	/* Num. of ftrace records written */
+	ulong ftrace_too_deep_count;	/* Functions that were too deep */
+
+	int depth;
+	int depth_limit;
+	int max_depth;
+};
+
+static struct trace_hdr *hdr;	/* Pointer to start of trace buffer */
+
+static inline uintptr_t __attribute__((no_instrument_function))
+		func_ptr_to_num(void *func_ptr)
+{
+	uintptr_t offset = (uintptr_t)func_ptr;
+
+#ifdef CONFIG_SANDBOX
+	offset -= (uintptr_t)&_init;
+#else
+	if (gd->flags & GD_FLG_RELOC)
+		offset -= gd->relocaddr;
+	else
+		offset -= CONFIG_SYS_TEXT_BASE;
+#endif
+	return offset / FUNC_SITE_SIZE;
+}
+
+static void __attribute__((no_instrument_function)) add_ftrace(void *func_ptr,
+				void *caller, ulong flags)
+{
+	if (hdr->depth > hdr->depth_limit) {
+		hdr->ftrace_too_deep_count++;
+		return;
+	}
+	if (hdr->ftrace_count < hdr->ftrace_size) {
+		struct trace_call *rec = &hdr->ftrace[hdr->ftrace_count];
+
+		rec->func = func_ptr_to_num(func_ptr);
+		rec->caller = func_ptr_to_num(caller);
+		rec->flags = flags | (timer_get_us() & FUNCF_TIMESTAMP_MASK);
+	}
+	hdr->ftrace_count++;
+}
+
+static void __attribute__((no_instrument_function)) add_textbase(void)
+{
+	if (hdr->ftrace_count < hdr->ftrace_size) {
+		struct trace_call *rec = &hdr->ftrace[hdr->ftrace_count];
+
+		rec->func = CONFIG_SYS_TEXT_BASE;
+		rec->caller = 0;
+		rec->flags = FUNCF_TEXTBASE;
+	}
+	hdr->ftrace_count++;
+}
+
+/**
+ * This is called on every function entry
+ *
+ * We add to our tally for this function and add to the list of called
+ * functions.
+ *
+ * @param func_ptr	Pointer to function being entered
+ * @param caller	Pointer to function which called this function
+ */
+void __attribute__((no_instrument_function)) __cyg_profile_func_enter(
+		void *func_ptr, void *caller)
+{
+	if (trace_enabled) {
+		int func;
+
+		add_ftrace(func_ptr, caller, FUNCF_ENTRY);
+		func = func_ptr_to_num(func_ptr);
+		if (func < hdr->func_count) {
+			hdr->call_accum[func]++;
+			hdr->call_count++;
+		} else {
+			hdr->untracked_count++;
+		}
+		hdr->depth++;
+		if (hdr->depth > hdr->depth_limit)
+			hdr->max_depth = hdr->depth;
+	}
+}
+
+/**
+ * This is called on every function exit
+ *
+ * We do nothing here.
+ *
+ * @param func_ptr	Pointer to function being entered
+ * @param caller	Pointer to function which called this function
+ */
+void __attribute__((no_instrument_function)) __cyg_profile_func_exit(
+		void *func_ptr, void *caller)
+{
+	if (trace_enabled) {
+		add_ftrace(func_ptr, caller, FUNCF_EXIT);
+		hdr->depth--;
+	}
+}
+
+/**
+ * Produce a list of called functions
+ *
+ * The information is written into the supplied buffer - a header followed
+ * by a list of function records.
+ *
+ * @param buff		Buffer to place list into
+ * @param buff_size	Size of buffer
+ * @param needed	Returns size of buffer needed, which may be
+ *			greater than buff_size if we ran out of space.
+ * @return 0 if ok, -1 if space was exhausted
+ */
+int trace_list_functions(void *buff, int buff_size, unsigned int *needed)
+{
+	struct trace_output_hdr *output_hdr = NULL;
+	void *end, *ptr = buff;
+	int func;
+	int upto;
+
+	end = buff ? buff + buff_size : NULL;
+
+	/* Place some header information */
+	if (ptr + sizeof(struct trace_output_hdr) < end)
+		output_hdr = ptr;
+	ptr += sizeof(struct trace_output_hdr);
+
+	/* Add information about each function */
+	for (func = upto = 0; func < hdr->func_count; func++) {
+		int calls = hdr->call_accum[func];
+
+		if (!calls)
+			continue;
+
+		if (ptr + sizeof(struct trace_output_func) < end) {
+			struct trace_output_func *stats = ptr;
+
+			stats->offset = func * FUNC_SITE_SIZE;
+			stats->call_count = calls;
+			upto++;
+		}
+		ptr += sizeof(struct trace_output_func);
+	}
+
+	/* Update the header */
+	if (output_hdr) {
+		output_hdr->rec_count = upto;
+		output_hdr->type = TRACE_CHUNK_FUNCS;
+	}
+
+	/* Work out how must of the buffer we used */
+	*needed = ptr - buff;
+	if (ptr > end)
+		return -1;
+	return 0;
+}
+
+int trace_list_calls(void *buff, int buff_size, unsigned *needed)
+{
+	struct trace_output_hdr *output_hdr = NULL;
+	void *end, *ptr = buff;
+	int rec, upto;
+	int count;
+
+	end = buff ? buff + buff_size : NULL;
+
+	/* Place some header information */
+	if (ptr + sizeof(struct trace_output_hdr) < end)
+		output_hdr = ptr;
+	ptr += sizeof(struct trace_output_hdr);
+
+	/* Add information about each call */
+	count = hdr->ftrace_count;
+	if (count > hdr->ftrace_size)
+		count = hdr->ftrace_size;
+	for (rec = upto = 0; rec < count; rec++) {
+		if (ptr + sizeof(struct trace_call) < end) {
+			struct trace_call *call = &hdr->ftrace[rec];
+			struct trace_call *out = ptr;
+
+			out->func = call->func * FUNC_SITE_SIZE;
+			out->caller = call->caller * FUNC_SITE_SIZE;
+			out->flags = call->flags;
+			upto++;
+		}
+		ptr += sizeof(struct trace_call);
+	}
+
+	/* Update the header */
+	if (output_hdr) {
+		output_hdr->rec_count = upto;
+		output_hdr->type = TRACE_CHUNK_CALLS;
+	}
+
+	/* Work out how must of the buffer we used */
+	*needed = ptr - buff;
+	if (ptr > end)
+		return -1;
+	return 0;
+}
+
+/* Print basic information about tracing */
+void trace_print_stats(void)
+{
+	ulong count;
+
+#ifndef FTRACE
+	puts("Warning: make U-Boot with FTRACE to enable function instrumenting.\n");
+	puts("You will likely get zeroed data here\n");
+#endif
+	if (!trace_inited) {
+		printf("Trace is disabled\n");
+		return;
+	}
+	print_grouped_ull(hdr->func_count, 10);
+	puts(" function sites\n");
+	print_grouped_ull(hdr->call_count, 10);
+	puts(" function calls\n");
+	print_grouped_ull(hdr->untracked_count, 10);
+	puts(" untracked function calls\n");
+	count = min(hdr->ftrace_count, hdr->ftrace_size);
+	print_grouped_ull(count, 10);
+	puts(" traced function calls");
+	if (hdr->ftrace_count > hdr->ftrace_size) {
+		printf(" (%lu dropped due to overflow)",
+		       hdr->ftrace_count - hdr->ftrace_size);
+	}
+	puts("\n");
+	printf("%15d maximum observed call depth\n", hdr->max_depth);
+	printf("%15d call depth limit\n", hdr->depth_limit);
+	print_grouped_ull(hdr->ftrace_too_deep_count, 10);
+	puts(" calls not traced due to depth\n");
+}
+
+void __attribute__((no_instrument_function)) trace_set_enabled(int enabled)
+{
+	trace_enabled = enabled != 0;
+}
+
+/**
+ * Init the tracing system ready for used, and enable it
+ *
+ * @param buff		Pointer to trace buffer
+ * @param buff_size	Size of trace buffer
+ */
+int __attribute__((no_instrument_function)) trace_init(void *buff,
+		size_t buff_size)
+{
+	ulong func_count = gd->mon_len / FUNC_SITE_SIZE;
+	size_t needed;
+	int was_disabled = !trace_enabled;
+
+	if (!was_disabled) {
+#ifdef CONFIG_TRACE_EARLY
+		char *end;
+		ulong used;
+
+		/*
+		 * Copy over the early trace data if we have it. Disable
+		 * tracing while we are doing this.
+		 */
+		trace_enabled = 0;
+		hdr = map_sysmem(CONFIG_TRACE_EARLY_ADDR,
+				 CONFIG_TRACE_EARLY_SIZE);
+		end = (char *)&hdr->ftrace[hdr->ftrace_count];
+		used = end - (char *)hdr;
+		printf("trace: copying %08lx bytes of early data from %x to %08lx\n",
+		       used, CONFIG_TRACE_EARLY_ADDR,
+		       (ulong)map_to_sysmem(buff));
+		memcpy(buff, hdr, used);
+#else
+		puts("trace: already enabled\n");
+		return -1;
+#endif
+	}
+	hdr = (struct trace_hdr *)buff;
+	needed = sizeof(*hdr) + func_count * sizeof(uintptr_t);
+	if (needed > buff_size) {
+		printf("trace: buffer size %zd bytes: at least %zd needed\n",
+		       buff_size, needed);
+		return -1;
+	}
+
+	if (was_disabled)
+		memset(hdr, '\0', needed);
+	hdr->func_count = func_count;
+	hdr->call_accum = (uintptr_t *)(hdr + 1);
+
+	/* Use any remaining space for the timed function trace */
+	hdr->ftrace = (struct trace_call *)(buff + needed);
+	hdr->ftrace_size = (buff_size - needed) / sizeof(*hdr->ftrace);
+	add_textbase();
+
+	puts("trace: enabled\n");
+	hdr->depth_limit = 15;
+	trace_enabled = 1;
+	trace_inited = 1;
+	return 0;
+}
+
+#ifdef CONFIG_TRACE_EARLY
+int __attribute__((no_instrument_function)) trace_early_init(void)
+{
+	ulong func_count = gd->mon_len / FUNC_SITE_SIZE;
+	size_t buff_size = CONFIG_TRACE_EARLY_SIZE;
+	size_t needed;
+
+	/* We can ignore additional calls to this function */
+	if (trace_enabled)
+		return 0;
+
+	hdr = map_sysmem(CONFIG_TRACE_EARLY_ADDR, CONFIG_TRACE_EARLY_SIZE);
+	needed = sizeof(*hdr) + func_count * sizeof(uintptr_t);
+	if (needed > buff_size) {
+		printf("trace: buffer size is %zd bytes, at least %zd needed\n",
+		       buff_size, needed);
+		return -1;
+	}
+
+	memset(hdr, '\0', needed);
+	hdr->call_accum = (uintptr_t *)(hdr + 1);
+	hdr->func_count = func_count;
+
+	/* Use any remaining space for the timed function trace */
+	hdr->ftrace = (struct trace_call *)((char *)hdr + needed);
+	hdr->ftrace_size = (buff_size - needed) / sizeof(*hdr->ftrace);
+	add_textbase();
+	hdr->depth_limit = 200;
+	printf("trace: early enable at %08x\n", CONFIG_TRACE_EARLY_ADDR);
+
+	trace_enabled = 1;
+	return 0;
+}
+#endif