[RFC] main: Implement 'dmesg' command

Message ID 20180525032304.24188-1-joel@jms.id.au
State RFC
Headers show
Series
  • [RFC] main: Implement 'dmesg' command
Related show

Commit Message

Joel Stanley May 25, 2018, 3:23 a.m.
This command fetches the linux kernel log from host RAM and displays it
on stdout.

Almost any powernv kernel will support this. However it requires a
skiboot change in order to find the location of the kernel log buffer.
On systems without that change this command will return a message saying
it is unsupported.

Signed-off-by: Joel Stanley <joel@jms.id.au>
---
The guts of the log parsing comes from benh's kmsg_dump.c.

The skiboot change has not been merged yet:

 http://patchwork.ozlabs.org/patch/919508/

More importantly, this doesn't currently work, as we cannot read the
memory region that log_buf points to. Jump to the end of this essay to
see the error.

__log_buf[] is a statically allocated buffer that the kernel uses for
it's messages. However, we refer to it using the pointer log_buf, as the
kernel may chose to reallocate the buffer.

kernel/printk/printk.c:

        static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
        static char *log_buf = __log_buf;
        static u32 log_buf_len = __LOG_BUF_LEN;

        /* Return log buffer address */
        char *log_buf_addr_get(void)
        {
                return log_buf;
        }

        ...

	if (early) {
                new_log_buf =
                        memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
        } else {
                new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
                                                          LOG_ALIGN);
        }

The value gets retrieved via log_buf_addr_get in the powernv platform
code, and sent to skiboot via the opal_register_dump_region OPAL call.

arch/powerpc/platforms/powernv/opal.c

        addr = log_buf_addr_get();
        if (addr == NULL)
                return;
        ...

        rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
                                       __pa(addr), size);

Skiboot sticks this in the debug descriptor:

core/dump-region.c

        static int64_t bmc_opal_register_dump_region(uint32_t id,
                                                     uint64_t addr, uint64_t size)
        {
                ...
                prlog(PR_INFO, "Registered log buf at 0x%016llx\n", addr);
                debug_descriptor.log_buf_phys = addr;

Looking at the host's kallsyms, we can see where __log_buf starts. If we
hardcode that as the address to read from in pdbg, we can get data out:

/proc/kallsyms:
        c000000001572a88 d log_buf
        c0000000018bde84 b __log_buf

Here's the address we actually get:

/sys/firmware/opal/msglog:
        DUMP: Registered log buf at 0x0000201cc69e0000

When we getmem:

	$ pdbg -p0 getmem 0x0000201cc69e0000 8
	p9_adu_getmem: Unable to read memory. ALTD_STATUS_REG = 0x0080000000000000

This is an invalid address error:

8     RWX_WCLRREG             FBC_ALTD_ADDRESS_ERROR: Invalid Adress Error: PB respond with Adress Errror cresp
---
 Makefile.am |   2 +-
 src/dmesg.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.c  |   2 +
 3 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 src/dmesg.c

Comments

Balbir Singh May 25, 2018, 4:21 a.m. | #1
On Fri, 25 May 2018 12:53:04 +0930
Joel Stanley <joel@jms.id.au> wrote:

> This command fetches the linux kernel log from host RAM and displays it
> on stdout.
> 
> Almost any powernv kernel will support this. However it requires a
> skiboot change in order to find the location of the kernel log buffer.
> On systems without that change this command will return a message saying
> it is unsupported.
> 
> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---
> The guts of the log parsing comes from benh's kmsg_dump.c.
> 
> The skiboot change has not been merged yet:
> 
>  http://patchwork.ozlabs.org/patch/919508/
> 
> More importantly, this doesn't currently work, as we cannot read the
> memory region that log_buf points to. Jump to the end of this essay to
> see the error.
> 
> __log_buf[] is a statically allocated buffer that the kernel uses for
> it's messages. However, we refer to it using the pointer log_buf, as the
> kernel may chose to reallocate the buffer.
> 
> kernel/printk/printk.c:
> 
>         static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
>         static char *log_buf = __log_buf;
>         static u32 log_buf_len = __LOG_BUF_LEN;
> 
>         /* Return log buffer address */
>         char *log_buf_addr_get(void)
>         {
>                 return log_buf;
>         }
> 
>         ...
> 
> 	if (early) {
>                 new_log_buf =
>                         memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
>         } else {
>                 new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
>                                                           LOG_ALIGN);
>         }
> 
> The value gets retrieved via log_buf_addr_get in the powernv platform
> code, and sent to skiboot via the opal_register_dump_region OPAL call.
> 
> arch/powerpc/platforms/powernv/opal.c
> 
>         addr = log_buf_addr_get();
>         if (addr == NULL)
>                 return;
>         ...
> 
>         rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
>                                        __pa(addr), size);
> 
> Skiboot sticks this in the debug descriptor:
> 
> core/dump-region.c
> 
>         static int64_t bmc_opal_register_dump_region(uint32_t id,
>                                                      uint64_t addr, uint64_t size)
>         {
>                 ...
>                 prlog(PR_INFO, "Registered log buf at 0x%016llx\n", addr);
>                 debug_descriptor.log_buf_phys = addr;
> 
> Looking at the host's kallsyms, we can see where __log_buf starts. If we
> hardcode that as the address to read from in pdbg, we can get data out:
>

Let's not hardcode, please. Have you tried this with CONFIG_RELOCATABLE
and CONFIG_RELOCATABLE_TEST set?

Balbir Singh.
Alistair Popple June 1, 2018, 3:43 a.m. | #2
> > Skiboot sticks this in the debug descriptor:
> > 
> > core/dump-region.c
> > 
> >         static int64_t bmc_opal_register_dump_region(uint32_t id,
> >                                                      uint64_t addr, uint64_t size)
> >         {
> >                 ...
> >                 prlog(PR_INFO, "Registered log buf at 0x%016llx\n", addr);
> >                 debug_descriptor.log_buf_phys = addr;
> > 
> > Looking at the host's kallsyms, we can see where __log_buf starts. If we
> > hardcode that as the address to read from in pdbg, we can get data out:
> >
> 
> Let's not hardcode, please. Have you tried this with CONFIG_RELOCATABLE
> and CONFIG_RELOCATABLE_TEST set?

I'm not sure the patch actually hardcodes the address if that's what you're
getting at? I think it's registered dynamically at boot time with an OPAL call.

Have you tested this on P8 at all? The ADU error is a bit strange so we will
have to get to the bottom of that, but having a dmesg command is pretty useful.

- Alistair

> Balbir Singh.
>
Joel Stanley June 1, 2018, 3:47 a.m. | #3
On 1 June 2018 at 13:13, Alistair Popple <alistair@popple.id.au> wrote:
>> > Skiboot sticks this in the debug descriptor:
>> >
>> > core/dump-region.c
>> >
>> >         static int64_t bmc_opal_register_dump_region(uint32_t id,
>> >                                                      uint64_t addr, uint64_t size)
>> >         {
>> >                 ...
>> >                 prlog(PR_INFO, "Registered log buf at 0x%016llx\n", addr);
>> >                 debug_descriptor.log_buf_phys = addr;
>> >
>> > Looking at the host's kallsyms, we can see where __log_buf starts. If we
>> > hardcode that as the address to read from in pdbg, we can get data out:
>> >
>>
>> Let's not hardcode, please. Have you tried this with CONFIG_RELOCATABLE
>> and CONFIG_RELOCATABLE_TEST set?
>
> I'm not sure the patch actually hardcodes the address if that's what you're
> getting at? I think it's registered dynamically at boot time with an OPAL call.

Yep.

> Have you tested this on P8 at all? The ADU error is a bit strange so we will
> have to get to the bottom of that, but having a dmesg command is pretty useful.

Lets park this patch for the moment, for a few reasons:

We're having discussions on the skiboot list about how to store and
retrieve this information.

In addition, the command as it stands will not work once the host
buffer has wrapped. For most cases that's fine, as the host has a nice
long buffer, and when you're debugging a machine it hasn't been up for
months.

However I can see this being used in the field at a later date, so I
will fix it to work in all cases.

Cheers,

Joel

Patch

diff --git a/Makefile.am b/Makefile.am
index cf04b942d475..e51d01f77f02 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -28,7 +28,7 @@  BUILT_SOURCES = $(DT) $(DT_headers)
 
 pdbg_SOURCES = \
 	src/main.c src/cfam.c src/scom.c src/reg.c src/mem.c src/thread.c \
-	src/ring.c src/htm.c src/progress.c src/options_@ARCH@.c
+	src/ring.c src/htm.c src/progress.c src/dmesg.c src/options_@ARCH@.c
 
 pdbg_LDADD = $(DT_objects) libpdbg.la libfdt.la \
 	-L.libs -lrt
diff --git a/src/dmesg.c b/src/dmesg.c
new file mode 100644
index 000000000000..89d925bb2de7
--- /dev/null
+++ b/src/dmesg.c
@@ -0,0 +1,218 @@ 
+/* Copyright 2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <byteswap.h>
+
+#include <operations.h>
+#include <target.h>
+#include <debug.h>
+
+#include "main.h"
+
+/* Linux kernel log buffer flags */
+enum log_flags {
+	LOG_NOCONS      = 1,    /* already flushed, do not print to console */
+        LOG_NEWLINE     = 2,    /* text ended with a newline */
+        LOG_PREFIX      = 4,    /* text started with a prefix */
+        LOG_CONT        = 8,    /* text is a fragment of a continuation line */
+};
+
+/* Linux kernel log buffer structure */
+struct log {
+        uint64_t ts_nsec;            /* timestamp in nanoseconds */
+        uint16_t len;                /* length of entire record */
+        uint16_t text_len;           /* length of text buffer */
+        uint16_t dict_len;           /* length of dictionary buffer */
+        uint8_t facility;            /* syslog facility */
+	union {
+		struct {
+			uint8_t flags:5; /* internal record flags */
+			uint8_t level:3; /* syslog level */
+		} normal;
+		struct {
+			uint8_t level:3; /* syslog level */
+			uint8_t flags:5; /* internal record flags */
+		} swapped;
+	} fl;
+};
+
+#define OPAL_DEBUG_DESCRIPTOR_ADDR	0x30000080
+
+/* OPAL debug descriptor (minus the traces) */
+struct debug_descriptor {
+	uint8_t		eye_catcher[8]; /* "OPALdbug" */
+#define DEBUG_DESC_VERSION      1
+	uint32_t	version;
+	uint8_t		console_log_levels;     /* high 4 bits in memory,
+						 * low 4 bits driver (e.g. uart). */
+	uint8_t		state_flags; /* various state flags - OPAL_BOOT_COMPLETE etc */ 
+	uint16_t	reserved2;
+	uint64_t	log_buf_phys;
+
+	/* Memory console */
+	uint64_t	memcons_phys;
+	uint32_t	memcons_tce;
+	uint32_t	memcons_obuf_tce;
+	uint32_t	memcons_ibuf_tce;
+} __packed;
+
+
+static size_t print_time(uint64_t ts)
+{
+	unsigned long rem_nsec;
+
+	rem_nsec = ts / 1000000000;
+
+	return printf("[%5lu.%06lu] ", (unsigned long)ts, rem_nsec / 1000);
+}
+
+static size_t print_prefix(const struct log *msg, unsigned int level)
+{
+	size_t len = 0;
+	unsigned int prefix = (msg->facility << 3) | level;
+
+	len += printf("<%u>", prefix);
+	len += print_time(msg->ts_nsec);
+	return len;
+}
+
+static int display_dmesg(struct pdbg_target *target, uint64_t log_addr, bool swap)
+{
+	for (;;) {
+		size_t sz;
+		struct log l;
+		unsigned int flags, level;
+		uint8_t buf[0x10000];
+		bool prefix = true;
+		bool newline = true;
+		uint8_t *text;
+		unsigned int text_size;
+		unsigned int prev = 0;
+
+		sz = adu_getmem(target, log_addr, (uint8_t *)&l, sizeof(struct log));
+		if (sz)
+			exit(0);
+
+		if (swap) {
+			l.ts_nsec = bswap_64(l.ts_nsec);
+			l.len = bswap_16(l.len);
+			l.text_len = bswap_16(l.text_len);
+			l.dict_len = bswap_16(l.dict_len);
+			flags = l.fl.swapped.flags;
+			level = l.fl.swapped.level;
+		} else {
+			flags = l.fl.normal.flags;
+			level = l.fl.normal.level;
+		}
+
+		if (l.len == 0)
+			break;
+
+		sz = adu_getmem(target, log_addr + sizeof(struct log), buf,
+				l.len - sizeof(struct log));
+		if (sz)
+			break;
+		log_addr += l.len;
+
+		if ((prev & LOG_CONT) && !(flags & LOG_PREFIX))
+			prefix = false;
+
+		if (flags & LOG_CONT) {
+			if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
+				prefix = false;
+
+			if (!(flags & LOG_NEWLINE))
+				newline = false;
+		}
+
+		text = buf;
+		text_size = l.text_len;
+		do {
+			uint8_t *next = memchr(text, '\n', text_size);
+			size_t text_len;
+
+			if (next) {
+				text_len = next - text;
+				next++;
+				text_size -= next - text;
+			} else {
+				text_len = text_size;
+			}
+
+			if (prefix)
+				print_prefix(&l, level);
+			fwrite(text, text_len, 1, stdout);
+			if (next || newline)
+				printf("\n");
+			prefix = true;
+			text = next;
+		} while (text);
+	}
+	return 0;
+}
+
+int handle_dmesg(int optind, int argc, char *argv[])
+{
+	struct pdbg_target *target;
+	struct debug_descriptor dd;
+	uint64_t dd_addr;
+	uint64_t log_buf_phys;
+	int ret;
+
+	pdbg_for_each_class_target("adu", target) {
+		if (!target_selected(target->parent))
+			continue;
+		if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED)
+			continue;
+		break;
+	};
+	assert(target);
+
+	ret = adu_getmem(target, OPAL_DEBUG_DESCRIPTOR_ADDR,
+			(uint8_t *)&dd_addr, sizeof(dd_addr));
+	if (ret < 0) {
+		PR_ERROR("unable to read memory\n");
+		exit(-1);
+	}
+	dd_addr = be64toh(dd_addr);
+	PR_DEBUG("dd_addr: %s 0x%016"PRIx64"\n", __func__, dd_addr);
+
+	ret = adu_getmem(target, dd_addr, (uint8_t *)&dd, sizeof(dd));
+	if (ret < 0) {
+		PR_ERROR("unable to read memory\n");
+		exit(-1);
+	}
+	log_buf_phys = be64toh(dd.log_buf_phys);
+	PR_DEBUG("log_buf_phys: 0x%016"PRIx64"\n", log_buf_phys);
+
+	if (log_buf_phys == 0) {
+		PR_ERROR("dmesg not registered with skiboot\n");
+		exit(-1);
+	}
+	ret = display_dmesg(target, log_buf_phys, true);
+	if (ret < 0) {
+		PR_ERROR("unable to display dmesg\n");
+		exit(-1);
+	}
+	return 0;
+}
diff --git a/src/main.c b/src/main.c
index 2200a01c5e73..731795f69bd3 100644
--- a/src/main.c
+++ b/src/main.c
@@ -41,6 +41,7 @@ 
 #include "mem.h"
 #include "thread.h"
 #include "htm.h"
+#include "dmesg.h"
 #include "options.h"
 
 #define PR_ERROR(x, args...) \
@@ -122,6 +123,7 @@  static struct action actions[] = {
 	{ "threadstatus", "", "Print the status of a thread", &thread_status_print },
 	{ "sreset",  "", "Reset", &thread_sreset },
 	{ "regs",  "", "State", &thread_state },
+	{ "dmesg", "", "Display kernel log", &handle_dmesg },
 };