diff mbox series

[RFC,8/9] OPAL V4: initial OS-operations (traps, printf)

Message ID 20200502113649.176329-9-npiggin@gmail.com
State New
Headers show
Series OPAL V4 | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (0f1937ef40fca0c3212a9dff1010b832a24fb063)

Commit Message

Nicholas Piggin May 2, 2020, 11:36 a.m. UTC
This completes OPAL V4, where the OS can provide servies to OPAL.

To start with for this patch, the OS is to provide printf console and
logging support, and trap handling and decoding.

When the OS calls OPAL_REGISTER_OS_OPS, skiboot patches its traps back
in, for the OS to handle. It sets the console to raw mode (introduced in
an earlier patch) and no longer uses its console facilities.  It calls
the OS to log messages.

Other services may be added to the size-versioned structure.

The benefit of this is that 1) OPAL messages get into kernel log
facilities, and 2) Linux's path to the console is much shorter, with
no locks or buffering between the kernel and the uart MMIO. Things like
xmon and BUG printing are careful to be as reliable as possible, I've
encountered times when the skiboot console code trips us up here.

Other OS facilities that may be useful:

- Sleep facilities, which could abstract away highly implementation
  specific idle sleep, and expose calls for a simple generic OPAL
  driver.

- ftrace and/or kprobes facilities.

- perf interrupts.

- Lockup watchdog interrupts.

- Timer facilities. Skiboot could request an OS timer is scheduled which
  then makes an OPAL call with an opaque pointer which skiboot can then
  use to run functions asynchronously, for example.

- Interrupt enable... possibly not, we'd rather keep calls as short as
  possible. Except perhaps for watchog and perf interrupts.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/console-log.c      |  9 ++++++++
 core/console.c          | 11 ++++++++++
 core/fast-reboot.c      |  7 +++++-
 core/init.c             |  4 +---
 core/opal.c             | 48 +++++++++++++++++++++++++++++++++++++++++
 include/mem_region.h    |  1 +
 include/opal-api.h      |  7 +++++-
 include/opal-internal.h |  9 ++++++++
 8 files changed, 91 insertions(+), 5 deletions(-)

Comments

Gautham R Shenoy May 6, 2020, 5:37 a.m. UTC | #1
Hello Nicholas,

On Sat, May 02, 2020 at 09:36:48PM +1000, Nicholas Piggin wrote:
> This completes OPAL V4, where the OS can provide servies to OPAL.
> 
> To start with for this patch, the OS is to provide printf console and
> logging support, and trap handling and decoding.
> 
> When the OS calls OPAL_REGISTER_OS_OPS, skiboot patches its traps back
> in, for the OS to handle. It sets the console to raw mode (introduced in
> an earlier patch) and no longer uses its console facilities.  It calls
> the OS to log messages.
> 
> Other services may be added to the size-versioned structure.
> 
> The benefit of this is that 1) OPAL messages get into kernel log
> facilities, and 2) Linux's path to the console is much shorter, with
> no locks or buffering between the kernel and the uart MMIO. Things like
> xmon and BUG printing are careful to be as reliable as possible, I've
> encountered times when the skiboot console code trips us up here.
> 
> Other OS facilities that may be useful:
> 
> - Sleep facilities, which could abstract away highly implementation
>   specific idle sleep, and expose calls for a simple generic OPAL
>   driver.
> 
> - ftrace and/or kprobes facilities.
> 
> - perf interrupts.
> 
> - Lockup watchdog interrupts.
> 
> - Timer facilities. Skiboot could request an OS timer is scheduled which
>   then makes an OPAL call with an opaque pointer which skiboot can then
>   use to run functions asynchronously, for example.
> 
> - Interrupt enable... possibly not, we'd rather keep calls as short as
>   possible. Except perhaps for watchog and perf interrupts.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[..snip..]

> diff --git a/core/opal.c b/core/opal.c
> index 7fb4fae86..5454cac67 100644
> --- a/core/opal.c
> +++ b/core/opal.c
> @@ -23,6 +23,7 @@
>  #include <elf-abi.h>
>  #include <errorlog.h>
>  #include <occ.h>
> +#include <mem_region.h>
> 
>  /* Pending events to signal via opal_poll_events */
>  uint64_t opal_pending_events;
> @@ -471,6 +472,53 @@ out:
>  	return ret;
>  }
> 
> +bool opal_v4_os = false;
> +struct os_ops os_ops;
> +
> +static int64_t opal_register_os_ops(struct opal_os_ops *ops, uint64_t size)
> +{
> +	struct cpu_thread *cpu;
> +
> +	if (size < 8)
> +		return OPAL_PARAMETER;
> +
> +	for_each_cpu(cpu) {
> +		if (cpu == this_cpu())
> +			continue;
> +		if (cpu->state == cpu_state_os)
> +			return OPAL_BUSY;
> +	}
> +

Is this check to ensure that the OS calls register_os_ops() before the
secondaries are woken up? If so, when we introduce a os_stop()
callback, it should be registered prior to idle_init() in the Kernel.



> +	if (opal_v4_os)
> +		return OPAL_WRONG_STATE;
> +
> +	if (!verify_romem()) {
> +		prlog(PR_NOTICE, "OPAL checksums did not match\n");
> +		disable_fast_reboot("Inconsistent firmware romem checksum");
> +		return OPAL_WRONG_STATE;
> +	}
> +
> +	/* v4 must handle OPAL traps */
> +	patch_traps(true);
> +
> +	if (size >= 8) {
> +		os_ops.os_printf = (void *)be64_to_cpu(ops->os_printf);
> +		set_opal_console_to_raw();

At this point, we are invoking set_opal_raw_console() only called for
mambo_opal_raw_con, but not for uart_opal_raw_con. Do we need that if
we want to run this series on a P9 machine ?

> +	}
> +
> +	checksum_romem();
> +
> +	opal_v4_os = true;
> +
> +	return OPAL_SUCCESS;
> +}
> +opal_call(OPAL_REGISTER_OS_OPS, opal_register_os_ops, 2);
> +
> +void os_printf(uint32_t log_level, const char *str)
> +{
> +	os_ops.os_printf(log_level, str);
> +}
> +
>  void add_opal_node(void)
>  {
>  	uint64_t base, entry, size;
> diff --git a/include/mem_region.h b/include/mem_region.h
> index 47c3bd70c..f56eef2d8 100644
> --- a/include/mem_region.h
> +++ b/include/mem_region.h
> @@ -73,6 +73,7 @@ struct mem_region *find_mem_region(const char *name);
>  bool mem_range_is_reserved(uint64_t start, uint64_t size);
> 
>  /* Read-only memory checksum */
> +void checksum_romem(void);
>  bool verify_romem(void);
> 
>  #endif /* __MEMORY_REGION */
> diff --git a/include/opal-api.h b/include/opal-api.h
> index e5142f121..139dc1d43 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -231,7 +231,8 @@
>  #define OPAL_SYM_TO_ADDR			182
>  #define OPAL_REPORT_TRAP			183
>  #define OPAL_FIND_VM_AREA			184
> -#define OPAL_LAST				184
> +#define OPAL_REGISTER_OS_OPS			185
> +#define OPAL_LAST				185
> 
>  #define QUIESCE_HOLD			1 /* Spin all calls at entry */
>  #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
> @@ -1275,6 +1276,10 @@ struct opal_vm_area {
>  	__be64	vm_flags;
>  };
> 
> +struct opal_os_ops {
> +        __be64  os_printf;      /* void printf(int32_t level, const char *str) */
> +};
> +
>  #endif /* __ASSEMBLY__ */
> 
>  #endif /* __OPAL_API_H */
> diff --git a/include/opal-internal.h b/include/opal-internal.h
> index f6ca7ac32..64f372489 100644
> --- a/include/opal-internal.h
> +++ b/include/opal-internal.h
> @@ -18,6 +18,15 @@ struct opal_table_entry {
>  	u32	nargs;
>  };
> 
> +struct os_ops {
> +        void (*os_printf)(uint32_t log_level, const char *str);
> +};
> +
> +extern bool opal_v4_os;
> +extern struct os_ops os_ops;
> +
> +extern void os_printf(uint32_t log_level, const char *str);
> +
>  #ifdef __CHECKER__
>  #define __opal_func_test_arg(__func, __nargs) 0
>  #else
> -- 
> 2.23.0
> 
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Nicholas Piggin May 6, 2020, 7:18 a.m. UTC | #2
Excerpts from Gautham R Shenoy's message of May 6, 2020 3:37 pm:
> Hello Nicholas,
> 
> On Sat, May 02, 2020 at 09:36:48PM +1000, Nicholas Piggin wrote:
>> +static int64_t opal_register_os_ops(struct opal_os_ops *ops, uint64_t size)
>> +{
>> +	struct cpu_thread *cpu;
>> +
>> +	if (size < 8)
>> +		return OPAL_PARAMETER;
>> +
>> +	for_each_cpu(cpu) {
>> +		if (cpu == this_cpu())
>> +			continue;
>> +		if (cpu->state == cpu_state_os)
>> +			return OPAL_BUSY;
>> +	}
>> +
> 
> Is this check to ensure that the OS calls register_os_ops() before the
> secondaries are woken up?

Yes, although I'm not sure if we'd like to allow some CPUs to remain in 
skiboot. Possibly an alternative is to disallow waking more secondaries
after we set to v4.

We could let the OS synchronise this itself, but it seemed like a good 
idea for OPAL to enforce it.

> If so, when we introduce a os_stop()
> callback, it should be registered prior to idle_init() in the Kernel.

Yes I guess so.

>> +	if (opal_v4_os)
>> +		return OPAL_WRONG_STATE;
>> +
>> +	if (!verify_romem()) {
>> +		prlog(PR_NOTICE, "OPAL checksums did not match\n");
>> +		disable_fast_reboot("Inconsistent firmware romem checksum");
>> +		return OPAL_WRONG_STATE;
>> +	}
>> +
>> +	/* v4 must handle OPAL traps */
>> +	patch_traps(true);
>> +
>> +	if (size >= 8) {
>> +		os_ops.os_printf = (void *)be64_to_cpu(ops->os_printf);
>> +		set_opal_console_to_raw();
> 
> At this point, we are invoking set_opal_raw_console() only called for
> mambo_opal_raw_con, but not for uart_opal_raw_con. Do we need that if
> we want to run this series on a P9 machine ?

Hmm, good catch. I actually had it running on a P9 with lpc uart, but
possibly didn't test printing very well.

I'm not sure what to do if the driver doesn't support raw mode. Just 
disallow the register call I suppose, it's easy enough to add them.

Thanks,
Nick
Gautham R Shenoy May 6, 2020, 10 a.m. UTC | #3
On Wed, May 06, 2020 at 05:18:54PM +1000, Nicholas Piggin wrote:
> Excerpts from Gautham R Shenoy's message of May 6, 2020 3:37 pm:
> > Hello Nicholas,
> > 
> > On Sat, May 02, 2020 at 09:36:48PM +1000, Nicholas Piggin wrote:
> >> +static int64_t opal_register_os_ops(struct opal_os_ops *ops, uint64_t size)
> >> +{
> >> +	struct cpu_thread *cpu;
> >> +
> >> +	if (size < 8)
> >> +		return OPAL_PARAMETER;
> >> +
> >> +	for_each_cpu(cpu) {
> >> +		if (cpu == this_cpu())
> >> +			continue;
> >> +		if (cpu->state == cpu_state_os)
> >> +			return OPAL_BUSY;
> >> +	}
> >> +
> > 
> > Is this check to ensure that the OS calls register_os_ops() before the
> > secondaries are woken up?
> 
> Yes, although I'm not sure if we'd like to allow some CPUs to remain in 
> skiboot. Possibly an alternative is to disallow waking more secondaries
> after we set to v4.
> 
> We could let the OS synchronise this itself, but it seemed like a good 
> idea for OPAL to enforce it.
> 
> > If so, when we introduce a os_stop()
> > callback, it should be registered prior to idle_init() in the Kernel.
> 
> Yes I guess so.

Ok. That shouldn't be a problem. Will try that out.

> 
> >> +	if (opal_v4_os)
> >> +		return OPAL_WRONG_STATE;
> >> +
> >> +	if (!verify_romem()) {
> >> +		prlog(PR_NOTICE, "OPAL checksums did not match\n");
> >> +		disable_fast_reboot("Inconsistent firmware romem checksum");
> >> +		return OPAL_WRONG_STATE;
> >> +	}
> >> +
> >> +	/* v4 must handle OPAL traps */
> >> +	patch_traps(true);
> >> +
> >> +	if (size >= 8) {
> >> +		os_ops.os_printf = (void *)be64_to_cpu(ops->os_printf);
> >> +		set_opal_console_to_raw();
> > 
> > At this point, we are invoking set_opal_raw_console() only called for
> > mambo_opal_raw_con, but not for uart_opal_raw_con. Do we need that if
> > we want to run this series on a P9 machine ?
> 
> Hmm, good catch. I actually had it running on a P9 with lpc uart, but
> possibly didn't test printing very well.
> 
> I'm not sure what to do if the driver doesn't support raw mode. Just 
> disallow the register call I suppose, it's easy enough to add them.

Would it be ok to disallow os_printf alone instead of failing the
register call? That way we can still leverage the ability to run OPAL
with mmu_on, since vm_map() and vm_unmap() callbacks would still be
functional.



> 
> Thanks,
> Nick

--
Thanks and Regards
gautham.
diff mbox series

Patch

diff --git a/core/console-log.c b/core/console-log.c
index 21a1442bd..d73913894 100644
--- a/core/console-log.c
+++ b/core/console-log.c
@@ -8,6 +8,7 @@ 
  * Copyright 2013-2018 IBM Corp.
  */
 
+#include <opal.h>
 #include "skiboot.h"
 #include "unistd.h"
 #include "stdio.h"
@@ -15,6 +16,8 @@ 
 #include "timebase.h"
 #include <debug_descriptor.h>
 
+extern void os_printf(uint32_t log_level, const char *str);
+
 static int vprlog(int log_level, const char *fmt, va_list ap)
 {
 	int count;
@@ -32,6 +35,12 @@  static int vprlog(int log_level, const char *fmt, va_list ap)
 	if (log_level > (debug_descriptor.console_log_levels >> 4))
 		return 0;
 
+	if (opal_v4_os) {
+		count = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+		os_printf(log_level, buffer);
+		return count;
+	}
+
 	count = snprintf(buffer, sizeof(buffer), "[%5lu.%09lu,%d] ",
 			 tb_to_secs(tb), tb_remaining_nsecs(tb), log_level);
 	count+= vsnprintf(buffer+count, sizeof(buffer)-count, fmt, ap);
diff --git a/core/console.c b/core/console.c
index 336dda7a1..9c882c6a4 100644
--- a/core/console.c
+++ b/core/console.c
@@ -259,8 +259,19 @@  ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count)
 	return count;
 }
 
+extern void os_printf(uint32_t log_level, const char *str);
+
 ssize_t write(int fd __unused, const void *buf, size_t count)
 {
+	if (opal_v4_os) {
+		char *b = malloc(count+1);
+		memcpy(b, buf, count);
+		b[count] = '\0';
+		os_printf(4, b);
+		free(b);
+		return count;
+	}
+
 	return console_write(true, buf, count);
 }
 
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index e7f3b5c67..fc5dd8bf4 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -334,7 +334,12 @@  void __noreturn fast_reboot_entry(void)
 		/* Restore skiboot vectors */
 		copy_exception_vectors();
 		copy_sreset_vector();
-		patch_traps(true);
+		if (opal_v4_os) {
+			set_opal_console_to_cooked();
+			opal_v4_os = false;
+		} else {
+			patch_traps(true);
+		}
 	}
 
 	/* Must wait for others to because shared SPRs like HID0 are only set
diff --git a/core/init.c b/core/init.c
index 95c0339cf..81672b0d4 100644
--- a/core/init.c
+++ b/core/init.c
@@ -86,8 +86,6 @@  struct debug_descriptor debug_descriptor = {
 #endif
 };
 
-static void checksum_romem(void);
-
 static bool try_load_elf64_le(struct elf_hdr *header)
 {
 	struct elf64le_hdr *kh = (struct elf64le_hdr *)header;
@@ -1028,7 +1026,7 @@  static uint32_t mem_csum(void *_p, void *_e)
 
 static uint32_t romem_csum;
 
-static void checksum_romem(void)
+void checksum_romem(void)
 {
 	uint32_t csum;
 
diff --git a/core/opal.c b/core/opal.c
index 7fb4fae86..5454cac67 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -23,6 +23,7 @@ 
 #include <elf-abi.h>
 #include <errorlog.h>
 #include <occ.h>
+#include <mem_region.h>
 
 /* Pending events to signal via opal_poll_events */
 uint64_t opal_pending_events;
@@ -471,6 +472,53 @@  out:
 	return ret;
 }
 
+bool opal_v4_os = false;
+struct os_ops os_ops;
+
+static int64_t opal_register_os_ops(struct opal_os_ops *ops, uint64_t size)
+{
+	struct cpu_thread *cpu;
+
+	if (size < 8)
+		return OPAL_PARAMETER;
+
+	for_each_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+		if (cpu->state == cpu_state_os)
+			return OPAL_BUSY;
+	}
+
+	if (opal_v4_os)
+		return OPAL_WRONG_STATE;
+
+	if (!verify_romem()) {
+		prlog(PR_NOTICE, "OPAL checksums did not match\n");
+		disable_fast_reboot("Inconsistent firmware romem checksum");
+		return OPAL_WRONG_STATE;
+	}
+
+	/* v4 must handle OPAL traps */
+	patch_traps(true);
+
+	if (size >= 8) {
+		os_ops.os_printf = (void *)be64_to_cpu(ops->os_printf);
+		set_opal_console_to_raw();
+	}
+
+	checksum_romem();
+
+	opal_v4_os = true;
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_REGISTER_OS_OPS, opal_register_os_ops, 2);
+
+void os_printf(uint32_t log_level, const char *str)
+{
+	os_ops.os_printf(log_level, str);
+}
+
 void add_opal_node(void)
 {
 	uint64_t base, entry, size;
diff --git a/include/mem_region.h b/include/mem_region.h
index 47c3bd70c..f56eef2d8 100644
--- a/include/mem_region.h
+++ b/include/mem_region.h
@@ -73,6 +73,7 @@  struct mem_region *find_mem_region(const char *name);
 bool mem_range_is_reserved(uint64_t start, uint64_t size);
 
 /* Read-only memory checksum */
+void checksum_romem(void);
 bool verify_romem(void);
 
 #endif /* __MEMORY_REGION */
diff --git a/include/opal-api.h b/include/opal-api.h
index e5142f121..139dc1d43 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -231,7 +231,8 @@ 
 #define OPAL_SYM_TO_ADDR			182
 #define OPAL_REPORT_TRAP			183
 #define OPAL_FIND_VM_AREA			184
-#define OPAL_LAST				184
+#define OPAL_REGISTER_OS_OPS			185
+#define OPAL_LAST				185
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
@@ -1275,6 +1276,10 @@  struct opal_vm_area {
 	__be64	vm_flags;
 };
 
+struct opal_os_ops {
+        __be64  os_printf;      /* void printf(int32_t level, const char *str) */
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/include/opal-internal.h b/include/opal-internal.h
index f6ca7ac32..64f372489 100644
--- a/include/opal-internal.h
+++ b/include/opal-internal.h
@@ -18,6 +18,15 @@  struct opal_table_entry {
 	u32	nargs;
 };
 
+struct os_ops {
+        void (*os_printf)(uint32_t log_level, const char *str);
+};
+
+extern bool opal_v4_os;
+extern struct os_ops os_ops;
+
+extern void os_printf(uint32_t log_level, const char *str);
+
 #ifdef __CHECKER__
 #define __opal_func_test_arg(__func, __nargs) 0
 #else