diff mbox series

[RFC,9/9] OPAL V4: local vm_map/unmap operations

Message ID 20200502113649.176329-10-npiggin@gmail.com
State New
Headers show
Series OPAL V4 | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (0f1937ef40fca0c3212a9dff1010b832a24fb063)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Nicholas Piggin May 2, 2020, 11:36 a.m. UTC
This implements vm_map/unmap API that an OS can provide. The per-CPU
areas are advertised by OPAL_FIND_VM_AREA so page tables can be allocated
ahead of time.

The ops must not sleep or cause IPIs.

This allows skiboot to run entirely in virtual mode and not have to
drop to real mode to cope with vm_map().

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/console-log.c      |  2 +-
 core/opal.c             | 37 ++++++++++++++++++++++
 core/vm.c               | 70 ++++++++++++++++++++++++++++++++++-------
 include/config.h        |  1 +
 include/opal-api.h      |  3 ++
 include/opal-internal.h |  4 +++
 include/skiboot.h       |  1 +
 7 files changed, 105 insertions(+), 13 deletions(-)

Comments

Gautham R Shenoy May 6, 2020, 5:53 a.m. UTC | #1
Hello Nicholas,

On Sat, May 02, 2020 at 09:36:49PM +1000, Nicholas Piggin wrote:
> This implements vm_map/unmap API that an OS can provide. The per-CPU
> areas are advertised by OPAL_FIND_VM_AREA so page tables can be allocated
> ahead of time.
> 
> The ops must not sleep or cause IPIs.
> 
> This allows skiboot to run entirely in virtual mode and not have to
> drop to real mode to cope with vm_map().
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>


[..snip..]


> @@ -519,6 +525,37 @@ void os_printf(uint32_t log_level, const char *str)
>  	os_ops.os_printf(log_level, str);
>  }
> 
> +int64_t os_vm_map(uint64_t ea, uint64_t pa, uint64_t flags)
> +{
> +	struct cpu_thread *cpu = this_cpu();
> +	uint64_t msr = mfmsr();
> +	int64_t ret;
> +
> +	if (msr != cpu->opal_call_msr)
> +		mtmsrd(cpu->opal_call_msr, 0);
> +
> +	ret = os_ops.os_vm_map(ea, pa, flags);
> +
> +	if (msr != cpu->opal_call_msr)
> +		mtmsrd(cpu->opal_call_msr, 0);

Should this be mtmsrd(msr, 0) , so that we return with the msr value
that was set by someone up in the call stack ?



> +
> +	return ret;
> +}
> +
> +void os_vm_unmap(uint64_t ea)
> +{
> +	struct cpu_thread *cpu = this_cpu();
> +	uint64_t msr = mfmsr();
> +
> +	if (msr != cpu->opal_call_msr)
> +		mtmsrd(cpu->opal_call_msr, 0);
> +
> +	os_ops.os_vm_unmap(ea);
> +
> +	if (msr != cpu->opal_call_msr)
> +		mtmsrd(cpu->opal_call_msr, 0);

Ditto ?

> +}
> +
>  void add_opal_node(void)
>  {
>  	uint64_t base, entry, size;
> diff --git a/core/vm.c b/core/vm.c
> index b2830fccf..2ecfaeb44 100644

[..snip..]

--
Thanks and Regards
gautham.
Nicholas Piggin May 6, 2020, 7:22 a.m. UTC | #2
Excerpts from Gautham R Shenoy's message of May 6, 2020 3:53 pm:
> Hello Nicholas,
> 
> On Sat, May 02, 2020 at 09:36:49PM +1000, Nicholas Piggin wrote:
>> This implements vm_map/unmap API that an OS can provide. The per-CPU
>> areas are advertised by OPAL_FIND_VM_AREA so page tables can be allocated
>> ahead of time.
>> 
>> The ops must not sleep or cause IPIs.
>> 
>> This allows skiboot to run entirely in virtual mode and not have to
>> drop to real mode to cope with vm_map().
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> 
> 
> [..snip..]
> 
> 
>> @@ -519,6 +525,37 @@ void os_printf(uint32_t log_level, const char *str)
>>  	os_ops.os_printf(log_level, str);
>>  }
>> 
>> +int64_t os_vm_map(uint64_t ea, uint64_t pa, uint64_t flags)
>> +{
>> +	struct cpu_thread *cpu = this_cpu();
>> +	uint64_t msr = mfmsr();
>> +	int64_t ret;
>> +
>> +	if (msr != cpu->opal_call_msr)
>> +		mtmsrd(cpu->opal_call_msr, 0);
>> +
>> +	ret = os_ops.os_vm_map(ea, pa, flags);
>> +
>> +	if (msr != cpu->opal_call_msr)
>> +		mtmsrd(cpu->opal_call_msr, 0);
> 
> Should this be mtmsrd(msr, 0) , so that we return with the msr value
> that was set by someone up in the call stack ?

Yeah good catch. os_printf needs this as well I think.

Thanks,
Nick
diff mbox series

Patch

diff --git a/core/console-log.c b/core/console-log.c
index d73913894..2899c1c56 100644
--- a/core/console-log.c
+++ b/core/console-log.c
@@ -35,7 +35,7 @@  static int vprlog(int log_level, const char *fmt, va_list ap)
 	if (log_level > (debug_descriptor.console_log_levels >> 4))
 		return 0;
 
-	if (opal_v4_os) {
+	if (os_ops.os_printf) {
 		count = vsnprintf(buffer, sizeof(buffer), fmt, ap);
 		os_printf(log_level, buffer);
 		return count;
diff --git a/core/opal.c b/core/opal.c
index 5454cac67..bb71ac374 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -506,6 +506,12 @@  static int64_t opal_register_os_ops(struct opal_os_ops *ops, uint64_t size)
 		set_opal_console_to_raw();
 	}
 
+	if (size >= 24) {
+		os_ops.os_vm_map = (void *)be64_to_cpu(ops->os_vm_map);
+		os_ops.os_vm_unmap = (void *)be64_to_cpu(ops->os_vm_unmap);
+		vm_resurrect();
+	}
+
 	checksum_romem();
 
 	opal_v4_os = true;
@@ -519,6 +525,37 @@  void os_printf(uint32_t log_level, const char *str)
 	os_ops.os_printf(log_level, str);
 }
 
+int64_t os_vm_map(uint64_t ea, uint64_t pa, uint64_t flags)
+{
+	struct cpu_thread *cpu = this_cpu();
+	uint64_t msr = mfmsr();
+	int64_t ret;
+
+	/* Run the OS-provided callback under cpu->opal_call_msr */
+	if (msr != cpu->opal_call_msr)
+		mtmsrd(cpu->opal_call_msr, 0);
+	ret = os_ops.os_vm_map(ea, pa, flags);
+
+	/* Restore the MSR the caller was running with, not opal_call_msr */
+	if (msr != cpu->opal_call_msr)
+		mtmsrd(msr, 0);
+	return ret;
+}
+
+void os_vm_unmap(uint64_t ea)
+{
+	struct cpu_thread *cpu = this_cpu();
+	uint64_t msr = mfmsr();
+
+	/* Run the OS-provided callback under cpu->opal_call_msr */
+	if (msr != cpu->opal_call_msr)
+		mtmsrd(cpu->opal_call_msr, 0);
+	os_ops.os_vm_unmap(ea);
+	/* Restore the MSR the caller was running with, not opal_call_msr */
+	if (msr != cpu->opal_call_msr)
+		mtmsrd(msr, 0);
+}
+
 void add_opal_node(void)
 {
 	uint64_t base, entry, size;
diff --git a/core/vm.c b/core/vm.c
index b2830fccf..2ecfaeb44 100644
--- a/core/vm.c
+++ b/core/vm.c
@@ -35,6 +35,7 @@  static bool vm_globals_allocated = false;
 #define LOCAL_SLB_BASE		GLOBAL_SLB_NR
 
 #define LOCAL_EA_PERCPU		(SLB_SZ)
+#define OS_LOCAL_EA_PERCPU	(2ULL*1024*1024)
 #define LOCAL_EA_BEGIN		0x0008000000000000ULL
 #define LOCAL_EA_END		0x0009000000000000ULL
 
@@ -485,13 +486,16 @@  void *vm_map(unsigned long addr, unsigned long len, bool rw)
 	addr &= ~(PAGE_SIZE - 1);
 	len = end - addr;
 
-	assert(len <= LOCAL_EA_PERCPU);
+	if (cpu_in_os())
+		assert(len <= OS_LOCAL_EA_PERCPU);
+	else
+		assert(len <= LOCAL_EA_PERCPU);
 
 	/* Can't do nested mappings */
 	assert(!c->vm_local_map_inuse);
 	c->vm_local_map_inuse = true;
 
-	if (cpu_in_os() && c->vm_setup) {
+	if (cpu_in_os() && c->vm_setup && !os_ops.os_vm_map) {
 		assert(c->opal_call_msr & (MSR_IR|MSR_DR));
 		newaddr = addr;
 		mtmsr(c->opal_call_msr & ~MSR_DR);
@@ -501,9 +505,10 @@  void *vm_map(unsigned long addr, unsigned long len, bool rw)
 	} else {
 		struct vm_map *new = &c->vm_local_map;
 
-		assert(!cpu_in_os());
-
-		newaddr = LOCAL_EA_BEGIN + LOCAL_EA_PERCPU * c->pir;
+		if (cpu_in_os())
+			newaddr = LOCAL_EA_BEGIN + OS_LOCAL_EA_PERCPU * c->pir;
+		else
+			newaddr = LOCAL_EA_BEGIN + LOCAL_EA_PERCPU * c->pir;
 
 		new->name = "local";
 		new->address = newaddr;
@@ -513,6 +518,20 @@  void *vm_map(unsigned long addr, unsigned long len, bool rw)
 		new->writeable = rw;
 		new->executable = false;
 		new->ci = false;
+
+		if (cpu_in_os()) {
+			uint64_t ea, pa, flags;
+
+			flags = vmm_os_map_flags(new);
+
+			pa = new->pa;
+			ea = newaddr;
+			while (ea < newaddr + len) {
+				os_vm_map(ea, pa, flags);
+				ea += OS_PAGE_SIZE;
+				pa += OS_PAGE_SIZE;
+			}
+		}
 	}
 
 	return (void *)newaddr + offset;
@@ -528,12 +547,15 @@  void vm_unmap(unsigned long addr, unsigned long len)
 	addr &= ~(PAGE_SIZE - 1);
 	len = end - addr;
 
-	assert(len <= LOCAL_EA_PERCPU);
+	if (cpu_in_os())
+		assert(len <= OS_LOCAL_EA_PERCPU);
+	else
+		assert(len <= LOCAL_EA_PERCPU);
 
 	assert(c->vm_local_map_inuse);
 	c->vm_local_map_inuse = false;
 
-	if (cpu_in_os() && (c->opal_call_msr & (MSR_IR|MSR_DR))) {
+	if (cpu_in_os() && (c->opal_call_msr & (MSR_IR|MSR_DR)) && !os_ops.os_vm_map) {
 		assert(!c->vm_setup);
 		c->vm_setup = true;
 		mtmsr(c->opal_call_msr);
@@ -543,9 +565,10 @@  void vm_unmap(unsigned long addr, unsigned long len)
 		struct vm_map *vmm;
 		unsigned long ea;
 
-		assert(!cpu_in_os());
-
-		newaddr = LOCAL_EA_BEGIN + LOCAL_EA_PERCPU * c->pir;
+		if (cpu_in_os())
+			newaddr = LOCAL_EA_BEGIN + OS_LOCAL_EA_PERCPU * c->pir;
+		else
+			newaddr = LOCAL_EA_BEGIN + LOCAL_EA_PERCPU * c->pir;
 
 		vmm = &c->vm_local_map;
 		assert(newaddr == vmm->address);
@@ -554,8 +577,13 @@  void vm_unmap(unsigned long addr, unsigned long len)
 
 		ea = newaddr;
 		while (ea < newaddr + len) {
-			htab_remove(ea, true);
-			ea += PAGE_SIZE;
+			if (cpu_in_os()) {
+				os_vm_unmap(ea);
+				ea += OS_PAGE_SIZE;
+			} else {
+				htab_remove(ea, true);
+				ea += PAGE_SIZE;
+			}
 		}
 	}
 }
@@ -987,6 +1015,24 @@  static int64_t opal_find_vm_area(uint64_t addr, struct opal_vm_area *opal_vm_are
 		return OPAL_SUCCESS;
 	}
 
+	if (addr < LOCAL_EA_BEGIN) {
+		opal_vm_area->address = cpu_to_be64(LOCAL_EA_BEGIN);
+		opal_vm_area->length = cpu_to_be64(cpu_max_pir * OS_LOCAL_EA_PERCPU);
+		opal_vm_area->pa = 0;
+		opal_vm_area->vm_flags = 0;
+
+		printf("0x%016llx-0x%016llx flags=0 (per-cpu maps)\n", LOCAL_EA_BEGIN, LOCAL_EA_BEGIN + cpu_max_pir * OS_LOCAL_EA_PERCPU);
+
+		return OPAL_SUCCESS;
+	}
+
 	return OPAL_EMPTY;
 }
 opal_call(OPAL_FIND_VM_AREA, opal_find_vm_area, 2);
+
+void vm_resurrect(void)
+{
+	/* OS takes over control of providing VM mappings */
+	prlog(PR_NOTICE, "VMM: Rise from your grave!\n");
+	print_maps();
+}
diff --git a/include/config.h b/include/config.h
index fd9521fa2..70608d120 100644
--- a/include/config.h
+++ b/include/config.h
@@ -6,6 +6,7 @@ 
 
 /* Alignment to which skiboot lays out memory. */
 #define PAGE_SIZE	0x10000
+#define OS_PAGE_SIZE	0x10000
 
 #define HAVE_TYPEOF			1
 #define HAVE_BUILTIN_TYPES_COMPATIBLE_P	1
diff --git a/include/opal-api.h b/include/opal-api.h
index 139dc1d43..3c630f02e 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -1278,6 +1278,9 @@  struct opal_vm_area {
 
 struct opal_os_ops {
         __be64  os_printf;      /* void printf(int32_t level, const char *str) */
+        __be64  os_vm_map;      /* int64_t os_vm_map(uint64_t ea, uint64_t pa, uint64_t flags) */
+        __be64  os_vm_unmap;    /* void os_vm_unmap(uint64_t ea) */
+
 };
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/opal-internal.h b/include/opal-internal.h
index 64f372489..af5216639 100644
--- a/include/opal-internal.h
+++ b/include/opal-internal.h
@@ -20,12 +20,16 @@  struct opal_table_entry {
 
 struct os_ops {
         void (*os_printf)(uint32_t log_level, const char *str);
+        int64_t (*os_vm_map)(uint64_t ea, uint64_t pa, uint64_t flags);
+        void (*os_vm_unmap)(uint64_t ea);
 };
 
 extern bool opal_v4_os;
 extern struct os_ops os_ops;
 
 extern void os_printf(uint32_t log_level, const char *str);
+extern int64_t os_vm_map(uint64_t ea, uint64_t pa, uint64_t flags);
+extern void os_vm_unmap(uint64_t ea);
 
 #ifdef __CHECKER__
 #define __opal_func_test_arg(__func, __nargs) 0
diff --git a/include/skiboot.h b/include/skiboot.h
index aacb425f7..5f0e3e0f7 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -362,5 +362,6 @@  bool vm_dslb(uint64_t nia, uint64_t dar);
 bool vm_islb(uint64_t nia);
 bool vm_dsi(uint64_t nia, uint64_t dar, uint32_t dsisr);
 bool vm_isi(uint64_t nia);
+void vm_resurrect(void);
 
 #endif /* __SKIBOOT_H */