diff mbox series

[1/4] powerpc/kvm: Move kvm_tmp into .text, shrink to 64K

Message ID 20190911115746.12433-1-mpe@ellerman.id.au (mailing list archive)
State Accepted
Commit 0cb0837f9db1a6ed5b764ef61dd5f1a314b8231a
Headers show
Series [1/4] powerpc/kvm: Move kvm_tmp into .text, shrink to 64K | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (c317052c95bef1f977b023158e5aa929215f443d)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 2 warnings, 0 checks, 66 lines checked

Commit Message

Michael Ellerman Sept. 11, 2019, 11:57 a.m. UTC
In some configurations of KVM, guests binary patch themselves to
avoid/reduce trapping into the hypervisor. For some instructions this
requires replacing one instruction with a sequence of instructions.

For those cases we need to write the sequence of instructions
somewhere and then patch the location of the original instruction to
branch to the sequence. That requires that the location of the
sequence be within 32MB of the original instruction.

The current solution for this is that we create a 1MB array in BSS,
write sequences into there, and then free the remainder of the array.

This has a few problems:
 - it confuses kmemleak.
 - it confuses lockdep.
 - it requires mapping kvm_tmp executable, which can cause adjacent
   areas to also be mapped executable if we're using 16M pages for the
   linear mapping.
 - the 32MB limit can be exceeded if the kernel is big enough,
   especially with STRICT_KERNEL_RWX enabled, which then prevents the
   patching from working at all.

We can fix all those problems by making kvm_tmp just a region of
regular .text. However currently it's 1MB in size, and we don't want
to waste 1MB of text. In practice however I only see ~30KB of kvm_tmp
being used even for an allyes_config. So shrink kvm_tmp to 64K, which
ought to be enough for everyone, and move it into .text.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/kvm.c      | 24 +++++-------------------
 arch/powerpc/kernel/kvm_emul.S |  8 ++++++++
 2 files changed, 13 insertions(+), 19 deletions(-)

Comments

Michael Ellerman Sept. 19, 2019, 10:25 a.m. UTC | #1
On Wed, 2019-09-11 at 11:57:43 UTC, Michael Ellerman wrote:
> In some configurations of KVM, guests binary patch themselves to
> avoid/reduce trapping into the hypervisor. For some instructions this
> requires replacing one instruction with a sequence of instructions.
> 
> For those cases we need to write the sequence of instructions
> somewhere and then patch the location of the original instruction to
> branch to the sequence. That requires that the location of the
> sequence be within 32MB of the original instruction.
> 
> The current solution for this is that we create a 1MB array in BSS,
> write sequences into there, and then free the remainder of the array.
> 
> This has a few problems:
>  - it confuses kmemleak.
>  - it confuses lockdep.
>  - it requires mapping kvm_tmp executable, which can cause adjacent
>    areas to also be mapped executable if we're using 16M pages for the
>    linear mapping.
>  - the 32MB limit can be exceeded if the kernel is big enough,
>    especially with STRICT_KERNEL_RWX enabled, which then prevents the
>    patching from working at all.
> 
> We can fix all those problems by making kvm_tmp just a region of
> regular .text. However currently it's 1MB in size, and we don't want
> to waste 1MB of text. In practice however I only see ~30KB of kvm_tmp
> being used even for an allyes_config. So shrink kvm_tmp to 64K, which
> ought to be enough for everyone, and move it into .text.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Series applied to powerpc next.

https://git.kernel.org/powerpc/c/0cb0837f9db1a6ed5b764ef61dd5f1a314b8231a

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index b7b3a5e4e224..e3b5aa583319 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -64,7 +64,8 @@ 
 #define KVM_INST_MTSRIN		0x7c0001e4
 
 static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
 static int kvm_tmp_index;
 
 static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
@@ -132,7 +133,7 @@  static u32 *kvm_alloc(int len)
 {
 	u32 *p;
 
-	if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+	if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
 		printk(KERN_ERR "KVM: No more space (%d + %d)\n",
 				kvm_tmp_index, len);
 		kvm_patching_worked = false;
@@ -699,25 +700,13 @@  static void kvm_use_magic_page(void)
 			 kvm_patching_worked ? "worked" : "failed");
 }
 
-static __init void kvm_free_tmp(void)
-{
-	/*
-	 * Inform kmemleak about the hole in the .bss section since the
-	 * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
-	 */
-	kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
-			   ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
-	free_reserved_area(&kvm_tmp[kvm_tmp_index],
-			   &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
 static int __init kvm_guest_init(void)
 {
 	if (!kvm_para_available())
-		goto free_tmp;
+		return 0;
 
 	if (!epapr_paravirt_enabled)
-		goto free_tmp;
+		return 0;
 
 	if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
 		kvm_use_magic_page();
@@ -727,9 +716,6 @@  static int __init kvm_guest_init(void)
 	powersave_nap = 1;
 #endif
 
-free_tmp:
-	kvm_free_tmp();
-
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index eb2568f583ae..9dd17dce10a1 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -334,5 +334,13 @@ 
 kvm_emulate_mtsrin_len:
 	.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
 
+	.balign 4
+	.global kvm_tmp
+kvm_tmp:
+	.space	(64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
 .global kvm_template_end
 kvm_template_end: