[SRU,T,1/1] KVM: x86: fix singlestepping over syscall

Message ID 1531170993-3361-2-git-send-email-tyhicks@canonical.com
State New
Headers show
Series
  • CVE-2017-7518 - Incorrect debug exception emulation
Related show

Commit Message

Tyler Hicks July 9, 2018, 9:16 p.m.
From: Paolo Bonzini <pbonzini@redhat.com>

commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 upstream.

TF is handled a bit differently for syscall and sysret, compared
to the other instructions: TF is checked after the instruction completes,
so that the OS can disable #DB at a syscall by adding TF to FMASK.
When the sysret is executed the #DB is taken "as if" the syscall insn
just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
Fix the behavior; otherwise you could get a #DB on a user stack, which is
not nice.  This does not affect Linux guests, as they use an IST or task gate
for #DB.

This fixes CVE-2017-7518.

Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
[bwh: Backported to 3.16:
 - kvm_vcpu_check_singlestep() did not take an rflags parameter but
   called get_rflags() itself; delete that code
 - kvm_vcpu_check_singlestep() sets some flags differently
 - Drop changes to kvm_skip_emulated_instruction()]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>

CVE-2017-7518

(cherry picked from commit 24ee2a286de7ac680ad4b2423c2dcee68444e567 linux-stable)
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/kvm/emulate.c             |  1 +
 arch/x86/kvm/x86.c                 | 53 ++++++++++++++++----------------------
 3 files changed, 24 insertions(+), 31 deletions(-)

Comments

Kleber Souza July 26, 2018, 3:49 p.m. | #1
On 07/09/18 23:16, Tyler Hicks wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 upstream.
> 
> TF is handled a bit differently for syscall and sysret, compared
> to the other instructions: TF is checked after the instruction completes,
> so that the OS can disable #DB at a syscall by adding TF to FMASK.
> When the sysret is executed the #DB is taken "as if" the syscall insn
> just completed.
> 
> KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
> Fix the behavior; otherwise you could get a #DB on a user stack, which is
> not nice.  This does not affect Linux guests, as they use an IST or task gate
> for #DB.
> 
> This fixes CVE-2017-7518.
> 
> Reported-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> [bwh: Backported to 3.16:
>  - kvm_vcpu_check_singlestep() did not take an rflags parameter but
>    called get_rflags() itself; delete that code
>  - kvm_vcpu_check_singlestep() sets some flags differently
>  - Drop changes to kvm_skip_emulated_instruction()]
> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
> 
> CVE-2017-7518
> 
> (cherry picked from commit 24ee2a286de7ac680ad4b2423c2dcee68444e567 linux-stable)
> Signed-off-by: Tyler Hicks <tyhicks@canonical.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

> ---
>  arch/x86/include/asm/kvm_emulate.h |  1 +
>  arch/x86/kvm/emulate.c             |  1 +
>  arch/x86/kvm/x86.c                 | 53 ++++++++++++++++----------------------
>  3 files changed, 24 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 24ec1216596e..71a1399903cc 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -275,6 +275,7 @@ struct x86_emulate_ctxt {
>  	bool guest_mode; /* guest running a nested guest */
>  	bool perm_ok; /* do not check permissions if true */
>  	bool ud;	/* inject an #UD if host doesn't support insn */
> +	bool tf;	/* TF value before instruction (after for syscall/sysret) */
>  
>  	bool have_exception;
>  	struct x86_exception exception;
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 29524529d3f5..fda743285155 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2274,6 +2274,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
>  		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
>  	}
>  
> +	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
>  	return X86EMUL_CONTINUE;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f1c15ad80219..4ec7f1ee6357 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4866,6 +4866,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
>  	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
>  
>  	ctxt->eflags = kvm_get_rflags(vcpu);
> +	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
> +
>  	ctxt->eip = kvm_rip_read(vcpu);
>  	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
>  		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
> @@ -5056,38 +5058,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
>  	return dr6;
>  }
>  
> -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
> +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
>  {
>  	struct kvm_run *kvm_run = vcpu->run;
>  
> -	/*
> -	 * Use the "raw" value to see if TF was passed to the processor.
> -	 * Note that the new value of the flags has not been saved yet.
> -	 *
> -	 * This is correct even for TF set by the guest, because "the
> -	 * processor will not generate this exception after the instruction
> -	 * that sets the TF flag".
> -	 */
> -	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> -
> -	if (unlikely(rflags & X86_EFLAGS_TF)) {
> -		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
> -			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
> -			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
> -			kvm_run->debug.arch.exception = DB_VECTOR;
> -			kvm_run->exit_reason = KVM_EXIT_DEBUG;
> -			*r = EMULATE_USER_EXIT;
> -		} else {
> -			vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
> -			/*
> -			 * "Certain debug exceptions may clear bit 0-3.  The
> -			 * remaining contents of the DR6 register are never
> -			 * cleared by the processor".
> -			 */
> -			vcpu->arch.dr6 &= ~15;
> -			vcpu->arch.dr6 |= DR6_BS;
> -			kvm_queue_exception(vcpu, DB_VECTOR);
> -		}
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
> +		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
> +		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
> +		kvm_run->debug.arch.exception = DB_VECTOR;
> +		kvm_run->exit_reason = KVM_EXIT_DEBUG;
> +		*r = EMULATE_USER_EXIT;
> +	} else {
> +		vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
> +		/*
> +		 * "Certain debug exceptions may clear bit 0-3.  The
> +		 * remaining contents of the DR6 register are never
> +		 * cleared by the processor".
> +		 */
> +		vcpu->arch.dr6 &= ~15;
> +		vcpu->arch.dr6 |= DR6_BS;
> +		kvm_queue_exception(vcpu, DB_VECTOR);
>  	}
>  }
>  
> @@ -5240,8 +5230,9 @@ restart:
>  		kvm_make_request(KVM_REQ_EVENT, vcpu);
>  		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>  		kvm_rip_write(vcpu, ctxt->eip);
> -		if (r == EMULATE_DONE)
> -			kvm_vcpu_check_singlestep(vcpu, &r);
> +		if (r == EMULATE_DONE &&
> +		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
> +			kvm_vcpu_do_singlestep(vcpu, &r);
>  		kvm_set_rflags(vcpu, ctxt->eflags);
>  	} else
>  		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
>

Patch

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 24ec1216596e..71a1399903cc 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -275,6 +275,7 @@  struct x86_emulate_ctxt {
 	bool guest_mode; /* guest running a nested guest */
 	bool perm_ok; /* do not check permissions if true */
 	bool ud;	/* inject an #UD if host doesn't support insn */
+	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 29524529d3f5..fda743285155 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2274,6 +2274,7 @@  static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
 	}
 
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 	return X86EMUL_CONTINUE;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1c15ad80219..4ec7f1ee6357 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4866,6 +4866,8 @@  static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
 	ctxt->eip = kvm_rip_read(vcpu);
 	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
@@ -5056,38 +5058,26 @@  static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * Use the "raw" value to see if TF was passed to the processor.
-	 * Note that the new value of the flags has not been saved yet.
-	 *
-	 * This is correct even for TF set by the guest, because "the
-	 * processor will not generate this exception after the instruction
-	 * that sets the TF flag".
-	 */
-	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
-	if (unlikely(rflags & X86_EFLAGS_TF)) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
-			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
-		} else {
-			vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
-			/*
-			 * "Certain debug exceptions may clear bit 0-3.  The
-			 * remaining contents of the DR6 register are never
-			 * cleared by the processor".
-			 */
-			vcpu->arch.dr6 &= ~15;
-			vcpu->arch.dr6 |= DR6_BS;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-		}
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
+		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		*r = EMULATE_USER_EXIT;
+	} else {
+		vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~15;
+		vcpu->arch.dr6 |= DR6_BS;
+		kvm_queue_exception(vcpu, DB_VECTOR);
 	}
 }
 
@@ -5240,8 +5230,9 @@  restart:
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE)
-			kvm_vcpu_check_singlestep(vcpu, &r);
+		if (r == EMULATE_DONE &&
+		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+			kvm_vcpu_do_singlestep(vcpu, &r);
 		kvm_set_rflags(vcpu, ctxt->eflags);
 	} else
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;