[1/1] powerpc/kvm/book3s: Fixes possible 'use after release' of kvm
diff mbox series

Message ID 20191126175212.377171-1-leonardo@linux.ibm.com
State Rejected
Headers show
Series
  • [1/1] powerpc/kvm/book3s: Fixes possible 'use after release' of kvm
Related show

Commit Message

Leonardo Bras Nov. 26, 2019, 5:52 p.m. UTC
Fixes a possible 'use after free' of the kvm variable.
mutex_unlock(&kvm->lock) is called after kvm may already have been freed
by kvm_put_kvm(kvm).

Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_vio.c | 3 +--
 virt/kvm/kvm_main.c              | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

Comments

Sean Christopherson Nov. 26, 2019, 6:14 p.m. UTC | #1
On Tue, Nov 26, 2019 at 02:52:12PM -0300, Leonardo Bras wrote:
> Fixes a possible 'use after free' of kvm variable.
> It does use mutex_unlock(&kvm->lock) after possible freeing a variable
> with kvm_put_kvm(kvm).

Moving the calls to kvm_put_kvm() to the end of the functions doesn't
actually fix a use-after-free.  In these flows, the reference being
released is a borrowed reference that KVM takes on behalf of the entity it
is creating, e.g. device, vcpu, or spapr tce.  The caller of these create
helpers must also hold its own reference to @kvm on top of the borrowed
reference, i.e. these kvm_put_kvm() calls will never free @kvm (assuming
there are no refcounting bugs elsewhere in KVM).

If one of these kvm_put_kvm() calls did unexpectedly free @kvm (due to a bug
somewhere else), KVM would still hit a use-after-free scenario as the
caller still thinks @kvm is valid.  Currently, this would only happen on a
subsequent ioctl() on the caller's file descriptor (which holds a pointer
to @kvm), as the callers of these functions don't directly dereference
@kvm after the functions return.  But not dereferencing @kvm isn't deliberate
or functionally required; it's just how the code happens to be written.

The intent of adding kvm_put_kvm_no_destroy() was primarily to document
that under no circumstance should the to-be-put reference be the *last*
reference to @kvm.  Moving the call to kvm_put_kvm{_no_destroy}() doesn't
change that.

> Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 3 +--
>  virt/kvm/kvm_main.c              | 8 ++++----
>  2 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 5834db0a54c6..a402ead833b6 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  
>  	if (ret >= 0)
>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -	else
> -		kvm_put_kvm(kvm);
>  
>  	mutex_unlock(&kvm->lock);
>  
>  	if (ret >= 0)
>  		return ret;
>  
> +	kvm_put_kvm(kvm);
>  	kfree(stt);
>   fail_acct:
>  	account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 13efc291b1c7..f37089b60d09 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2744,10 +2744,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	/* Now it's all set up, let userspace reach it */
>  	kvm_get_kvm(kvm);
>  	r = create_vcpu_fd(vcpu);
> -	if (r < 0) {
> -		kvm_put_kvm(kvm);
> +	if (r < 0)
>  		goto unlock_vcpu_destroy;
> -	}
>  
>  	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
>  
> @@ -2771,6 +2769,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	mutex_lock(&kvm->lock);
>  	kvm->created_vcpus--;
>  	mutex_unlock(&kvm->lock);
> +	if (r < 0)
> +		kvm_put_kvm(kvm);
>  	return r;
>  }
>  
> @@ -3183,10 +3183,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
>  	kvm_get_kvm(kvm);
>  	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
>  	if (ret < 0) {
> -		kvm_put_kvm(kvm);
>  		mutex_lock(&kvm->lock);
>  		list_del(&dev->vm_node);
>  		mutex_unlock(&kvm->lock);
> +		kvm_put_kvm(kvm);
>  		ops->destroy(dev);
>  		return ret;
>  	}
> -- 
> 2.23.0
>
Paolo Bonzini Nov. 27, 2019, 4:40 p.m. UTC | #2
On 26/11/19 18:52, Leonardo Bras wrote:
> Fixes a possible 'use after free' of kvm variable.
> It does use mutex_unlock(&kvm->lock) after possible freeing a variable
> with kvm_put_kvm(kvm).
> 
> Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 3 +--
>  virt/kvm/kvm_main.c              | 8 ++++----
>  2 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 5834db0a54c6..a402ead833b6 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  
>  	if (ret >= 0)
>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -	else
> -		kvm_put_kvm(kvm);
>  
>  	mutex_unlock(&kvm->lock);
>  
>  	if (ret >= 0)
>  		return ret;
>  
> +	kvm_put_kvm(kvm);
>  	kfree(stt);
>   fail_acct:
>  	account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);

This part is a good change, as it makes the code clearer.  The
virt/kvm/kvm_main.c bits, however, are not necessary as explained by Sean.

Paolo

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 13efc291b1c7..f37089b60d09 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2744,10 +2744,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	/* Now it's all set up, let userspace reach it */
>  	kvm_get_kvm(kvm);
>  	r = create_vcpu_fd(vcpu);
> -	if (r < 0) {
> -		kvm_put_kvm(kvm);
> +	if (r < 0)
>  		goto unlock_vcpu_destroy;
> -	}
>  
>  	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
>  
> @@ -2771,6 +2769,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	mutex_lock(&kvm->lock);
>  	kvm->created_vcpus--;
>  	mutex_unlock(&kvm->lock);
> +	if (r < 0)
> +		kvm_put_kvm(kvm);
>  	return r;
>  }
>  
> @@ -3183,10 +3183,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
>  	kvm_get_kvm(kvm);
>  	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
>  	if (ret < 0) {
> -		kvm_put_kvm(kvm);
>  		mutex_lock(&kvm->lock);
>  		list_del(&dev->vm_node);
>  		mutex_unlock(&kvm->lock);
> +		kvm_put_kvm(kvm);
>  		ops->destroy(dev);
>  		return ret;
>  	}
>
Leonardo Bras Nov. 27, 2019, 8:18 p.m. UTC | #3
On Wed, 2019-11-27 at 17:40 +0100, Paolo Bonzini wrote:
> >   
> >        if (ret >= 0)
> >                list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> > -     else
> > -             kvm_put_kvm(kvm);
> >   
> >        mutex_unlock(&kvm->lock);
> >   
> >        if (ret >= 0)
> >                return ret;
> >   
> > +     kvm_put_kvm(kvm);
> >        kfree(stt);
> >    fail_acct:
> >        account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
> 
> This part is a good change, as it makes the code clearer.  The
> virt/kvm/kvm_main.c bits, however, are not necessary as explained by Sean.
> 

Thanks!
So, like this patch?
https://lkml.org/lkml/2019/11/7/763

Best regards,

Leonardo
Paul Mackerras Nov. 27, 2019, 10:57 p.m. UTC | #4
On Tue, Nov 26, 2019 at 02:52:12PM -0300, Leonardo Bras wrote:
> Fixes a possible 'use after free' of kvm variable.
> It does use mutex_unlock(&kvm->lock) after possible freeing a variable
> with kvm_put_kvm(kvm).

Comments below...

> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 5834db0a54c6..a402ead833b6 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  
>  	if (ret >= 0)
>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -	else
> -		kvm_put_kvm(kvm);
>  
>  	mutex_unlock(&kvm->lock);
>  
>  	if (ret >= 0)
>  		return ret;
>  
> +	kvm_put_kvm(kvm);

There isn't a potential use-after-free here.  We are relying on the
property that the release function (kvm_vm_release) cannot be called
in parallel with this function.  The reason is that this function
(kvm_vm_ioctl_create_spapr_tce) is handling an ioctl on a kvm VM file
descriptor.  That means that a userspace process has the file
descriptor still open.  The code that implements the close() system
call makes sure that no thread is still executing inside any system
call that is using the same file descriptor before calling the file
descriptor's release function (in this case, kvm_vm_release).  That
means that this kvm_put_kvm() call here cannot make the reference
count go to zero.

>  	kfree(stt);
>   fail_acct:
>  	account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 13efc291b1c7..f37089b60d09 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2744,10 +2744,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	/* Now it's all set up, let userspace reach it */
>  	kvm_get_kvm(kvm);
>  	r = create_vcpu_fd(vcpu);
> -	if (r < 0) {
> -		kvm_put_kvm(kvm);
> +	if (r < 0)
>  		goto unlock_vcpu_destroy;
> -	}
>  
>  	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
>  
> @@ -2771,6 +2769,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  	mutex_lock(&kvm->lock);
>  	kvm->created_vcpus--;
>  	mutex_unlock(&kvm->lock);
> +	if (r < 0)
> +		kvm_put_kvm(kvm);
>  	return r;
>  }

Once again we are inside an ioctl on the kvm VM file descriptor, so
the reference count cannot go to zero.

> @@ -3183,10 +3183,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
>  	kvm_get_kvm(kvm);
>  	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
>  	if (ret < 0) {
> -		kvm_put_kvm(kvm);
>  		mutex_lock(&kvm->lock);
>  		list_del(&dev->vm_node);
>  		mutex_unlock(&kvm->lock);
> +		kvm_put_kvm(kvm);
>  		ops->destroy(dev);
>  		return ret;
>  	}

Same again here.

Paul.
Leonardo Bras Nov. 28, 2019, 4:24 p.m. UTC | #5
On Thu, 2019-11-28 at 09:57 +1100, Paul Mackerras wrote:
> There isn't a potential use-after-free here.  We are relying on the
> property that the release function (kvm_vm_release) cannot be called
> in parallel with this function.  The reason is that this function
> (kvm_vm_ioctl_create_spapr_tce) is handling an ioctl on a kvm VM file
> descriptor.  That means that a userspace process has the file
> descriptor still open.  The code that implements the close() system
> call makes sure that no thread is still executing inside any system
> call that is using the same file descriptor before calling the file
> descriptor's release function (in this case, kvm_vm_release).  That
> means that this kvm_put_kvm() call here cannot make the reference
> count go to zero.

That was very informative. A lot of things are clear to me now.
Thanks for explaining this, Paul.

Best regards,
Leonardo
Leonardo Bras Nov. 28, 2019, 5:15 p.m. UTC | #6
On Wed, 2019-11-27 at 17:40 +0100, Paolo Bonzini wrote:
> > diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> > index 5834db0a54c6..a402ead833b6 100644
> > --- a/arch/powerpc/kvm/book3s_64_vio.c
> > +++ b/arch/powerpc/kvm/book3s_64_vio.c
> > @@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> >   
> >        if (ret >= 0)
> >                list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> > -     else
> > -             kvm_put_kvm(kvm);
> >   
> >        mutex_unlock(&kvm->lock);
> >   
> >        if (ret >= 0)
> >                return ret;
> >   
> > +     kvm_put_kvm(kvm);
> >        kfree(stt);
> >    fail_acct:
> >        account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);

Paul, do you think this change is still valid as it 'makes the code
clearer', as said by Paolo before? I would write a new commit message
to match the change.

Best regards,
Leonardo

Patch
diff mbox series

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5834db0a54c6..a402ead833b6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -316,14 +316,13 @@  long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 
 	if (ret >= 0)
 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-	else
-		kvm_put_kvm(kvm);
 
 	mutex_unlock(&kvm->lock);
 
 	if (ret >= 0)
 		return ret;
 
+	kvm_put_kvm(kvm);
 	kfree(stt);
  fail_acct:
 	account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13efc291b1c7..f37089b60d09 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2744,10 +2744,8 @@  static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
-	if (r < 0) {
-		kvm_put_kvm(kvm);
+	if (r < 0)
 		goto unlock_vcpu_destroy;
-	}
 
 	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
 
@@ -2771,6 +2769,8 @@  static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	mutex_lock(&kvm->lock);
 	kvm->created_vcpus--;
 	mutex_unlock(&kvm->lock);
+	if (r < 0)
+		kvm_put_kvm(kvm);
 	return r;
 }
 
@@ -3183,10 +3183,10 @@  static int kvm_ioctl_create_device(struct kvm *kvm,
 	kvm_get_kvm(kvm);
 	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
 	if (ret < 0) {
-		kvm_put_kvm(kvm);
 		mutex_lock(&kvm->lock);
 		list_del(&dev->vm_node);
 		mutex_unlock(&kvm->lock);
+		kvm_put_kvm(kvm);
 		ops->destroy(dev);
 		return ret;
 	}