diff mbox series

[kernel] KVM: PPC: Improve KVM reference counting

Message ID 20190221034414.41777-1-aik@ozlabs.ru (mailing list archive)
State Not Applicable
Headers show
Series [kernel] KVM: PPC: Improve KVM reference counting | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/build-ppc64le success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64be success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64e success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-pmac32 success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 18 lines checked

Commit Message

Alexey Kardashevskiy Feb. 21, 2019, 3:44 a.m. UTC
The anon fd's ops releases the KVM reference in the release hook.
However we reference the KVM object after we create the fd so there is
small window when the release function can be called and
dereferenced the KVM object which potentially may free it.

It is not a problem at the moment as the file is created and KVM is
referenced under the KVM lock and the release function obtains the same
lock before dereferencing the KVM (although the lock is not held when
calling kvm_put_kvm()) but it is a fragile against future changes.

This references the KVM object before creating a file.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

The original bug is described here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cfa393811
But in this case kvm_put_kvm() is called straight away with no locks before/after/around.

---
 arch/powerpc/kvm/book3s_64_vio.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Comments

Michael Ellerman Feb. 21, 2019, 6:26 a.m. UTC | #1
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> The anon fd's ops releases the KVM reference in the release hook.
> However we reference the KVM object after we create the fd so there is
> small window when the release function can be called and
> dereferenced the KVM object which potentially may free it.
  dereference
>
> It is not a problem at the moment as the file is created and KVM is
> referenced under the KVM lock and the release function obtains the same
> lock before dereferencing the KVM (although the lock is not held when
> calling kvm_put_kvm()) but it is a fragile against future changes.
>
> This references the KVM object before creating a file.
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>
> The original bug is described here:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cfa393811
> But in this case kvm_put_kvm() is called straight away with no locks before/after/around.
>
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 532ab797..d68c969 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -338,14 +338,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  		}
>  	}
>  
> +	kvm_get_kvm(kvm);
>  	if (!ret)
>  		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>  				       stt, O_RDWR | O_CLOEXEC);
>  
> -	if (ret >= 0) {
> +	if (ret >= 0)
>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -		kvm_get_kvm(kvm);
> -	}
> +	else
> +		kvm_put_kvm(kvm);
>  
>  	mutex_unlock(&kvm->lock);

This looks correct to me. But I feel like the logic could be cleaner,
perhaps like this (patch below):

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
		if (siter->liobn == args->liobn) {
			ret = -EBUSY;
			goto fail_unlock;
		}
	}

	kvm_get_kvm(kvm);
	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR | O_CLOEXEC);

	if (ret < 0) {
		kvm_put_kvm(kvm);
		goto fail_unlock;
	}

	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return ret;

 fail_unlock:
	mutex_unlock(&kvm->lock);
 fail:


cheers

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 532ab79734c7..5d74602db0d0 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -296,7 +296,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	struct kvmppc_spapr_tce_table *stt = NULL;
 	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size = args->size;
-	int ret = -ENOMEM;
+	int ret;
 	int i;
 
 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
@@ -330,28 +330,30 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	mutex_lock(&kvm->lock);
 
 	/* Check this LIOBN hasn't been previously allocated */
-	ret = 0;
 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
 		if (siter->liobn == args->liobn) {
 			ret = -EBUSY;
-			break;
+			goto fail_unlock;
 		}
 	}
 
-	if (!ret)
-		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				       stt, O_RDWR | O_CLOEXEC);
+	kvm_get_kvm(kvm);
+	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				stt, O_RDWR | O_CLOEXEC);
 
-	if (ret >= 0) {
-		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-		kvm_get_kvm(kvm);
+	if (ret < 0) {
+		kvm_put_kvm(kvm);
+		goto fail_unlock;
 	}
 
+	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
 	mutex_unlock(&kvm->lock);
 
-	if (ret >= 0)
-		return ret;
+	return ret;
 
+ fail_unlock:
+	mutex_unlock(&kvm->lock);
  fail:
 	for (i = 0; i < npages; i++)
 		if (stt->pages[i])
Alexey Kardashevskiy Feb. 22, 2019, 1:33 a.m. UTC | #2
On 21/02/2019 17:26, Michael Ellerman wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
>> The anon fd's ops releases the KVM reference in the release hook.
>> However we reference the KVM object after we create the fd so there is
>> small window when the release function can be called and
>> dereferenced the KVM object which potentially may free it.
>   dereference
>>
>> It is not a problem at the moment as the file is created and KVM is
>> referenced under the KVM lock and the release function obtains the same
>> lock before dereferencing the KVM (although the lock is not held when
>> calling kvm_put_kvm()) but it is a fragile against future changes.
>>
>> This references the KVM object before creating a file.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>
>> The original bug is described here:
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cfa393811
>> But in this case kvm_put_kvm() is called straight away with no locks before/after/around.
>>
>> ---
>>  arch/powerpc/kvm/book3s_64_vio.c | 7 ++++---
>>  1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
>> index 532ab797..d68c969 100644
>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>> @@ -338,14 +338,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>>  		}
>>  	}
>>  
>> +	kvm_get_kvm(kvm);
>>  	if (!ret)
>>  		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>  				       stt, O_RDWR | O_CLOEXEC);
>>  
>> -	if (ret >= 0) {
>> +	if (ret >= 0)
>>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
>> -		kvm_get_kvm(kvm);
>> -	}
>> +	else
>> +		kvm_put_kvm(kvm);
>>  
>>  	mutex_unlock(&kvm->lock);
> 
> This looks correct to me. But I feel like the logic could be cleaner,
> perhaps like this (patch below):


And I feel that 1) your patch tries to hide what it actually does 2)
having 2 unlocks for one lock is an invite for future bugs imho, I'd
think the whole point of adding new gotos is exactly to have 1 unlock.


> 
> 	mutex_lock(&kvm->lock);
> 
> 	/* Check this LIOBN hasn't been previously allocated */
> 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
> 		if (siter->liobn == args->liobn) {
> 			ret = -EBUSY;
> 			goto fail_unlock;
> 		}
> 	}
> 
> 	kvm_get_kvm(kvm);
> 	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> 				stt, O_RDWR | O_CLOEXEC);
> 
> 	if (ret < 0) {
> 		kvm_put_kvm(kvm);
> 		goto fail_unlock;
> 	}
> 
> 	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> 
> 	mutex_unlock(&kvm->lock);
> 
> 	return ret;
> 
>  fail_unlock:
> 	mutex_unlock(&kvm->lock);
>  fail:
> 
> 
> cheers
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 532ab79734c7..5d74602db0d0 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -296,7 +296,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  	struct kvmppc_spapr_tce_table *stt = NULL;
>  	struct kvmppc_spapr_tce_table *siter;
>  	unsigned long npages, size = args->size;
> -	int ret = -ENOMEM;
> +	int ret;
>  	int i;
>  
>  	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
> @@ -330,28 +330,30 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  	mutex_lock(&kvm->lock);
>  
>  	/* Check this LIOBN hasn't been previously allocated */
> -	ret = 0;
>  	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
>  		if (siter->liobn == args->liobn) {
>  			ret = -EBUSY;
> -			break;
> +			goto fail_unlock;
>  		}
>  	}
>  
> -	if (!ret)
> -		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> -				       stt, O_RDWR | O_CLOEXEC);
> +	kvm_get_kvm(kvm);
> +	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> +				stt, O_RDWR | O_CLOEXEC);
>  
> -	if (ret >= 0) {
> -		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -		kvm_get_kvm(kvm);
> +	if (ret < 0) {
> +		kvm_put_kvm(kvm);
> +		goto fail_unlock;
>  	}
>  
> +	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> +
>  	mutex_unlock(&kvm->lock);
>  
> -	if (ret >= 0)
> -		return ret;
> +	return ret;
>  
> + fail_unlock:
> +	mutex_unlock(&kvm->lock);
>   fail:
>  	for (i = 0; i < npages; i++)
>  		if (stt->pages[i])
>
Paul Mackerras Feb. 22, 2019, 9:41 a.m. UTC | #3
On Thu, Feb 21, 2019 at 02:44:14PM +1100, Alexey Kardashevskiy wrote:
> The anon fd's ops releases the KVM reference in the release hook.
> However we reference the KVM object after we create the fd so there is
> small window when the release function can be called and
> dereferenced the KVM object which potentially may free it.
> 
> It is not a problem at the moment as the file is created and KVM is
> referenced under the KVM lock and the release function obtains the same
> lock before dereferencing the KVM (although the lock is not held when
> calling kvm_put_kvm()) but it is a fragile against future changes.
> 
> This references the KVM object before creating a file.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Thanks, applied to my kvm-ppc-next tree.

Paul.
diff mbox series

Patch

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 532ab797..d68c969 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -338,14 +338,15 @@  long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		}
 	}
 
+	kvm_get_kvm(kvm);
 	if (!ret)
 		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
 				       stt, O_RDWR | O_CLOEXEC);
 
-	if (ret >= 0) {
+	if (ret >= 0)
 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-		kvm_get_kvm(kvm);
-	}
+	else
+		kvm_put_kvm(kvm);
 
 	mutex_unlock(&kvm->lock);