diff mbox series

[01/12] KVM: PPC: Book3S HV nestedv2: Invalidate RPT before deleting a guest

Message ID 20231201132618.555031-2-vaibhav@linux.ibm.com
State New
Headers show
Series KVM: PPC: Nested APIv2 : Performance improvements | expand

Commit Message

Vaibhav Jain Dec. 1, 2023, 1:26 p.m. UTC
From: Jordan Niethe <jniethe5@gmail.com>

An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not
already been done. This is a slow operation that means H_GUEST_DELETE
must return H_BUSY multiple times before completing. Invalidating the
tables before deleting the guest so there is less work for the L0 to do.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s.h | 1 +
 arch/powerpc/kvm/book3s_hv.c          | 6 ++++--
 arch/powerpc/kvm/book3s_hv_nested.c   | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

Comments

Aneesh Kumar K.V (IBM) Dec. 7, 2023, 9:15 a.m. UTC | #1
Vaibhav Jain <vaibhav@linux.ibm.com> writes:

> From: Jordan Niethe <jniethe5@gmail.com>
>
> An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not
> already been done. This is a slow operation that means H_GUEST_DELETE
> must return H_BUSY multiple times before completing. Invalidating the
> tables before deleting the guest so there is less work for the L0 to do.
>
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/include/asm/kvm_book3s.h | 1 +
>  arch/powerpc/kvm/book3s_hv.c          | 6 ++++--
>  arch/powerpc/kvm/book3s_hv_nested.c   | 2 +-
>  3 files changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 4f527d09c92b..a37736ed3728 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void);
>  void kvmhv_vm_nested_init(struct kvm *kvm);
>  long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
>  long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
> +void kvmhv_flush_lpid(u64 lpid);
>  void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1ed6ec140701..5543e8490cd9 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -5691,10 +5691,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>  			kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
>  	}
>  
> -	if (kvmhv_is_nestedv2())
> +	if (kvmhv_is_nestedv2()) {
> +		kvmhv_flush_lpid(kvm->arch.lpid);
>  		plpar_guest_delete(0, kvm->arch.lpid);
>

I am not sure I follow the optimization here. I would expect the
hypervisor to kill all the translation caches as part of guest_delete.
What is the benefit of doing a lpid flush outside the guest delete?

> -	else
> +	} else {
>  		kvmppc_free_lpid(kvm->arch.lpid);
> +	}
>  
>  	kvmppc_free_pimap(kvm);
>  }
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 3b658b8696bc..5c375ec1a3c6 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -503,7 +503,7 @@ void kvmhv_nested_exit(void)
>  	}
>  }
>  
> -static void kvmhv_flush_lpid(u64 lpid)
> +void kvmhv_flush_lpid(u64 lpid)
>  {
>  	long rc;
>  
> -- 
> 2.42.0
Vaibhav Jain Dec. 8, 2023, 1:45 p.m. UTC | #2
Hi Aneesh,

Thanks for looking into this patch. My responses inline below:

"Aneesh Kumar K.V (IBM)" <aneesh.kumar@kernel.org> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>
>> From: Jordan Niethe <jniethe5@gmail.com>
>>
>> An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not
>> already been done. This is a slow operation that means H_GUEST_DELETE
>> must return H_BUSY multiple times before completing. Invalidating the
>> tables before deleting the guest so there is less work for the L0 to do.
>>
>> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
>> ---
>>  arch/powerpc/include/asm/kvm_book3s.h | 1 +
>>  arch/powerpc/kvm/book3s_hv.c          | 6 ++++--
>>  arch/powerpc/kvm/book3s_hv_nested.c   | 2 +-
>>  3 files changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
>> index 4f527d09c92b..a37736ed3728 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s.h
>> @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void);
>>  void kvmhv_vm_nested_init(struct kvm *kvm);
>>  long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
>>  long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
>> +void kvmhv_flush_lpid(u64 lpid);
>>  void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
>>  void kvmhv_release_all_nested(struct kvm *kvm);
>>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 1ed6ec140701..5543e8490cd9 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -5691,10 +5691,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>>  			kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
>>  	}
>>  
>> -	if (kvmhv_is_nestedv2())
>> +	if (kvmhv_is_nestedv2()) {
>> +		kvmhv_flush_lpid(kvm->arch.lpid);
>>  		plpar_guest_delete(0, kvm->arch.lpid);
>>
>
> I am not sure I follow the optimization here. I would expect the
> hypervisor to kill all the translation caches as part of guest_delete.
> What is the benefit of doing a lpid flush outside the guest delete?
>
Thats right. However without this optimization the H_GUEST_DELETE hcall
in plpar_guest_delete() returns H_BUSY multiple times resulting in
multiple hcalls to the hypervisor until it finishes. Flushing the guest
translation cache upfront reduces the number of HCALLs L1 guests has to
make to delete a L2 guest via H_GUEST_DELETE.

>> -	else
>> +	} else {
>>  		kvmppc_free_lpid(kvm->arch.lpid);
>> +	}
>>  
>>  	kvmppc_free_pimap(kvm);
>>  }
>> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
>> index 3b658b8696bc..5c375ec1a3c6 100644
>> --- a/arch/powerpc/kvm/book3s_hv_nested.c
>> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
>> @@ -503,7 +503,7 @@ void kvmhv_nested_exit(void)
>>  	}
>>  }
>>  
>> -static void kvmhv_flush_lpid(u64 lpid)
>> +void kvmhv_flush_lpid(u64 lpid)
>>  {
>>  	long rc;
>>  
>> -- 
>> 2.42.0
Aneesh Kumar K.V (IBM) Dec. 15, 2023, 4:12 p.m. UTC | #3
Vaibhav Jain <vaibhav@linux.ibm.com> writes:

> Hi Aneesh,
>
> Thanks for looking into this patch. My responses inline below:
>
> "Aneesh Kumar K.V (IBM)" <aneesh.kumar@kernel.org> writes:
>
>> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>>
>>> From: Jordan Niethe <jniethe5@gmail.com>
>>>
>>> An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not
>>> already been done. This is a slow operation that means H_GUEST_DELETE
>>> must return H_BUSY multiple times before completing. Invalidating the
>>> tables before deleting the guest so there is less work for the L0 to do.
>>>
>>> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
>>> ---
>>>  arch/powerpc/include/asm/kvm_book3s.h | 1 +
>>>  arch/powerpc/kvm/book3s_hv.c          | 6 ++++--
>>>  arch/powerpc/kvm/book3s_hv_nested.c   | 2 +-
>>>  3 files changed, 6 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
>>> index 4f527d09c92b..a37736ed3728 100644
>>> --- a/arch/powerpc/include/asm/kvm_book3s.h
>>> +++ b/arch/powerpc/include/asm/kvm_book3s.h
>>> @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void);
>>>  void kvmhv_vm_nested_init(struct kvm *kvm);
>>>  long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
>>>  long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
>>> +void kvmhv_flush_lpid(u64 lpid);
>>>  void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
>>>  void kvmhv_release_all_nested(struct kvm *kvm);
>>>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 1ed6ec140701..5543e8490cd9 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -5691,10 +5691,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>>>  			kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
>>>  	}
>>>  
>>> -	if (kvmhv_is_nestedv2())
>>> +	if (kvmhv_is_nestedv2()) {
>>> +		kvmhv_flush_lpid(kvm->arch.lpid);
>>>  		plpar_guest_delete(0, kvm->arch.lpid);
>>>
>>
>> I am not sure I follow the optimization here. I would expect the
>> hypervisor to kill all the translation caches as part of guest_delete.
>> What is the benefit of doing a lpid flush outside the guest delete?
>>
> Thats right. However without this optimization the H_GUEST_DELETE hcall
> in plpar_guest_delete() returns H_BUSY multiple times resulting in
> multiple hcalls to the hypervisor until it finishes. Flushing the guest
> translation cache upfront reduces the number of HCALLs L1 guests has to
> make to delete a L2 guest via H_GUEST_DELETE.
>

can we add that as a comment above that kvmhv_flush_lpid()?

-aneesh
Vaibhav Jain Dec. 18, 2023, 4:54 a.m. UTC | #4
"Aneesh Kumar K.V" <aneesh.kumar@kernel.org> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>
>> Hi Aneesh,
>>
>> Thanks for looking into this patch. My responses inline below:
>>
>> "Aneesh Kumar K.V (IBM)" <aneesh.kumar@kernel.org> writes:
>>
>>> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>>>
>>>> From: Jordan Niethe <jniethe5@gmail.com>
>>>>
>>>> An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not
>>>> already been done. This is a slow operation that means H_GUEST_DELETE
>>>> must return H_BUSY multiple times before completing. Invalidating the
>>>> tables before deleting the guest so there is less work for the L0 to do.
>>>>
>>>> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
>>>> ---
>>>>  arch/powerpc/include/asm/kvm_book3s.h | 1 +
>>>>  arch/powerpc/kvm/book3s_hv.c          | 6 ++++--
>>>>  arch/powerpc/kvm/book3s_hv_nested.c   | 2 +-
>>>>  3 files changed, 6 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
>>>> index 4f527d09c92b..a37736ed3728 100644
>>>> --- a/arch/powerpc/include/asm/kvm_book3s.h
>>>> +++ b/arch/powerpc/include/asm/kvm_book3s.h
>>>> @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void);
>>>>  void kvmhv_vm_nested_init(struct kvm *kvm);
>>>>  long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
>>>>  long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
>>>> +void kvmhv_flush_lpid(u64 lpid);
>>>>  void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
>>>>  void kvmhv_release_all_nested(struct kvm *kvm);
>>>>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>> index 1ed6ec140701..5543e8490cd9 100644
>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>> @@ -5691,10 +5691,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>>>>  			kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
>>>>  	}
>>>>  
>>>> -	if (kvmhv_is_nestedv2())
>>>> +	if (kvmhv_is_nestedv2()) {
>>>> +		kvmhv_flush_lpid(kvm->arch.lpid);
>>>>  		plpar_guest_delete(0, kvm->arch.lpid);
>>>>
>>>
>>> I am not sure I follow the optimization here. I would expect the
>>> hypervisor to kill all the translation caches as part of guest_delete.
>>> What is the benefit of doing a lpid flush outside the guest delete?
>>>
>> Thats right. However without this optimization the H_GUEST_DELETE hcall
>> in plpar_guest_delete() returns H_BUSY multiple times resulting in
>> multiple hcalls to the hypervisor until it finishes. Flushing the guest
>> translation cache upfront reduces the number of HCALLs L1 guests has to
>> make to delete a L2 guest via H_GUEST_DELETE.
>>
>
> can we add that as a comment above that kvmhv_flush_lpid()?
Sure, I will put up a comment with that detail in v2 of the patch
series.

>
> -aneesh
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4f527d09c92b..a37736ed3728 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -302,6 +302,7 @@  void kvmhv_nested_exit(void);
 void kvmhv_vm_nested_init(struct kvm *kvm);
 long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
 long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
+void kvmhv_flush_lpid(u64 lpid);
 void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1ed6ec140701..5543e8490cd9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5691,10 +5691,12 @@  static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 			kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
 	}
 
-	if (kvmhv_is_nestedv2())
+	if (kvmhv_is_nestedv2()) {
+		kvmhv_flush_lpid(kvm->arch.lpid);
 		plpar_guest_delete(0, kvm->arch.lpid);
-	else
+	} else {
 		kvmppc_free_lpid(kvm->arch.lpid);
+	}
 
 	kvmppc_free_pimap(kvm);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 3b658b8696bc..5c375ec1a3c6 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -503,7 +503,7 @@  void kvmhv_nested_exit(void)
 	}
 }
 
-static void kvmhv_flush_lpid(u64 lpid)
+void kvmhv_flush_lpid(u64 lpid)
 {
 	long rc;