Patchwork [2/3] kvm/ppc: IRQ disabling cleanup

login
register
mail settings
Submitter Scott Wood
Date July 10, 2013, 10:47 p.m.
Message ID <1373496461-2668-3-git-send-email-scottwood@freescale.com>
Download mbox | patch
Permalink /patch/258265/
State New
Headers show

Comments

Scott Wood - July 10, 2013, 10:47 p.m.
Simplify the handling of lazy EE by going directly from fully-enabled
to hard-disabled.  This replaces the lazy_irq_pending() check
(including its misplaced kvm_guest_exit() call).

As suggested by Tiejun Chen, move the interrupt disabling into
kvmppc_prepare_to_enter() rather than have each caller do it.  Also
move the IRQ enabling on heavyweight exit into
kvmppc_prepare_to_enter().

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++++
 arch/powerpc/kvm/book3s_pr.c       | 12 +++---------
 arch/powerpc/kvm/booke.c           | 11 +++--------
 arch/powerpc/kvm/powerpc.c         | 23 ++++++++++-------------
 4 files changed, 22 insertions(+), 30 deletions(-)
Alexander Graf - July 10, 2013, 10:57 p.m.
On 11.07.2013, at 00:47, Scott Wood wrote:

> Simplify the handling of lazy EE by going directly from fully-enabled
> to hard-disabled.  This replaces the lazy_irq_pending() check
> (including its misplaced kvm_guest_exit() call).
> 
> As suggested by Tiejun Chen, move the interrupt disabling into
> kvmppc_prepare_to_enter() rather than have each caller do it.  Also
> move the IRQ enabling on heavyweight exit into
> kvmppc_prepare_to_enter().
> 
> Signed-off-by: Scott Wood <scottwood@freescale.com>

Ben, could you please review this too? :) Thanks a bunch!

> ---
> arch/powerpc/include/asm/kvm_ppc.h |  6 ++++++
> arch/powerpc/kvm/book3s_pr.c       | 12 +++---------
> arch/powerpc/kvm/booke.c           | 11 +++--------
> arch/powerpc/kvm/powerpc.c         | 23 ++++++++++-------------
> 4 files changed, 22 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 6885846..e4474f8 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -404,6 +404,12 @@ static inline void kvmppc_fix_ee_before_entry(void)
> 	trace_hardirqs_on();
> 
> #ifdef CONFIG_PPC64
> +	/*
> +	 * To avoid races, the caller must have gone directly from having
> +	 * interrupts fully-enabled to hard-disabled.
> +	 */
> +	WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);

WARN_ON(lazy_irq_pending()); ?


Alex

> +
> 	/* Only need to enable IRQs by hard enabling them after this */
> 	local_paca->irq_happened = 0;
> 	local_paca->soft_enabled = 1;
> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> index ddfaf56..c13caa0 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -884,14 +884,11 @@ program_interrupt:
> 		 * and if we really did time things so badly, then we just exit
> 		 * again due to a host external interrupt.
> 		 */
> -		local_irq_disable();
> 		s = kvmppc_prepare_to_enter(vcpu);
> -		if (s <= 0) {
> -			local_irq_enable();
> +		if (s <= 0)
> 			r = s;
> -		} else {
> +		else
> 			kvmppc_fix_ee_before_entry();
> -		}
> 	}
> 
> 	trace_kvm_book3s_reenter(r, vcpu);
> @@ -1121,12 +1118,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 	 * really did time things so badly, then we just exit again due to
> 	 * a host external interrupt.
> 	 */
> -	local_irq_disable();
> 	ret = kvmppc_prepare_to_enter(vcpu);
> -	if (ret <= 0) {
> -		local_irq_enable();
> +	if (ret <= 0)
> 		goto out;
> -	}
> 
> 	/* Save FPU state in stack */
> 	if (current->thread.regs->msr & MSR_FP)
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 6e35351..aa3bc36 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -617,7 +617,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
> 		local_irq_enable();
> 		kvm_vcpu_block(vcpu);
> 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
> -		local_irq_disable();
> +		hard_irq_disable();
> 
> 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
> 		r = 1;
> @@ -666,10 +666,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 		return -EINVAL;
> 	}
> 
> -	local_irq_disable();
> 	s = kvmppc_prepare_to_enter(vcpu);
> 	if (s <= 0) {
> -		local_irq_enable();
> 		ret = s;
> 		goto out;
> 	}
> @@ -1162,14 +1160,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	 * aren't already exiting to userspace for some other reason.
> 	 */
> 	if (!(r & RESUME_HOST)) {
> -		local_irq_disable();
> 		s = kvmppc_prepare_to_enter(vcpu);
> -		if (s <= 0) {
> -			local_irq_enable();
> +		if (s <= 0)
> 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
> -		} else {
> +		else
> 			kvmppc_fix_ee_before_entry();
> -		}
> 	}
> 
> 	return r;
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 4e05f8c..2f7a221 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> {
> 	int r = 1;
> 
> -	WARN_ON_ONCE(!irqs_disabled());
> +	WARN_ON(irqs_disabled());
> +	hard_irq_disable();
> +
> 	while (true) {
> 		if (need_resched()) {
> 			local_irq_enable();
> 			cond_resched();
> -			local_irq_disable();
> +			hard_irq_disable();
> 			continue;
> 		}
> 
> @@ -95,7 +97,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> 			local_irq_enable();
> 			trace_kvm_check_requests(vcpu);
> 			r = kvmppc_core_check_requests(vcpu);
> -			local_irq_disable();
> +			hard_irq_disable();
> 			if (r > 0)
> 				continue;
> 			break;
> @@ -108,21 +110,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> 		}
> 
> #ifdef CONFIG_PPC64
> -		/* lazy EE magic */
> -		hard_irq_disable();
> -		if (lazy_irq_pending()) {
> -			/* Got an interrupt in between, try again */
> -			local_irq_enable();
> -			local_irq_disable();
> -			kvm_guest_exit();
> -			continue;
> -		}
> +		WARN_ON(lazy_irq_pending());
> #endif
> +		/* Can't use irqs_disabled() because we want hard irq state */
> +		WARN_ON(mfmsr() & MSR_EE);
> 
> 		kvm_guest_enter();
> -		break;
> +		return r;
> 	}
> 
> +	local_irq_enable();
> 	return r;
> }
> #endif /* CONFIG_KVM_BOOK3S_64_HV */
> -- 
> 1.8.1.2
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt - July 10, 2013, 11:01 p.m.
On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
> > #ifdef CONFIG_PPC64
> > +     /*
> > +      * To avoid races, the caller must have gone directly from having
> > +      * interrupts fully-enabled to hard-disabled.
> > +      */
> > +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
> 
> WARN_ON(lazy_irq_pending()); ?

Different semantics. What you propose will not catch irq_happened == 0 :-)

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - July 10, 2013, 11:04 p.m.
On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:

> On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
>>> #ifdef CONFIG_PPC64
>>> +     /*
>>> +      * To avoid races, the caller must have gone directly from having
>>> +      * interrupts fully-enabled to hard-disabled.
>>> +      */
>>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
>> 
>> WARN_ON(lazy_irq_pending()); ?
> 
> Different semantics. What you propose will not catch irq_happened == 0 :-)

Right, but we only ever reach here after hard_irq_disable() I think.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood - July 10, 2013, 11:08 p.m.
On 07/10/2013 06:04:53 PM, Alexander Graf wrote:
> 
> On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:
> 
> > On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
> >>> #ifdef CONFIG_PPC64
> >>> +     /*
> >>> +      * To avoid races, the caller must have gone directly from  
> having
> >>> +      * interrupts fully-enabled to hard-disabled.
> >>> +      */
> >>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
> >>
> >> WARN_ON(lazy_irq_pending()); ?
> >
> > Different semantics. What you propose will not catch irq_happened  
> == 0 :-)
> 
> Right, but we only ever reach here after hard_irq_disable() I think.

And the WARN_ON helps us ensure that it stays that way.

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - July 10, 2013, 11:09 p.m.
On 11.07.2013, at 01:08, Scott Wood wrote:

> On 07/10/2013 06:04:53 PM, Alexander Graf wrote:
>> On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:
>> > On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
>> >>> #ifdef CONFIG_PPC64
>> >>> +     /*
>> >>> +      * To avoid races, the caller must have gone directly from having
>> >>> +      * interrupts fully-enabled to hard-disabled.
>> >>> +      */
>> >>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
>> >>
>> >> WARN_ON(lazy_irq_pending()); ?
>> >
>> > Different semantics. What you propose will not catch irq_happened == 0 :-)
>> Right, but we only ever reach here after hard_irq_disable() I think.
> 
> And the WARN_ON helps us ensure that it stays that way.

Heh - ok :). Works for me.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood - Sept. 5, 2013, 10:09 p.m.
On Thu, 2013-07-11 at 01:09 +0200, Alexander Graf wrote:
> On 11.07.2013, at 01:08, Scott Wood wrote:
> 
> > On 07/10/2013 06:04:53 PM, Alexander Graf wrote:
> >> On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:
> >> > On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
> >> >>> #ifdef CONFIG_PPC64
> >> >>> +     /*
> >> >>> +      * To avoid races, the caller must have gone directly from having
> >> >>> +      * interrupts fully-enabled to hard-disabled.
> >> >>> +      */
> >> >>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
> >> >>
> >> >> WARN_ON(lazy_irq_pending()); ?
> >> >
> >> > Different semantics. What you propose will not catch irq_happened == 0 :-)
> >> Right, but we only ever reach here after hard_irq_disable() I think.
> > 
> > And the WARN_ON helps us ensure that it stays that way.
> 
> Heh - ok :). Works for me.

What's the status on this patch?

-Scott



--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Sept. 5, 2013, 11:06 p.m.
On 06.09.2013, at 00:09, Scott Wood wrote:

> On Thu, 2013-07-11 at 01:09 +0200, Alexander Graf wrote:
>> On 11.07.2013, at 01:08, Scott Wood wrote:
>> 
>>> On 07/10/2013 06:04:53 PM, Alexander Graf wrote:
>>>> On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:
>>>>> On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
>>>>>>> #ifdef CONFIG_PPC64
>>>>>>> +     /*
>>>>>>> +      * To avoid races, the caller must have gone directly from having
>>>>>>> +      * interrupts fully-enabled to hard-disabled.
>>>>>>> +      */
>>>>>>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
>>>>>> 
>>>>>> WARN_ON(lazy_irq_pending()); ?
>>>>> 
>>>>> Different semantics. What you propose will not catch irq_happened == 0 :-)
>>>> Right, but we only ever reach here after hard_irq_disable() I think.
>>> 
>>> And the WARN_ON helps us ensure that it stays that way.
>> 
>> Heh - ok :). Works for me.
> 
> What's the status on this patch?

IIUC it was ok. Ben, could you please verify?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood - Oct. 4, 2013, 5:22 p.m.
On Fri, 2013-09-06 at 01:06 +0200, Alexander Graf wrote:
> On 06.09.2013, at 00:09, Scott Wood wrote:
> 
> > On Thu, 2013-07-11 at 01:09 +0200, Alexander Graf wrote:
> >> On 11.07.2013, at 01:08, Scott Wood wrote:
> >> 
> >>> On 07/10/2013 06:04:53 PM, Alexander Graf wrote:
> >>>> On 11.07.2013, at 01:01, Benjamin Herrenschmidt wrote:
> >>>>> On Thu, 2013-07-11 at 00:57 +0200, Alexander Graf wrote:
> >>>>>>> #ifdef CONFIG_PPC64
> >>>>>>> +     /*
> >>>>>>> +      * To avoid races, the caller must have gone directly from having
> >>>>>>> +      * interrupts fully-enabled to hard-disabled.
> >>>>>>> +      */
> >>>>>>> +     WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
> >>>>>> 
> >>>>>> WARN_ON(lazy_irq_pending()); ?
> >>>>> 
> >>>>> Different semantics. What you propose will not catch irq_happened == 0 :-)
> >>>> Right, but we only ever reach here after hard_irq_disable() I think.
> >>> 
> >>> And the WARN_ON helps us ensure that it stays that way.
> >> 
> >> Heh - ok :). Works for me.
> > 
> > What's the status on this patch?
> 
> IIUC it was ok. Ben, could you please verify?

ping

-Scott



--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Dec. 29, 2013, 3:43 p.m.
On 11.07.2013, at 00:47, Scott Wood <scottwood@freescale.com> wrote:

> Simplify the handling of lazy EE by going directly from fully-enabled
> to hard-disabled.  This replaces the lazy_irq_pending() check
> (including its misplaced kvm_guest_exit() call).
> 
> As suggested by Tiejun Chen, move the interrupt disabling into
> kvmppc_prepare_to_enter() rather than have each caller do it.  Also
> move the IRQ enabling on heavyweight exit into
> kvmppc_prepare_to_enter().
> 
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> ---
> arch/powerpc/include/asm/kvm_ppc.h |  6 ++++++
> arch/powerpc/kvm/book3s_pr.c       | 12 +++---------
> arch/powerpc/kvm/booke.c           | 11 +++--------
> arch/powerpc/kvm/powerpc.c         | 23 ++++++++++-------------
> 4 files changed, 22 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 6885846..e4474f8 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -404,6 +404,12 @@ static inline void kvmppc_fix_ee_before_entry(void)
> 	trace_hardirqs_on();
> 
> #ifdef CONFIG_PPC64
> +	/*
> +	 * To avoid races, the caller must have gone directly from having
> +	 * interrupts fully-enabled to hard-disabled.
> +	 */
> +	WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
> +
> 	/* Only need to enable IRQs by hard enabling them after this */
> 	local_paca->irq_happened = 0;
> 	local_paca->soft_enabled = 1;
> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> index ddfaf56..c13caa0 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -884,14 +884,11 @@ program_interrupt:
> 		 * and if we really did time things so badly, then we just exit
> 		 * again due to a host external interrupt.
> 		 */
> -		local_irq_disable();
> 		s = kvmppc_prepare_to_enter(vcpu);
> -		if (s <= 0) {
> -			local_irq_enable();
> +		if (s <= 0)
> 			r = s;
> -		} else {
> +		else
> 			kvmppc_fix_ee_before_entry();
> -		}
> 	}

Please add a comment here stating that interrupts are hard disabled at this point.

> 
> 	trace_kvm_book3s_reenter(r, vcpu);
> @@ -1121,12 +1118,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 	 * really did time things so badly, then we just exit again due to
> 	 * a host external interrupt.
> 	 */
> -	local_irq_disable();
> 	ret = kvmppc_prepare_to_enter(vcpu);
> -	if (ret <= 0) {
> -		local_irq_enable();
> +	if (ret <= 0)
> 		goto out;
> -	}

Please add a comment here stating that interrupts are hard disabled at this point.

> 
> 	/* Save FPU state in stack */
> 	if (current->thread.regs->msr & MSR_FP)
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 6e35351..aa3bc36 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -617,7 +617,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
> 		local_irq_enable();
> 		kvm_vcpu_block(vcpu);
> 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
> -		local_irq_disable();
> +		hard_irq_disable();
> 
> 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
> 		r = 1;
> @@ -666,10 +666,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 		return -EINVAL;
> 	}
> 
> -	local_irq_disable();
> 	s = kvmppc_prepare_to_enter(vcpu);
> 	if (s <= 0) {
> -		local_irq_enable();
> 		ret = s;
> 		goto out;
> 	}

Please add a comment here stating that interrupts are hard disabled at this point.

> @@ -1162,14 +1160,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	 * aren't already exiting to userspace for some other reason.
> 	 */
> 	if (!(r & RESUME_HOST)) {
> -		local_irq_disable();
> 		s = kvmppc_prepare_to_enter(vcpu);
> -		if (s <= 0) {
> -			local_irq_enable();
> +		if (s <= 0)
> 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
> -		} else {
> +		else
> 			kvmppc_fix_ee_before_entry();
> -		}
> 	}
> 
> 	return r;
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 4e05f8c..2f7a221 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> {
> 	int r = 1;
> 
> -	WARN_ON_ONCE(!irqs_disabled());
> +	WARN_ON(irqs_disabled());
> +	hard_irq_disable();
> +
> 	while (true) {
> 		if (need_resched()) {
> 			local_irq_enable();

One thing I find reasonably tricky in this approach is that we run local_irq_enable, but hard_irq_disable. I did dig through the code and it should work just fine, but I think we should make sure to note that this is intended and doesn't just work by accident.

Just add a comment above the first call to hard_irq_disable() that explains that local_irq_enable() will properly unroll hard_irq_disable. That way the next person reading this doesn't have to dig as deeply.

> 			cond_resched();
> -			local_irq_disable();
> +			hard_irq_disable();
> 			continue;
> 		}
> 
> @@ -95,7 +97,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> 			local_irq_enable();
> 			trace_kvm_check_requests(vcpu);
> 			r = kvmppc_core_check_requests(vcpu);
> -			local_irq_disable();
> +			hard_irq_disable();
> 			if (r > 0)
> 				continue;
> 			break;
> @@ -108,21 +110,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> 		}
> 
> #ifdef CONFIG_PPC64
> -		/* lazy EE magic */
> -		hard_irq_disable();
> -		if (lazy_irq_pending()) {
> -			/* Got an interrupt in between, try again */
> -			local_irq_enable();
> -			local_irq_disable();
> -			kvm_guest_exit();
> -			continue;
> -		}
> +		WARN_ON(lazy_irq_pending());
> #endif
> +		/* Can't use irqs_disabled() because we want hard irq state */
> +		WARN_ON(mfmsr() & MSR_EE);

The reason for lazy EE is that mfmsr() is supposed to be slow. Couldn't we check for the internal hard disable flag instead? Just create a new helper in hw_irq.h that tells us

  local_paca->irq_happened & PACA_IRQ_HARD_DIS

on PPC64 and

  !(mfmsr() & MSR_EE)

on PPC32. We can then just use that helper here and not run "expensive" mfmsr() calls.

I'm not sure whether it's actually measurable overhead in the context of the whole world switch, but why diverge from the rest of the system? If you think a new helper is overkill, I'd be fine with a simple #ifdef here just as well.

> 		kvm_guest_enter();
> -		break;
> +		return r;

This must be 0 at this point, right?

Either way, I prefer to keep the code easily understandable. How about you keep the break and just add an if (r) around the local_irq_enable() below? Then add a comment stating that the return value tells us whether entry is ok to proceed and interrupts are disabled (0) or something didn't work out and interrupts are enabled (!0).

Thanks a lot for cleaning up this whole lazy interrupt mess :).


Alex

> 	}
> 
> +	local_irq_enable();
> 	return r;
> }
> #endif /* CONFIG_KVM_BOOK3S_64_HV */
> -- 
> 1.8.1.2
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood - Jan. 3, 2014, 2:51 a.m.
On Sun, 2013-12-29 at 16:43 +0100, Alexander Graf wrote:
> On 11.07.2013, at 00:47, Scott Wood <scottwood@freescale.com> wrote:
> > diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> > index ddfaf56..c13caa0 100644
> > --- a/arch/powerpc/kvm/book3s_pr.c
> > +++ b/arch/powerpc/kvm/book3s_pr.c
> > @@ -884,14 +884,11 @@ program_interrupt:
> > 		 * and if we really did time things so badly, then we just exit
> > 		 * again due to a host external interrupt.
> > 		 */
> > -		local_irq_disable();
> > 		s = kvmppc_prepare_to_enter(vcpu);
> > -		if (s <= 0) {
> > -			local_irq_enable();
> > +		if (s <= 0)
> > 			r = s;
> > -		} else {
> > +		else
> > 			kvmppc_fix_ee_before_entry();
> > -		}
> > 	}
> 
> Please add a comment here stating that interrupts are hard disabled at this point.

OK.

> > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> > index 4e05f8c..2f7a221 100644
> > --- a/arch/powerpc/kvm/powerpc.c
> > +++ b/arch/powerpc/kvm/powerpc.c
> > @@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
> > {
> > 	int r = 1;
> > 
> > -	WARN_ON_ONCE(!irqs_disabled());
> > +	WARN_ON(irqs_disabled());
> > +	hard_irq_disable();
> > +
> > 	while (true) {
> > 		if (need_resched()) {
> > 			local_irq_enable();
> 
> One thing I find reasonably tricky in this approach is that we run
> local_irq_enable, but hard_irq_disable. I did dig through the code and
> it should work just fine, but I think we should make sure to note that
> this is intended and doesn't just work by accident.
> 
> Just add a comment above the first call to hard_irq_disable() that
> explains that local_irq_enable() will properly unroll hard_irq_disable.
> That way the next person reading this doesn't have to dig as deeply.

There is no hard_irq_enable() -- only __hard_irq_enable() that doesn't
update local_paca->irq_happened.

This is normal use of the API.  If there does need to be a comment, it
should go in hw_irq.h. :-)

> > #ifdef CONFIG_PPC64
> > -		/* lazy EE magic */
> > -		hard_irq_disable();
> > -		if (lazy_irq_pending()) {
> > -			/* Got an interrupt in between, try again */
> > -			local_irq_enable();
> > -			local_irq_disable();
> > -			kvm_guest_exit();
> > -			continue;
> > -		}
> > +		WARN_ON(lazy_irq_pending());
> > #endif
> > +		/* Can't use irqs_disabled() because we want hard irq state */
> > +		WARN_ON(mfmsr() & MSR_EE);
> 
> The reason for lazy EE is that mfmsr() is supposed to be slow.

Not just mtmsr?

And when I complained about wrtee not being all that slow on our
hardware, Ben said it was also for perf coverage. :-)

> Couldn't we check for the internal hard disable flag instead? Just create a new
> helper in hw_irq.h that tells us
> 
>   local_paca->irq_happened & PACA_IRQ_HARD_DIS
> 
> on PPC64 and
> 
>   !(mfmsr() & MSR_EE)
> 
> on PPC32. We can then just use that helper here and not run "expensive" mfmsr() calls.

> I'm not sure whether it's actually measurable overhead in the context
> of the whole world switch, but why diverge from the rest of the system?
> If you think a new helper is overkill, I'd be fine with a simple #ifdef
> here just as well.

I'd hope a mfmsr wouldn't be so slow on any hardware as to be a big deal
here, though it'd also be nice if there were a Linux-wide way of
specifying whether a particular WARN should be always checked, or only
if the kernel is built with some debug option...

The "rest of the system" is irqs_disabled() and there's already a
comment explaining why we diverge from that.

Maybe we should just remove that check; we'd still at least have the
64-bit check in kvmppc_fix_ee_before_entry.

> > 		kvm_guest_enter();
> > -		break;
> > +		return r;
> 
> This must be 0 at this point, right?

No, it'll be 1.

> Either way, I prefer to keep the code easily understandable. How about
> you keep the break and just add an if (r) around the local_irq_enable()
> below? Then add a comment stating that the return value tells us
> whether entry is ok to proceed and interrupts are disabled (0) or
> something didn't work out and interrupts are enabled (!0).

How is it more understandable to overload what looks like a single code
path with divergent cases that have little to nothing in common?  Would
it help to add a comment on the return-to-host code path indicating that
it is only used for returning to the host?

I'd be fine with replacing "return r" with "return 1" (and getting rid
of the initialization of r at the top of the function, unless GCC
complains inappropriately).

-Scott


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Jan. 9, 2014, 12:38 p.m.
On 03.01.2014, at 03:51, Scott Wood <scottwood@freescale.com> wrote:

> On Sun, 2013-12-29 at 16:43 +0100, Alexander Graf wrote:
>> On 11.07.2013, at 00:47, Scott Wood <scottwood@freescale.com> wrote:
>>> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
>>> index ddfaf56..c13caa0 100644
>>> --- a/arch/powerpc/kvm/book3s_pr.c
>>> +++ b/arch/powerpc/kvm/book3s_pr.c
>>> @@ -884,14 +884,11 @@ program_interrupt:
>>> 		 * and if we really did time things so badly, then we just exit
>>> 		 * again due to a host external interrupt.
>>> 		 */
>>> -		local_irq_disable();
>>> 		s = kvmppc_prepare_to_enter(vcpu);
>>> -		if (s <= 0) {
>>> -			local_irq_enable();
>>> +		if (s <= 0)
>>> 			r = s;
>>> -		} else {
>>> +		else
>>> 			kvmppc_fix_ee_before_entry();
>>> -		}
>>> 	}
>> 
>> Please add a comment here stating that interrupts are hard disabled at this point.
> 
> OK.
> 
>>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>>> index 4e05f8c..2f7a221 100644
>>> --- a/arch/powerpc/kvm/powerpc.c
>>> +++ b/arch/powerpc/kvm/powerpc.c
>>> @@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
>>> {
>>> 	int r = 1;
>>> 
>>> -	WARN_ON_ONCE(!irqs_disabled());
>>> +	WARN_ON(irqs_disabled());
>>> +	hard_irq_disable();
>>> +
>>> 	while (true) {
>>> 		if (need_resched()) {
>>> 			local_irq_enable();
>> 
>> One thing I find reasonably tricky in this approach is that we run
>> local_irq_enable, but hard_irq_disable. I did dig through the code and
>> it should work just fine, but I think we should make sure to note that
>> this is intended and doesn't just work by accident.
>> 
>> Just add a comment above the first call to hard_irq_disable() that
>> explains that local_irq_enable() will properly unroll hard_irq_disable.
>> That way the next person reading this doesn't have to dig as deeply.
> 
> There is no hard_irq_enable() -- only __hard_irq_enable() that doesn't
> update local_paca->irq_happened.
> 
> This is normal use of the API.  If there does need to be a comment, it
> should go in hw_irq.h. :-)

Yeah, it's always confusing to me in other places too :). But there are only so many places that have to deal with really hard disabled interrupts.

> 
>>> #ifdef CONFIG_PPC64
>>> -		/* lazy EE magic */
>>> -		hard_irq_disable();
>>> -		if (lazy_irq_pending()) {
>>> -			/* Got an interrupt in between, try again */
>>> -			local_irq_enable();
>>> -			local_irq_disable();
>>> -			kvm_guest_exit();
>>> -			continue;
>>> -		}
>>> +		WARN_ON(lazy_irq_pending());
>>> #endif
>>> +		/* Can't use irqs_disabled() because we want hard irq state */
>>> +		WARN_ON(mfmsr() & MSR_EE);
>> 
>> The reason for lazy EE is that mfmsr() is supposed to be slow.
> 
> Not just mtmsr?
> 
> And when I complained about wrtee not being all that slow on our
> hardware, Ben said it was also for perf coverage. :-)
> 
>> Couldn't we check for the internal hard disable flag instead? Just create a new
>> helper in hw_irq.h that tells us
>> 
>>  local_paca->irq_happened & PACA_IRQ_HARD_DIS
>> 
>> on PPC64 and
>> 
>>  !(mfmsr() & MSR_EE)
>> 
>> on PPC32. We can then just use that helper here and not run "expensive" mfmsr() calls.
> 
>> I'm not sure whether it's actually measurable overhead in the context
>> of the whole world switch, but why diverge from the rest of the system?
>> If you think a new helper is overkill, I'd be fine with a simple #ifdef
>> here just as well.
> 
> I'd hope a mfmsr wouldn't be so slow on any hardware as to be a big deal
> here, though it'd also be nice if there were a Linux-wide way of
> specifying whether a particular WARN should be always checked, or only
> if the kernel is built with some debug option...
> 
> The "rest of the system" is irqs_disabled() and there's already a
> comment explaining why we diverge from that.
> 
> Maybe we should just remove that check; we'd still at least have the
> 64-bit check in kvmppc_fix_ee_before_entry.

Well, as I mentioned we're already so deep down in the guts of how interrupt handling works that it's perfectly fine to check paca->irq_happened directly here if we care.

> 
>>> 		kvm_guest_enter();
>>> -		break;
>>> +		return r;
>> 
>> This must be 0 at this point, right?
> 
> No, it'll be 1.
> 
>> Either way, I prefer to keep the code easily understandable. How about
>> you keep the break and just add an if (r) around the local_irq_enable()
>> below? Then add a comment stating that the return value tells us
>> whether entry is ok to proceed and interrupts are disabled (0) or
>> something didn't work out and interrupts are enabled (!0).
> 
> How is it more understandable to overload what looks like a single code
> path with divergent cases that have little to nothing in common?  Would
> it help to add a comment on the return-to-host code path indicating that
> it is only used for returning to the host?

Yup :)

> I'd be fine with replacing "return r" with "return 1" (and getting rid
> of the initialization of r at the top of the function, unless GCC
> complains inappropriately).

Works fine for me.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 6885846..e4474f8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -404,6 +404,12 @@  static inline void kvmppc_fix_ee_before_entry(void)
 	trace_hardirqs_on();
 
 #ifdef CONFIG_PPC64
+	/*
+	 * To avoid races, the caller must have gone directly from having
+	 * interrupts fully-enabled to hard-disabled.
+	 */
+	WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+
 	/* Only need to enable IRQs by hard enabling them after this */
 	local_paca->irq_happened = 0;
 	local_paca->soft_enabled = 1;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ddfaf56..c13caa0 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -884,14 +884,11 @@  program_interrupt:
 		 * and if we really did time things so badly, then we just exit
 		 * again due to a host external interrupt.
 		 */
-		local_irq_disable();
 		s = kvmppc_prepare_to_enter(vcpu);
-		if (s <= 0) {
-			local_irq_enable();
+		if (s <= 0)
 			r = s;
-		} else {
+		else
 			kvmppc_fix_ee_before_entry();
-		}
 	}
 
 	trace_kvm_book3s_reenter(r, vcpu);
@@ -1121,12 +1118,9 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * really did time things so badly, then we just exit again due to
 	 * a host external interrupt.
 	 */
-	local_irq_disable();
 	ret = kvmppc_prepare_to_enter(vcpu);
-	if (ret <= 0) {
-		local_irq_enable();
+	if (ret <= 0)
 		goto out;
-	}
 
 	/* Save FPU state in stack */
 	if (current->thread.regs->msr & MSR_FP)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6e35351..aa3bc36 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -617,7 +617,7 @@  int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 		local_irq_enable();
 		kvm_vcpu_block(vcpu);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-		local_irq_disable();
+		hard_irq_disable();
 
 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
 		r = 1;
@@ -666,10 +666,8 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}
 
-	local_irq_disable();
 	s = kvmppc_prepare_to_enter(vcpu);
 	if (s <= 0) {
-		local_irq_enable();
 		ret = s;
 		goto out;
 	}
@@ -1162,14 +1160,11 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * aren't already exiting to userspace for some other reason.
 	 */
 	if (!(r & RESUME_HOST)) {
-		local_irq_disable();
 		s = kvmppc_prepare_to_enter(vcpu);
-		if (s <= 0) {
-			local_irq_enable();
+		if (s <= 0)
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-		} else {
+		else
 			kvmppc_fix_ee_before_entry();
-		}
 	}
 
 	return r;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4e05f8c..2f7a221 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -64,12 +64,14 @@  int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 {
 	int r = 1;
 
-	WARN_ON_ONCE(!irqs_disabled());
+	WARN_ON(irqs_disabled());
+	hard_irq_disable();
+
 	while (true) {
 		if (need_resched()) {
 			local_irq_enable();
 			cond_resched();
-			local_irq_disable();
+			hard_irq_disable();
 			continue;
 		}
 
@@ -95,7 +97,7 @@  int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			local_irq_enable();
 			trace_kvm_check_requests(vcpu);
 			r = kvmppc_core_check_requests(vcpu);
-			local_irq_disable();
+			hard_irq_disable();
 			if (r > 0)
 				continue;
 			break;
@@ -108,21 +110,16 @@  int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 		}
 
 #ifdef CONFIG_PPC64
-		/* lazy EE magic */
-		hard_irq_disable();
-		if (lazy_irq_pending()) {
-			/* Got an interrupt in between, try again */
-			local_irq_enable();
-			local_irq_disable();
-			kvm_guest_exit();
-			continue;
-		}
+		WARN_ON(lazy_irq_pending());
 #endif
+		/* Can't use irqs_disabled() because we want hard irq state */
+		WARN_ON(mfmsr() & MSR_EE);
 
 		kvm_guest_enter();
-		break;
+		return r;
 	}
 
+	local_irq_enable();
 	return r;
 }
 #endif /* CONFIG_KVM_BOOK3S_64_HV */