Patchwork powerpc: remove fpscr use from [kvm_]cvt_{fd,df}

login
register
mail settings
Submitter Andreas Schwab
Date Aug. 21, 2010, 9:43 p.m.
Message ID <m2hbinzlon.fsf@igel.home>
Download mbox | patch
Permalink /patch/62355/
State Accepted, archived
Commit 05d77ac90c0d260ae18decd70507dc4f5b71a2cb
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Andreas Schwab - Aug. 21, 2010, 9:43 p.m.
Neither lfs nor stfs touch the fpscr, so remove the restore/save of it
around them.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
---
 arch/powerpc/include/asm/kvm_fpu.h       |    4 +-
 arch/powerpc/include/asm/system.h        |    4 +-
 arch/powerpc/kernel/align.c              |    4 +-
 arch/powerpc/kernel/fpu.S                |   10 -------
 arch/powerpc/kvm/book3s_paired_singles.c |   44 +++++++++++++----------------
 arch/powerpc/kvm/fpu.S                   |    8 -----
 6 files changed, 26 insertions(+), 48 deletions(-)
Michael Neuling - Aug. 23, 2010, 12:23 a.m.
> Neither lfs nor stfs touch the fpscr, so remove the restore/save of it
> around them.

Do some 32 bit processors need this? 

In 32 bit before the merge, we used to have code that did:

  #if defined(CONFIG_4xx) || defined(CONFIG_E500)
   #define cvt_fd without save/restore fpscr
  #else
   #define cvt_fd with save/restore fpscr
  #end if

Kumar; does this ring any bells?

(The addition of this predates even bitkeeper)

Mikey
> 
> Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
> ---
>  arch/powerpc/include/asm/kvm_fpu.h       |    4 +-
>  arch/powerpc/include/asm/system.h        |    4 +-
>  arch/powerpc/kernel/align.c              |    4 +-
>  arch/powerpc/kernel/fpu.S                |   10 -------
>  arch/powerpc/kvm/book3s_paired_singles.c |   44 +++++++++++++----------------
>  arch/powerpc/kvm/fpu.S                   |    8 -----
>  6 files changed, 26 insertions(+), 48 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
> index c3d4f05..92daae1 100644
> --- a/arch/powerpc/include/asm/kvm_fpu.h
> +++ b/arch/powerpc/include/asm/kvm_fpu.h
> @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd)
>  FPD_THREE_IN(fnmsub)
>  FPD_THREE_IN(fnmadd)
>  
> -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
> -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);
> +extern void kvm_cvt_fd(u32 *from, u64 *to);
> +extern void kvm_cvt_df(u64 *from, u32 *to);
>  
>  #endif
> diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
> index 6c294ac..0b3fe78 100644
> --- a/arch/powerpc/include/asm/system.h
> +++ b/arch/powerpc/include/asm/system.h
> @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void);
>  extern void giveup_spe(struct task_struct *);
>  extern void load_up_spe(struct task_struct *);
>  extern int fix_alignment(struct pt_regs *);
> -extern void cvt_fd(float *from, double *to, struct thread_struct *thread);
> -extern void cvt_df(double *from, float *to, struct thread_struct *thread);
> +extern void cvt_fd(float *from, double *to);
> +extern void cvt_df(double *from, float *to);
>  
>  #ifndef CONFIG_SMP
>  extern void discard_lazy_cpu_state(void);
> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
> index b876e98..8184ee9 100644
> --- a/arch/powerpc/kernel/align.c
> +++ b/arch/powerpc/kernel/align.c
> @@ -889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs)
>  #ifdef CONFIG_PPC_FPU
>  			preempt_disable();
>  			enable_kernel_fp();
> -			cvt_df(&data.dd, (float *)&data.v[4], &current->thread)
;
> +			cvt_df(&data.dd, (float *)&data.v[4]);
>  			preempt_enable();
>  #else
>  			return 0;
> @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs)
>  #ifdef CONFIG_PPC_FPU
>  		preempt_disable();
>  		enable_kernel_fp();
> -		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
> +		cvt_fd((float *)&data.v[4], &data.dd);
>  		preempt_enable();
>  #else
>  		return 0;
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index fc8f5b1..e86c040 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
>  /*
>   * These are used in the alignment trap handler when emulating
>   * single-precision loads and stores.
> - * We restore and save the fpscr so the task gets the same result
> - * and exceptions as if the cpu had performed the load or store.
>   */
>  
>  _GLOBAL(cvt_fd)
> -	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
> -	MTFSF_L(0)
>  	lfs	0,0(r3)
>  	stfd	0,0(r4)
> -	mffs	0
> -	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
>  	blr
>  
>  _GLOBAL(cvt_df)
> -	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
> -	MTFSF_L(0)
>  	lfd	0,0(r3)
>  	stfs	0,0(r4)
> -	mffs	0
> -	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
>  	blr
> diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book
3s_paired_singles.c
> index 474f2e2..35a701f 100644
> --- a/arch/powerpc/kvm/book3s_paired_singles.c
> +++ b/arch/powerpc/kvm/book3s_paired_singles.c
> @@ -159,7 +159,7 @@
>  
>  static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
>  {
> -	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr)
;
> +	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
>  }
>  
>  static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_sto
re)
> @@ -204,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, s
truct kvm_vcpu *vcpu,
>  	/* put in registers */
>  	switch (ls_type) {
>  	case FPU_LS_SINGLE:
> -		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
> +		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
>  		vcpu->arch.qpr[rs] = *((u32*)tmp);
>  		break;
>  	case FPU_LS_DOUBLE:
> @@ -230,7 +230,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
>  
>  	switch (ls_type) {
>  	case FPU_LS_SINGLE:
> -		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr);
> +		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
>  		val = *((u32*)tmp);
>  		len = sizeof(u32);
>  		break;
> @@ -296,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, s
truct kvm_vcpu *vcpu,
>  	emulated = EMULATE_DONE;
>  
>  	/* put in registers */
> -	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
> +	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
>  	vcpu->arch.qpr[rs] = tmp[1];
>  
>  	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
> @@ -314,7 +314,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
>  	u32 tmp[2];
>  	int len = w ? sizeof(u32) : sizeof(u64);
>  
> -	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr);
> +	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
>  	tmp[1] = vcpu->arch.qpr[rs];
>  
>  	r = kvmppc_st(vcpu, &addr, len, tmp, true);
> @@ -516,9 +516,9 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool
 rc,
>  	WARN_ON(rc);
>  
>  	/* PS0 */
> -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
> -	kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
> -	kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr);
> +	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
> +	kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
> +	kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
>  
>  	if (scalar & SCALAR_LOW)
>  		ps0_in2 = qpr[reg_in2];
> @@ -529,7 +529,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool
 rc,
>  			  ps0_in1, ps0_in2, ps0_in3, ps0_out);
>  
>  	if (!(scalar & SCALAR_NO_PS0))
> -		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> +		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
>  
>  	/* PS1 */
>  	ps1_in1 = qpr[reg_in1];
> @@ -566,12 +566,12 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool
 rc,
>  	WARN_ON(rc);
>  
>  	/* PS0 */
> -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
> +	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
>  
>  	if (scalar & SCALAR_LOW)
>  		ps0_in2 = qpr[reg_in2];
>  	else
> -		kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
> +		kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
>  
>  	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
>  
> @@ -579,7 +579,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool r
c,
>  		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
>  				  ps0_in1, ps0_in2, ps0_out);
>  
> -		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> +		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
>  	}
>  
>  	/* PS1 */
> @@ -615,13 +615,13 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool
 rc,
>  	WARN_ON(rc);
>  
>  	/* PS0 */
> -	kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr);
> +	kvm_cvt_df(&fpr[reg_in], &ps0_in);
>  	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
>  
>  	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
>  			  ps0_in, ps0_out);
>  
> -	kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> +	kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
>  
>  	/* PS1 */
>  	ps1_in = qpr[reg_in];
> @@ -671,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, str
uct kvm_vcpu *vcpu)
>  #ifdef DEBUG
>  	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
>  		u32 f;
> -		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
> +		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
>  		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n"
,
>  			i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
>  	}
> @@ -796,8 +796,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, str
uct kvm_vcpu *vcpu)
>  			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
>  			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
>  			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
> -				   &vcpu->arch.qpr[ax_rd],
> -				   &vcpu->arch.fpscr);
> +				   &vcpu->arch.qpr[ax_rd]);
>  			break;
>  		case OP_4X_PS_MERGE01:
>  			WARN_ON(rcomp);
> @@ -808,19 +807,16 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, s
truct kvm_vcpu *vcpu)
>  			WARN_ON(rcomp);
>  			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
>  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
> -				   &vcpu->arch.fpr[ax_rd],
> -				   &vcpu->arch.fpscr);
> +				   &vcpu->arch.fpr[ax_rd]);
>  			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
>  			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
> -				   &vcpu->arch.qpr[ax_rd],
> -				   &vcpu->arch.fpscr);
> +				   &vcpu->arch.qpr[ax_rd]);
>  			break;
>  		case OP_4X_PS_MERGE11:
>  			WARN_ON(rcomp);
>  			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
>  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
> -				   &vcpu->arch.fpr[ax_rd],
> -				   &vcpu->arch.fpscr);
> +				   &vcpu->arch.fpr[ax_rd]);
>  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
>  			break;
>  		}
> @@ -1255,7 +1251,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, s
truct kvm_vcpu *vcpu)
>  #ifdef DEBUG
>  	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
>  		u32 f;
> -		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
> +		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
>  		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
>  	}
>  #endif
> diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
> index cb34bbe..bf68d59 100644
> --- a/arch/powerpc/kvm/fpu.S
> +++ b/arch/powerpc/kvm/fpu.S
> @@ -273,19 +273,11 @@ FPD_THREE_IN(fnmsub)
>  FPD_THREE_IN(fnmadd)
>  
>  _GLOBAL(kvm_cvt_fd)
> -	lfd	0,0(r5)			/* load up fpscr value */
> -	MTFSF_L(0)
>  	lfs	0,0(r3)
>  	stfd	0,0(r4)
> -	mffs	0
> -	stfd	0,0(r5)			/* save new fpscr value */
>  	blr
>  
>  _GLOBAL(kvm_cvt_df)
> -	lfd	0,0(r5)			/* load up fpscr value */
> -	MTFSF_L(0)
>  	lfd	0,0(r3)
>  	stfs	0,0(r4)
> -	mffs	0
> -	stfd	0,0(r5)			/* save new fpscr value */
>  	blr
> -- 
> 1.7.2.2
> 
> 
> -- 
> Andreas Schwab, schwab@linux-m68k.org
> GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
> "And now for something completely different."
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Benjamin Herrenschmidt - Aug. 24, 2010, 5:11 a.m.
On Mon, 2010-08-23 at 10:23 +1000, Michael Neuling wrote:
> > Neither lfs nor stfs touch the fpscr, so remove the restore/save of it
> > around them.
> 
> Do some 32 bit processors need this? 
> 
> In 32 bit before the merge, we use to have code that did:
> 
>   #if defined(CONFIG_4xx) || defined(CONFIG_E500)
>    #define cvt_fd without save/restore fpscr
>   #else
>    #define cvt_fd with save/restore fpscr
>   #end if
> 
> Kumar; does this ring any bells?

I don't see anything in the various 440 docs I have at hand that would
hint at lfd/stfs affecting FPSCR.

Cheers,
Ben.

> (The addition of this predates even bitkeeper)
> 
> Mikey
> > 
> > Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
> > ---
> >  arch/powerpc/include/asm/kvm_fpu.h       |    4 +-
> >  arch/powerpc/include/asm/system.h        |    4 +-
> >  arch/powerpc/kernel/align.c              |    4 +-
> >  arch/powerpc/kernel/fpu.S                |   10 -------
> >  arch/powerpc/kvm/book3s_paired_singles.c |   44 +++++++++++++---------------
> -
> >  arch/powerpc/kvm/fpu.S                   |    8 -----
> >  6 files changed, 26 insertions(+), 48 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kv
> m_fpu.h
> > index c3d4f05..92daae1 100644
> > --- a/arch/powerpc/include/asm/kvm_fpu.h
> > +++ b/arch/powerpc/include/asm/kvm_fpu.h
> > @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd)
> >  FPD_THREE_IN(fnmsub)
> >  FPD_THREE_IN(fnmadd)
> >  
> > -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
> > -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);
> > +extern void kvm_cvt_fd(u32 *from, u64 *to);
> > +extern void kvm_cvt_df(u64 *from, u32 *to);
> >  
> >  #endif
> > diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/sys
> tem.h
> > index 6c294ac..0b3fe78 100644
> > --- a/arch/powerpc/include/asm/system.h
> > +++ b/arch/powerpc/include/asm/system.h
> > @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void);
> >  extern void giveup_spe(struct task_struct *);
> >  extern void load_up_spe(struct task_struct *);
> >  extern int fix_alignment(struct pt_regs *);
> > -extern void cvt_fd(float *from, double *to, struct thread_struct *thread);
> > -extern void cvt_df(double *from, float *to, struct thread_struct *thread);
> > +extern void cvt_fd(float *from, double *to);
> > +extern void cvt_df(double *from, float *to);
> >  
> >  #ifndef CONFIG_SMP
> >  extern void discard_lazy_cpu_state(void);
> > diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
> > index b876e98..8184ee9 100644
> > --- a/arch/powerpc/kernel/align.c
> > +++ b/arch/powerpc/kernel/align.c
> > @@ -889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs)
> >  #ifdef CONFIG_PPC_FPU
> >  			preempt_disable();
> >  			enable_kernel_fp();
> > -			cvt_df(&data.dd, (float *)&data.v[4], &current->thread)
> ;
> > +			cvt_df(&data.dd, (float *)&data.v[4]);
> >  			preempt_enable();
> >  #else
> >  			return 0;
> > @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs)
> >  #ifdef CONFIG_PPC_FPU
> >  		preempt_disable();
> >  		enable_kernel_fp();
> > -		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
> > +		cvt_fd((float *)&data.v[4], &data.dd);
> >  		preempt_enable();
> >  #else
> >  		return 0;
> > diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> > index fc8f5b1..e86c040 100644
> > --- a/arch/powerpc/kernel/fpu.S
> > +++ b/arch/powerpc/kernel/fpu.S
> > @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
> >  /*
> >   * These are used in the alignment trap handler when emulating
> >   * single-precision loads and stores.
> > - * We restore and save the fpscr so the task gets the same result
> > - * and exceptions as if the cpu had performed the load or store.
> >   */
> >  
> >  _GLOBAL(cvt_fd)
> > -	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
> > -	MTFSF_L(0)
> >  	lfs	0,0(r3)
> >  	stfd	0,0(r4)
> > -	mffs	0
> > -	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
> >  	blr
> >  
> >  _GLOBAL(cvt_df)
> > -	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
> > -	MTFSF_L(0)
> >  	lfd	0,0(r3)
> >  	stfs	0,0(r4)
> > -	mffs	0
> > -	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
> >  	blr
> > diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book
> 3s_paired_singles.c
> > index 474f2e2..35a701f 100644
> > --- a/arch/powerpc/kvm/book3s_paired_singles.c
> > +++ b/arch/powerpc/kvm/book3s_paired_singles.c
> > @@ -159,7 +159,7 @@
> >  
> >  static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
> >  {
> > -	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr)
> ;
> > +	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
> >  }
> >  
> >  static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_sto
> re)
> > @@ -204,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, s
> truct kvm_vcpu *vcpu,
> >  	/* put in registers */
> >  	switch (ls_type) {
> >  	case FPU_LS_SINGLE:
> > -		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
> > +		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
> >  		vcpu->arch.qpr[rs] = *((u32*)tmp);
> >  		break;
> >  	case FPU_LS_DOUBLE:
> > @@ -230,7 +230,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
> >  
> >  	switch (ls_type) {
> >  	case FPU_LS_SINGLE:
> > -		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr);
> > +		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
> >  		val = *((u32*)tmp);
> >  		len = sizeof(u32);
> >  		break;
> > @@ -296,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, s
> truct kvm_vcpu *vcpu,
> >  	emulated = EMULATE_DONE;
> >  
> >  	/* put in registers */
> > -	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
> > +	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
> >  	vcpu->arch.qpr[rs] = tmp[1];
> >  
> >  	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
> > @@ -314,7 +314,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
> >  	u32 tmp[2];
> >  	int len = w ? sizeof(u32) : sizeof(u64);
> >  
> > -	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr);
> > +	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
> >  	tmp[1] = vcpu->arch.qpr[rs];
> >  
> >  	r = kvmppc_st(vcpu, &addr, len, tmp, true);
> > @@ -516,9 +516,9 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool
>  rc,
> >  	WARN_ON(rc);
> >  
> >  	/* PS0 */
> > -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
> > -	kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
> > -	kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr);
> > +	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
> > +	kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
> > +	kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
> >  
> >  	if (scalar & SCALAR_LOW)
> >  		ps0_in2 = qpr[reg_in2];
> > @@ -529,7 +529,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool
>  rc,
> >  			  ps0_in1, ps0_in2, ps0_in3, ps0_out);
> >  
> >  	if (!(scalar & SCALAR_NO_PS0))
> > -		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> > +		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
> >  
> >  	/* PS1 */
> >  	ps1_in1 = qpr[reg_in1];
> > @@ -566,12 +566,12 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool
>  rc,
> >  	WARN_ON(rc);
> >  
> >  	/* PS0 */
> > -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
> > +	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
> >  
> >  	if (scalar & SCALAR_LOW)
> >  		ps0_in2 = qpr[reg_in2];
> >  	else
> > -		kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
> > +		kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
> >  
> >  	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
> >  
> > @@ -579,7 +579,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool r
> c,
> >  		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
> >  				  ps0_in1, ps0_in2, ps0_out);
> >  
> > -		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> > +		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
> >  	}
> >  
> >  	/* PS1 */
> > @@ -615,13 +615,13 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool
>  rc,
> >  	WARN_ON(rc);
> >  
> >  	/* PS0 */
> > -	kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr);
> > +	kvm_cvt_df(&fpr[reg_in], &ps0_in);
> >  	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
> >  
> >  	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
> >  			  ps0_in, ps0_out);
> >  
> > -	kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
> > +	kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
> >  
> >  	/* PS1 */
> >  	ps1_in = qpr[reg_in];
> > @@ -671,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, str
> uct kvm_vcpu *vcpu)
> >  #ifdef DEBUG
> >  	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
> >  		u32 f;
> > -		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
> > +		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
> >  		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n"
> ,
> >  			i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
> >  	}
> > @@ -796,8 +796,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, str
> uct kvm_vcpu *vcpu)
> >  			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
> >  			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
> >  			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
> > -				   &vcpu->arch.qpr[ax_rd],
> > -				   &vcpu->arch.fpscr);
> > +				   &vcpu->arch.qpr[ax_rd]);
> >  			break;
> >  		case OP_4X_PS_MERGE01:
> >  			WARN_ON(rcomp);
> > @@ -808,19 +807,16 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, s
> truct kvm_vcpu *vcpu)
> >  			WARN_ON(rcomp);
> >  			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
> >  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
> > -				   &vcpu->arch.fpr[ax_rd],
> > -				   &vcpu->arch.fpscr);
> > +				   &vcpu->arch.fpr[ax_rd]);
> >  			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
> >  			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
> > -				   &vcpu->arch.qpr[ax_rd],
> > -				   &vcpu->arch.fpscr);
> > +				   &vcpu->arch.qpr[ax_rd]);
> >  			break;
> >  		case OP_4X_PS_MERGE11:
> >  			WARN_ON(rcomp);
> >  			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
> >  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
> > -				   &vcpu->arch.fpr[ax_rd],
> > -				   &vcpu->arch.fpscr);
> > +				   &vcpu->arch.fpr[ax_rd]);
> >  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
> >  			break;
> >  		}
> > @@ -1255,7 +1251,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, s
> truct kvm_vcpu *vcpu)
> >  #ifdef DEBUG
> >  	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
> >  		u32 f;
> > -		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
> > +		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
> >  		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
> >  	}
> >  #endif
> > diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
> > index cb34bbe..bf68d59 100644
> > --- a/arch/powerpc/kvm/fpu.S
> > +++ b/arch/powerpc/kvm/fpu.S
> > @@ -273,19 +273,11 @@ FPD_THREE_IN(fnmsub)
> >  FPD_THREE_IN(fnmadd)
> >  
> >  _GLOBAL(kvm_cvt_fd)
> > -	lfd	0,0(r5)			/* load up fpscr value */
> > -	MTFSF_L(0)
> >  	lfs	0,0(r3)
> >  	stfd	0,0(r4)
> > -	mffs	0
> > -	stfd	0,0(r5)			/* save new fpscr value */
> >  	blr
> >  
> >  _GLOBAL(kvm_cvt_df)
> > -	lfd	0,0(r5)			/* load up fpscr value */
> > -	MTFSF_L(0)
> >  	lfd	0,0(r3)
> >  	stfs	0,0(r4)
> > -	mffs	0
> > -	stfd	0,0(r5)			/* save new fpscr value */
> >  	blr
> > -- 
> > 1.7.2.2
> > 
> > 
> > -- 
> > Andreas Schwab, schwab@linux-m68k.org
> > GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
> > "And now for something completely different."
> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-dev
> > 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Michael Neuling - Aug. 24, 2010, 5:15 a.m.
> > Do some 32 bit processors need this? 
> > 
> > In 32 bit before the merge, we use to have code that did:
> > 
> >   #if defined(CONFIG_4xx) || defined(CONFIG_E500)
> >    #define cvt_fd without save/restore fpscr
> >   #else
> >    #define cvt_fd with save/restore fpscr
> >   #end if
> > 
> > Kumar; does this ring any bells?
> 
> I don't see anything in the various 440 docs I have at hand that would
> hint at lfd/stfs affecting FPSCR.

The way the ifdefs are, it's the other way around.  4xx procs don't need
to save/restore fpscr and others do.

Mikey
Benjamin Herrenschmidt - Aug. 24, 2010, 5:39 a.m.
> The way the ifdefs are, it's the other way around.  4xx procs don't need
> to save/restore fpscr and others do.

Hrm, oh well, 601 manual says FPSCR is unaffected too :-)

Cheers,
Ben.
Kumar Gala - Aug. 24, 2010, 5:47 a.m.
On Aug 22, 2010, at 7:23 PM, Michael Neuling wrote:

>> Neither lfs nor stfs touch the fpscr, so remove the restore/save of it
>> around them.
> 
> Do some 32 bit processors need this? 
> 
> In 32 bit before the merge, we use to have code that did:
> 
>  #if defined(CONFIG_4xx) || defined(CONFIG_E500)
>   #define cvt_fd without save/restore fpscr
>  #else
>   #define cvt_fd with save/restore fpscr
>  #end if
> 
> Kumar; does this ring any bells?
> 
> (The addition of this predates even bitkeeper)
> 
> Mikey

Not really.  However if the ifdef is as you say that seems wrong to me.  We should be using CONFIG_PPC_FPU or !CONFIG_PPC_FPU.  As both 4xx and E500 have variants w/FPUs.

- k
Michael Neuling - Aug. 24, 2010, 5:51 a.m.
> >> Neither lfs nor stfs touch the fpscr, so remove the restore/save of it
> >> around them.
> >
> > Do some 32 bit processors need this?
> >
> > In 32 bit before the merge, we use to have code that did:
> >
> >  #if defined(CONFIG_4xx) || defined(CONFIG_E500)
> >   #define cvt_fd without save/restore fpscr
> >  #else
> >   #define cvt_fd with save/restore fpscr
> >  #end if
> >
> > Kumar; does this ring any bells?
> >
> > (The addition of this predates even bitkeeper)
> >
> > Mikey
> 
> Not really.  However if the ifdef is as you say that seems wrong to
> me.  We should be using CONFIG_PPC_FPU or !CONFIG_PPC_FPU.  As both
> 4xx and E500 have variants w/FPUs.

It actually got changed to CONFIG_PPC_FPU, then dwg merged it with some
other versions that were around.  

Mikey
Benjamin Herrenschmidt - Aug. 25, 2010, 1:30 a.m.
On Tue, 2010-08-24 at 15:15 +1000, Michael Neuling wrote:
> > > Do some 32 bit processors need this? 
> > > 
> > > In 32 bit before the merge, we use to have code that did:
> > > 
> > >   #if defined(CONFIG_4xx) || defined(CONFIG_E500)
> > >    #define cvt_fd without save/restore fpscr
> > >   #else
> > >    #define cvt_fd with save/restore fpscr
> > >   #end if
> > > 
> > > Kumar; does this ring any bells?
> > 
> > I don't see anything in the various 440 docs I have at hand that would
> > > hint at lfd/stfs affecting FPSCR.
> 
> The way the ifdefs are, it's the other way around.  4xx procs don't need
> to save/restore fpscr and others do.

Right, my bad. In any case, Paulus reckons it's all his mistake and we
really never need to save/restore fpscr.

Cheers,
Ben.
Michael Neuling - Aug. 25, 2010, 1:34 a.m.
In message <1282699836.22370.566.camel@pasglop> you wrote:
> On Tue, 2010-08-24 at 15:15 +1000, Michael Neuling wrote:
> > > > Do some 32 bit processors need this? 
> > > > 
> > > > In 32 bit before the merge, we use to have code that did:
> > > > 
> > > >   #if defined(CONFIG_4xx) || defined(CONFIG_E500)
> > > >    #define cvt_fd without save/restore fpscr
> > > >   #else
> > > >    #define cvt_fd with save/restore fpscr
> > > >   #end if
> > > > 
> > > > Kumar; does this ring any bells?
> > > 
> > > I don't see anything in the various 440 docs I have at hand that would
> > > > hint at lfd/stfs affecting FPSCR.
> > 
> > The way the ifdefs are, it's the other way around.  4xx procs don't need
> > to save/restore fpscr and others do.
> 
> Right, my bad. In any case, Paulus reckons it's all his mistake and we
> really never need to save/restore fpscr.

ACK :-P

Mikey

Patch

diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
index c3d4f05..92daae1 100644
--- a/arch/powerpc/include/asm/kvm_fpu.h
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -82,7 +82,7 @@  FPD_THREE_IN(fmadd)
 FPD_THREE_IN(fnmsub)
 FPD_THREE_IN(fnmadd)
 
-extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
-extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);
+extern void kvm_cvt_fd(u32 *from, u64 *to);
+extern void kvm_cvt_df(u64 *from, u32 *to);
 
 #endif
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 6c294ac..0b3fe78 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -154,8 +154,8 @@  extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
 extern int fix_alignment(struct pt_regs *);
-extern void cvt_fd(float *from, double *to, struct thread_struct *thread);
-extern void cvt_df(double *from, float *to, struct thread_struct *thread);
+extern void cvt_fd(float *from, double *to);
+extern void cvt_df(double *from, float *to);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index b876e98..8184ee9 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -889,7 +889,7 @@  int fix_alignment(struct pt_regs *regs)
 #ifdef CONFIG_PPC_FPU
 			preempt_disable();
 			enable_kernel_fp();
-			cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+			cvt_df(&data.dd, (float *)&data.v[4]);
 			preempt_enable();
 #else
 			return 0;
@@ -933,7 +933,7 @@  int fix_alignment(struct pt_regs *regs)
 #ifdef CONFIG_PPC_FPU
 		preempt_disable();
 		enable_kernel_fp();
-		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+		cvt_fd((float *)&data.v[4], &data.dd);
 		preempt_enable();
 #else
 		return 0;
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index fc8f5b1..e86c040 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -163,24 +163,14 @@  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 /*
  * These are used in the alignment trap handler when emulating
  * single-precision loads and stores.
- * We restore and save the fpscr so the task gets the same result
- * and exceptions as if the cpu had performed the load or store.
  */
 
 _GLOBAL(cvt_fd)
-	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
-	MTFSF_L(0)
 	lfs	0,0(r3)
 	stfd	0,0(r4)
-	mffs	0
-	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
 	blr
 
 _GLOBAL(cvt_df)
-	lfd	0,THREAD_FPSCR(r5)	/* load up fpscr value */
-	MTFSF_L(0)
 	lfd	0,0(r3)
 	stfs	0,0(r4)
-	mffs	0
-	stfd	0,THREAD_FPSCR(r5)	/* save new fpscr value */
 	blr
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 474f2e2..35a701f 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -159,7 +159,7 @@ 
 
 static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
 {
-	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr);
+	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
 }
 
 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
@@ -204,7 +204,7 @@  static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* put in registers */
 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
+		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
 		vcpu->arch.qpr[rs] = *((u32*)tmp);
 		break;
 	case FPU_LS_DOUBLE:
@@ -230,7 +230,7 @@  static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr);
+		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
 		val = *((u32*)tmp);
 		len = sizeof(u32);
 		break;
@@ -296,7 +296,7 @@  static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	emulated = EMULATE_DONE;
 
 	/* put in registers */
-	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
+	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
 	vcpu->arch.qpr[rs] = tmp[1];
 
 	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
@@ -314,7 +314,7 @@  static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	u32 tmp[2];
 	int len = w ? sizeof(u32) : sizeof(u64);
 
-	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr);
+	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
 	tmp[1] = vcpu->arch.qpr[rs];
 
 	r = kvmppc_st(vcpu, &addr, len, tmp, true);
@@ -516,9 +516,9 @@  static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);
 
 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
-	kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
-	kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr);
+	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+	kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+	kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
 
 	if (scalar & SCALAR_LOW)
 		ps0_in2 = qpr[reg_in2];
@@ -529,7 +529,7 @@  static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
 			  ps0_in1, ps0_in2, ps0_in3, ps0_out);
 
 	if (!(scalar & SCALAR_NO_PS0))
-		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
+		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
 
 	/* PS1 */
 	ps1_in1 = qpr[reg_in1];
@@ -566,12 +566,12 @@  static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);
 
 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr);
+	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
 
 	if (scalar & SCALAR_LOW)
 		ps0_in2 = qpr[reg_in2];
 	else
-		kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr);
+		kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
 
 	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
 
@@ -579,7 +579,7 @@  static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
 		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
 				  ps0_in1, ps0_in2, ps0_out);
 
-		kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
+		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
 	}
 
 	/* PS1 */
@@ -615,13 +615,13 @@  static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);
 
 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr);
+	kvm_cvt_df(&fpr[reg_in], &ps0_in);
 	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
 
 	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
 			  ps0_in, ps0_out);
 
-	kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr);
+	kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
 
 	/* PS1 */
 	ps1_in = qpr[reg_in];
@@ -671,7 +671,7 @@  int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 #ifdef DEBUG
 	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
 		u32 f;
-		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
+		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
 		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n",
 			i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
 	}
@@ -796,8 +796,7 @@  int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
 			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
 			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
-				   &vcpu->arch.qpr[ax_rd],
-				   &vcpu->arch.fpscr);
+				   &vcpu->arch.qpr[ax_rd]);
 			break;
 		case OP_4X_PS_MERGE01:
 			WARN_ON(rcomp);
@@ -808,19 +807,16 @@  int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			WARN_ON(rcomp);
 			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
 			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
-				   &vcpu->arch.fpr[ax_rd],
-				   &vcpu->arch.fpscr);
+				   &vcpu->arch.fpr[ax_rd]);
 			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
 			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
-				   &vcpu->arch.qpr[ax_rd],
-				   &vcpu->arch.fpscr);
+				   &vcpu->arch.qpr[ax_rd]);
 			break;
 		case OP_4X_PS_MERGE11:
 			WARN_ON(rcomp);
 			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
 			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
-				   &vcpu->arch.fpr[ax_rd],
-				   &vcpu->arch.fpscr);
+				   &vcpu->arch.fpr[ax_rd]);
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			break;
 		}
@@ -1255,7 +1251,7 @@  int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 #ifdef DEBUG
 	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
 		u32 f;
-		kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr);
+		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
 		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
 	}
 #endif
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
index cb34bbe..bf68d59 100644
--- a/arch/powerpc/kvm/fpu.S
+++ b/arch/powerpc/kvm/fpu.S
@@ -273,19 +273,11 @@  FPD_THREE_IN(fnmsub)
 FPD_THREE_IN(fnmadd)
 
 _GLOBAL(kvm_cvt_fd)
-	lfd	0,0(r5)			/* load up fpscr value */
-	MTFSF_L(0)
 	lfs	0,0(r3)
 	stfd	0,0(r4)
-	mffs	0
-	stfd	0,0(r5)			/* save new fpscr value */
 	blr
 
 _GLOBAL(kvm_cvt_df)
-	lfd	0,0(r5)			/* load up fpscr value */
-	MTFSF_L(0)
 	lfd	0,0(r3)
 	stfs	0,0(r4)
-	mffs	0
-	stfd	0,0(r5)			/* save new fpscr value */
 	blr