Patchwork [V2,3/3] powerpc: Fix Unaligned LE Floating Point Loads and Stores

login
register
mail settings
Submitter Tom Musta
Date Oct. 31, 2013, 6:38 p.m.
Message ID <1383244738-5986-4-git-send-email-tommusta@gmail.com>
Download mbox | patch
Permalink /patch/287585/
State Superseded
Headers show

Comments

Tom Musta - Oct. 31, 2013, 6:38 p.m.
From: Tom Musta <tommusta@gmail.com>

This patch addresses unaligned single precision floating point loads
and stores in the single-step code.  The old implementation
improperly treated an 8 byte structure as an array of two 4 byte
words, which is a classic little endian bug.

Signed-off-by: Tom Musta <tommusta@gmail.com>
---
 arch/powerpc/lib/sstep.c |   52 +++++++++++++++++++++++++++++++++++----------
 1 files changed, 40 insertions(+), 12 deletions(-)
Benjamin Herrenschmidt - Nov. 4, 2013, 2:34 a.m.
On Thu, 2013-10-31 at 13:38 -0500, Tom wrote:
> From: Tom Musta <tommusta@gmail.com>
> 
> This patch addresses unaligned single precision floating point loads
> and stores in the single-step code.  The old implementation
> improperly treated an 8 byte structure as an array of two 4 byte
> words, which is a classic little endian bug.

Do that patch differ from v1 ? I also already merged v1 of this
one (the only one I didn't merge is the emulate_step one)

Cheers,
Ben.

> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
>  arch/powerpc/lib/sstep.c |   52 +++++++++++++++++++++++++++++++++++----------
>  1 files changed, 40 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index c8743e1..1cfd150 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -355,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
>  				struct pt_regs *regs)
>  {
>  	int err;
> -	unsigned long val[sizeof(double) / sizeof(long)];
> +	union {
> +		double dbl;
> +		unsigned long ul[2];
> +		struct {
> +#ifdef __BIG_ENDIAN__
> +			unsigned _pad_;
> +			unsigned word;
> +#endif
> +#ifdef __LITTLE_ENDIAN__
> +			unsigned word;
> +			unsigned _pad_;
> +#endif
> +		} single;
> +	} data;
>  	unsigned long ptr;
>  
>  	if (!address_ok(regs, ea, nb))
>  		return -EFAULT;
>  	if ((ea & 3) == 0)
>  		return (*func)(rn, ea);
> -	ptr = (unsigned long) &val[0];
> +	ptr = (unsigned long) &data.ul;
>  	if (sizeof(unsigned long) == 8 || nb == 4) {
> -		err = read_mem_unaligned(&val[0], ea, nb, regs);
> -		ptr += sizeof(unsigned long) - nb;
> +		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
> +		if (nb == 4)
> +			ptr = (unsigned long)&(data.single.word);
>  	} else {
>  		/* reading a double on 32-bit */
> -		err = read_mem_unaligned(&val[0], ea, 4, regs);
> +		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
>  		if (!err)
> -			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
> +			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
>  	}
>  	if (err)
>  		return err;
> @@ -382,28 +396,42 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
>  				 struct pt_regs *regs)
>  {
>  	int err;
> -	unsigned long val[sizeof(double) / sizeof(long)];
> +	union {
> +		double dbl;
> +		unsigned long ul[2];
> +		struct {
> +#ifdef __BIG_ENDIAN__
> +			unsigned _pad_;
> +			unsigned word;
> +#endif
> +#ifdef __LITTLE_ENDIAN__
> +			unsigned word;
> +			unsigned _pad_;
> +#endif
> +		} single;
> +	} data;
>  	unsigned long ptr;
>  
>  	if (!address_ok(regs, ea, nb))
>  		return -EFAULT;
>  	if ((ea & 3) == 0)
>  		return (*func)(rn, ea);
> -	ptr = (unsigned long) &val[0];
> +	ptr = (unsigned long) &data.ul[0];
>  	if (sizeof(unsigned long) == 8 || nb == 4) {
> -		ptr += sizeof(unsigned long) - nb;
> +		if (nb == 4)
> +			ptr = (unsigned long)&(data.single.word);
>  		err = (*func)(rn, ptr);
>  		if (err)
>  			return err;
> -		err = write_mem_unaligned(val[0], ea, nb, regs);
> +		err = write_mem_unaligned(data.ul[0], ea, nb, regs);
>  	} else {
>  		/* writing a double on 32-bit */
>  		err = (*func)(rn, ptr);
>  		if (err)
>  			return err;
> -		err = write_mem_unaligned(val[0], ea, 4, regs);
> +		err = write_mem_unaligned(data.ul[0], ea, 4, regs);
>  		if (!err)
> -			err = write_mem_unaligned(val[1], ea + 4, 4, regs);
> +			err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
>  	}
>  	return err;
>  }
Tom Musta - Nov. 4, 2013, 1:29 p.m.
On 11/3/2013 8:34 PM, Benjamin Herrenschmidt wrote:
> On Thu, 2013-10-31 at 13:38 -0500, Tom wrote:
>> From: Tom Musta <tommusta@gmail.com>
>>
>> This patch addresses unaligned single precision floating point loads
>> and stores in the single-step code.  The old implementation
>> improperly treated an 8 byte structure as an array of two 4 byte
>> words, which is a classic little endian bug.
> 
> Do that patch differ from v1 ? I also already merged v1 of this
> one (the only one I didn't merge is the emulate_step one)
> 
> Cheers,
> Ben.
>

Ben:  Only patch 1/3 (Enable emulate_step in Little Endian Mode) differs in V2.
Paul Mackerras - Dec. 11, 2013, 3:54 a.m.
On Thu, Oct 31, 2013 at 01:38:58PM -0500, Tom wrote:
> From: Tom Musta <tommusta@gmail.com>
> 
> This patch addresses unaligned single precision floating point loads
> and stores in the single-step code.  The old implementation
> improperly treated an 8 byte structure as an array of two 4 byte
> words, which is a classic little endian bug.
> 
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
>  arch/powerpc/lib/sstep.c |   52 +++++++++++++++++++++++++++++++++++----------
>  1 files changed, 40 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index c8743e1..1cfd150 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -355,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
>  				struct pt_regs *regs)
>  {
>  	int err;
> -	unsigned long val[sizeof(double) / sizeof(long)];
> +	union {
> +		double dbl;
> +		unsigned long ul[2];
> +		struct {
> +#ifdef __BIG_ENDIAN__
> +			unsigned _pad_;
> +			unsigned word;
> +#endif
> +#ifdef __LITTLE_ENDIAN__
> +			unsigned word;
> +			unsigned _pad_;
> +#endif
> +		} single;
> +	} data;
>  	unsigned long ptr;
>  
>  	if (!address_ok(regs, ea, nb))
>  		return -EFAULT;
>  	if ((ea & 3) == 0)
>  		return (*func)(rn, ea);
> -	ptr = (unsigned long) &val[0];
> +	ptr = (unsigned long) &data.ul;
>  	if (sizeof(unsigned long) == 8 || nb == 4) {
> -		err = read_mem_unaligned(&val[0], ea, nb, regs);
> -		ptr += sizeof(unsigned long) - nb;
> +		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
> +		if (nb == 4)
> +			ptr = (unsigned long)&(data.single.word);
>  	} else {
>  		/* reading a double on 32-bit */
> -		err = read_mem_unaligned(&val[0], ea, 4, regs);
> +		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
>  		if (!err)
> -			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
> +			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);

This breaks 32-bit big-endian (as well as making the code longer and
more complex).

In fact, to make this work for 64-bit little-endian, all you really
needed to do was ifdef out the statement:

		ptr += sizeof(unsigned long) - nb;

Paul.
Paul Mackerras - Dec. 11, 2013, 4:57 a.m.
On Wed, Dec 11, 2013 at 02:54:40PM +1100, Paul Mackerras wrote:
> On Thu, Oct 31, 2013 at 01:38:58PM -0500, Tom wrote:
> > From: Tom Musta <tommusta@gmail.com>
> > 
> > This patch addresses unaligned single precision floating point loads
> > and stores in the single-step code.  The old implementation
> > improperly treated an 8 byte structure as an array of two 4 byte
> > words, which is a classic little endian bug.
> > 
> > Signed-off-by: Tom Musta <tommusta@gmail.com>
> > ---
> >  arch/powerpc/lib/sstep.c |   52 +++++++++++++++++++++++++++++++++++----------
> >  1 files changed, 40 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> > index c8743e1..1cfd150 100644
> > --- a/arch/powerpc/lib/sstep.c
> > +++ b/arch/powerpc/lib/sstep.c
> > @@ -355,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
> >  				struct pt_regs *regs)
> >  {
> >  	int err;
> > -	unsigned long val[sizeof(double) / sizeof(long)];
> > +	union {
> > +		double dbl;
> > +		unsigned long ul[2];
> > +		struct {
> > +#ifdef __BIG_ENDIAN__
> > +			unsigned _pad_;
> > +			unsigned word;
> > +#endif
> > +#ifdef __LITTLE_ENDIAN__
> > +			unsigned word;
> > +			unsigned _pad_;
> > +#endif
> > +		} single;
> > +	} data;
> >  	unsigned long ptr;
> >  
> >  	if (!address_ok(regs, ea, nb))
> >  		return -EFAULT;
> >  	if ((ea & 3) == 0)
> >  		return (*func)(rn, ea);
> > -	ptr = (unsigned long) &val[0];
> > +	ptr = (unsigned long) &data.ul;
> >  	if (sizeof(unsigned long) == 8 || nb == 4) {
> > -		err = read_mem_unaligned(&val[0], ea, nb, regs);
> > -		ptr += sizeof(unsigned long) - nb;
> > +		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
> > +		if (nb == 4)
> > +			ptr = (unsigned long)&(data.single.word);
> >  	} else {
> >  		/* reading a double on 32-bit */
> > -		err = read_mem_unaligned(&val[0], ea, 4, regs);
> > +		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
> >  		if (!err)
> > -			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
> > +			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
> 
> This breaks 32-bit big-endian (as well as making the code longer and
> more complex).

And in fact none of this code will get executed in little-endian mode
anyway, since we still have this in the middle of emulate_step():

	/*
	 * Following cases are for loads and stores, so bail out
	 * if we're in little-endian mode.
	 */
	if (regs->msr & MSR_LE)
		return 0;

Paul.
Tom Musta - Dec. 12, 2013, 3:08 p.m.
On 12/10/2013 10:57 PM, Paul Mackerras wrote:
> On Wed, Dec 11, 2013 at 02:54:40PM +1100, Paul Mackerras wrote:

>> This breaks 32-bit big-endian (as well as making the code longer and
>> more complex).
> 
> And in fact none of this code will get executed in little-endian mode
> anyway, since we still have this in the middle of emulate_step():
> 
> 	/*
> 	 * Following cases are for loads and stores, so bail out
> 	 * if we're in little-endian mode.
> 	 */
> 	if (regs->msr & MSR_LE)
> 		return 0;
> 
> Paul.
> 

See patch 1/3 to explain how it becomes relevant in LE.

I will take another look at the change.
Tom Musta - Dec. 12, 2013, 8:33 p.m.
On 12/12/2013 9:08 AM, Tom Musta wrote:
> On 12/10/2013 10:57 PM, Paul Mackerras wrote:
>> On Wed, Dec 11, 2013 at 02:54:40PM +1100, Paul Mackerras wrote:
> 
>>> This breaks 32-bit big-endian (as well as making the code longer and
>>> more complex).
>>
>> And in fact none of this code will get executed in little-endian mode
>> anyway, since we still have this in the middle of emulate_step():
>>
>> 	/*
>> 	 * Following cases are for loads and stores, so bail out
>> 	 * if we're in little-endian mode.
>> 	 */
>> 	if (regs->msr & MSR_LE)
>> 		return 0;
>>
>> Paul.
>>
> 
> See patch 1/3 to explain how it becomes relevant in LE.
> 
> I will take another look at the change.
> 

It appears that patch 1/3 never got picked up, even though I thought Ben & I
had worked through that.

And I agree that the code could be simpler.  I will work up a patch to address
these two issues.
Paul Mackerras - Dec. 12, 2013, 9:19 p.m.
On Thu, Dec 12, 2013 at 02:33:36PM -0600, Tom Musta wrote:
> On 12/12/2013 9:08 AM, Tom Musta wrote:
> > On 12/10/2013 10:57 PM, Paul Mackerras wrote:
> >> On Wed, Dec 11, 2013 at 02:54:40PM +1100, Paul Mackerras wrote:
> > 
> >>> This breaks 32-bit big-endian (as well as making the code longer and
> >>> more complex).
> >>
> >> And in fact none of this code will get executed in little-endian mode
> >> anyway, since we still have this in the middle of emulate_step():
> >>
> >> 	/*
> >> 	 * Following cases are for loads and stores, so bail out
> >> 	 * if we're in little-endian mode.
> >> 	 */
> >> 	if (regs->msr & MSR_LE)
> >> 		return 0;
> >>
> >> Paul.
> >>
> > 
> > See patch 1/3 to explain how it becomes relevant in LE.
> > 
> > I will take another look at the change.
> > 
> 
> It appears that patch 1/3 never got picked up, even though I thought Ben & I
> had worked through that.
> 
> And I agree that the code could be simpler.  I will work up a patch to address
> these two issues.

The other thing that's important for us to know is how you are testing
these changes.  For something like this I'd like to see a description
of the tests you have done in the commit message.

I have been hacking on sstep.c pretty heavily myself recently, so we
will need to coordinate on the changes.

Paul.

Patch

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c8743e1..1cfd150 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -355,22 +355,36 @@  static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
 				struct pt_regs *regs)
 {
 	int err;
-	unsigned long val[sizeof(double) / sizeof(long)];
+	union {
+		double dbl;
+		unsigned long ul[2];
+		struct {
+#ifdef __BIG_ENDIAN__
+			unsigned _pad_;
+			unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+			unsigned word;
+			unsigned _pad_;
+#endif
+		} single;
+	} data;
 	unsigned long ptr;
 
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & 3) == 0)
 		return (*func)(rn, ea);
-	ptr = (unsigned long) &val[0];
+	ptr = (unsigned long) &data.ul;
 	if (sizeof(unsigned long) == 8 || nb == 4) {
-		err = read_mem_unaligned(&val[0], ea, nb, regs);
-		ptr += sizeof(unsigned long) - nb;
+		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
+		if (nb == 4)
+			ptr = (unsigned long)&(data.single.word);
 	} else {
 		/* reading a double on 32-bit */
-		err = read_mem_unaligned(&val[0], ea, 4, regs);
+		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
 		if (!err)
-			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
+			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
 	}
 	if (err)
 		return err;
@@ -382,28 +396,42 @@  static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
 				 struct pt_regs *regs)
 {
 	int err;
-	unsigned long val[sizeof(double) / sizeof(long)];
+	union {
+		double dbl;
+		unsigned long ul[2];
+		struct {
+#ifdef __BIG_ENDIAN__
+			unsigned _pad_;
+			unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+			unsigned word;
+			unsigned _pad_;
+#endif
+		} single;
+	} data;
 	unsigned long ptr;
 
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & 3) == 0)
 		return (*func)(rn, ea);
-	ptr = (unsigned long) &val[0];
+	ptr = (unsigned long) &data.ul[0];
 	if (sizeof(unsigned long) == 8 || nb == 4) {
-		ptr += sizeof(unsigned long) - nb;
+		if (nb == 4)
+			ptr = (unsigned long)&(data.single.word);
 		err = (*func)(rn, ptr);
 		if (err)
 			return err;
-		err = write_mem_unaligned(val[0], ea, nb, regs);
+		err = write_mem_unaligned(data.ul[0], ea, nb, regs);
 	} else {
 		/* writing a double on 32-bit */
 		err = (*func)(rn, ptr);
 		if (err)
 			return err;
-		err = write_mem_unaligned(val[0], ea, 4, regs);
+		err = write_mem_unaligned(data.ul[0], ea, 4, regs);
 		if (!err)
-			err = write_mem_unaligned(val[1], ea + 4, 4, regs);
+			err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
 	}
 	return err;
 }