diff mbox series

[mid-end] Fix PR85123 incorrect copies

Message ID 20180405122903.GA509@arm.com
State New
Headers show
Series [mid-end] Fix PR85123 incorrect copies | expand

Commit Message

Tamar Christina April 5, 2018, 12:29 p.m. UTC
Hi All,

This patch fixes the code generation for copy_blkmode_to_reg by calculating
the bitsize per iteration, performing the largest copy allowed that does not
read more bits than remain to be copied.

This fixes the bad code generation reported and also still produces better code
in most cases. For targets that don't support fast unaligned access it defaults
to the old 1 byte copy (MIN alignment).

This produces for the copying of a 3 byte structure:

fun3:
	adrp	x1, .LANCHOR0
	add	x1, x1, :lo12:.LANCHOR0
	mov	x0, 0
	sub	sp, sp, #16
	ldrh	w2, [x1, 16]
	ldrb	w1, [x1, 18]
	add	sp, sp, 16
	bfi	x0, x2, 0, 16
	bfi	x0, x1, 16, 8
	ret

whereas before it was producing

fun3:
	adrp	x0, .LANCHOR0
	add	x2, x0, :lo12:.LANCHOR0
	sub	sp, sp, #16
	ldrh	w1, [x0, #:lo12:.LANCHOR0]
	ldrb	w0, [x2, 2]
	strh	w1, [sp, 8]
	strb	w0, [sp, 10]
	ldr	w0, [sp, 8]
	add	sp, sp, 16
	ret

Cross compiled on aarch64-none-elf with no issues.
Bootstrapped powerpc64-unknown-linux-gnu, x86_64-pc-linux-gnu,
arm-none-linux-gnueabihf, aarch64-none-linux-gnu with no issues.

Regtested aarch64-none-elf, x86_64-pc-linux-gnu, powerpc64-unknown-linux-gnu
and arm-none-linux-gnueabihf and found no issues.

Regression on powerpc (pr63594-2.c) is fixed now.

OK for trunk?

Thanks,
Tamar

gcc/
2018-04-05  Tamar Christina  <tamar.christina@arm.com>

	PR middle-end/85123
	* expr.c (copy_blkmode_to_reg): Fix wrong code gen.

--

Comments

Alan Modra April 5, 2018, 11:45 p.m. UTC | #1
On Thu, Apr 05, 2018 at 01:29:06PM +0100, Tamar Christina wrote:
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 00660293f72e5441a6421a280b04c57fca2922b8..7daeb8c91d758edf0b3dc37f6927380b6f3df877 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -2749,7 +2749,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>  {
>    int i, n_regs;
>    unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
> -  unsigned int bitsize;
> +   unsigned int bitsize = 0;
>    rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
>    /* No current ABI uses variable-sized modes to pass a BLKmnode type.  */
>    fixed_size_mode mode = as_a <fixed_size_mode> (mode_in);
> @@ -2782,7 +2782,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>  
>    n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
>    dst_words = XALLOCAVEC (rtx, n_regs);
> -  bitsize = BITS_PER_WORD;
> +
>    if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE (src))))
>      bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>  

You calculate bitsize here, then override it in the loop?  Doesn't
that mean strict align targets will use mis-aligned loads and stores?

> @@ -2791,6 +2791,17 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>         bitpos < bytes * BITS_PER_UNIT;
>         bitpos += bitsize, xbitpos += bitsize)
>      {
> +      /* Find the largest integer mode that can be used to copy all or as
> +	 many bits as possible of the structure.  */
> +      opt_scalar_int_mode mode_iter;
> +      FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
> +      if (GET_MODE_BITSIZE (mode_iter.require ())
> +			    <= ((bytes * BITS_PER_UNIT) - bitpos)
> +	  && GET_MODE_BITSIZE (mode_iter.require ()) <= BITS_PER_WORD)
> +	bitsize = GET_MODE_BITSIZE (mode_iter.require ());
> +      else
> +	break;
> +

This isn't correct.  Consider a 6 byte struct on a 4 byte word, 8 bit
byte, big-endian target when targetm.calls.return_in_msb is false.

In this scenario, copy_blkmode_to_reg should return two registers, set
as if they had been loaded from two words in memory laid out as
follows (left to right increasing byte addresses):
 _______________________         _______________________
|  0  |  0  |  s0 |  s1 |       |  s2 |  s3 |  s4 |  s5 |
 ~~~~~~~~~~~~~~~~~~~~~~~         ~~~~~~~~~~~~~~~~~~~~~~~

So we will have xbitpos=16 first time around the loop.  That means
your new code will attempt to store 32 bits into a bit-field starting
at bit 16 in the first 32-bit register, and of course fail.

This scenario used to be handled correctly, at least when the struct
wasn't over-aligned.
Richard Biener April 6, 2018, 10:23 a.m. UTC | #2
On Fri, 6 Apr 2018, Alan Modra wrote:

> On Thu, Apr 05, 2018 at 01:29:06PM +0100, Tamar Christina wrote:
> > diff --git a/gcc/expr.c b/gcc/expr.c
> > index 00660293f72e5441a6421a280b04c57fca2922b8..7daeb8c91d758edf0b3dc37f6927380b6f3df877 100644
> > --- a/gcc/expr.c
> > +++ b/gcc/expr.c
> > @@ -2749,7 +2749,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> >  {
> >    int i, n_regs;
> >    unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
> > -  unsigned int bitsize;
> > +   unsigned int bitsize = 0;
> >    rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
> >    /* No current ABI uses variable-sized modes to pass a BLKmnode type.  */
> >    fixed_size_mode mode = as_a <fixed_size_mode> (mode_in);
> > @@ -2782,7 +2782,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> >  
> >    n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
> >    dst_words = XALLOCAVEC (rtx, n_regs);
> > -  bitsize = BITS_PER_WORD;
> > +
> >    if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE (src))))
> >      bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
> >  
> 
> You calculate bitsize here, then override it in the loop?  Doesn't
> that mean strict align targets will use mis-aligned loads and stores?
> 
> > @@ -2791,6 +2791,17 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> >         bitpos < bytes * BITS_PER_UNIT;
> >         bitpos += bitsize, xbitpos += bitsize)
> >      {
> > +      /* Find the largest integer mode that can be used to copy all or as
> > +	 many bits as possible of the structure.  */
> > +      opt_scalar_int_mode mode_iter;
> > +      FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
> > +      if (GET_MODE_BITSIZE (mode_iter.require ())
> > +			    <= ((bytes * BITS_PER_UNIT) - bitpos)
> > +	  && GET_MODE_BITSIZE (mode_iter.require ()) <= BITS_PER_WORD)
> > +	bitsize = GET_MODE_BITSIZE (mode_iter.require ());
> > +      else
> > +	break;
> > +
> 
> This isn't correct.  Consider a 6 byte struct on a 4 byte word, 8 bit
> byte, big-endian target when targetm.calls.return_in_msb is false.
> 
> In this scenario, copy_blkmode_to_reg should return two registers, set
> as if they had been loaded from two words in memory laid out as
> follows (left to right increasing byte addresses):
>  _______________________         _______________________
> |  0  |  0  |  s0 |  s1 |       |  s2 |  s3 |  s4 |  s5 |
>  ~~~~~~~~~~~~~~~~~~~~~~~         ~~~~~~~~~~~~~~~~~~~~~~~
> 
> So we will have xbitpos=16 first time around the loop.  That means
> your new code will attempt to store 32 bits into a bit-field starting
> at bit 16 in the first 32-bit register, and of course fail.
> 
> This scenario used to be handled correctly, at least when the struct
> wasn't over-aligned.

I wonder if it's best to revert the original regression causing patch
and look for a proper solution in the GCC 9 timeframe?

Thanks,
Richard.
Eric Botcazou April 6, 2018, 11:25 a.m. UTC | #3
> I wonder if it's best to revert the original regression causing patch
> and look for a proper solution in the GCC 9 timeframe?

Probably the best course of action indeed, as changes in this area need to be 
thoroughly tested on various targets to cover all the cases.
Richard Biener April 6, 2018, 11:35 a.m. UTC | #4
On Fri, 6 Apr 2018, Eric Botcazou wrote:

> > I wonder if it's best to revert the original regression causing patch
> > and look for a proper solution in the GCC 9 timeframe?
> 
> Probably the best course of action indeed, as changes in this area need to be 
> thoroughly tested on various targets to cover all the cases.

So please go ahead with that now.  If we wind up with an obviously
correct solution we can reconsider later, backporting to GCC 8.2+ is
possible as well.

Thanks,
Richard.
Tamar Christina April 6, 2018, 12:55 p.m. UTC | #5
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, April 6, 2018 12:36
> To: Eric Botcazou <ebotcazou@adacore.com>
> Cc: gcc-patches@gcc.gnu.org; Alan Modra <amodra@gmail.com>; Tamar
> Christina <Tamar.Christina@arm.com>; nd <nd@arm.com>;
> law@redhat.com; ian@airs.com; bergner@vnet.ibm.com
> Subject: Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies
> 
> On Fri, 6 Apr 2018, Eric Botcazou wrote:
> 
> > > I wonder if it's best to revert the original regression causing
> > > patch and look for a proper solution in the GCC 9 timeframe?
> >
> > Probably the best course of action indeed, as changes in this area
> > need to be thoroughly tested on various targets to cover all the cases.
> 
> So please go ahead with that now.  If we wind up with an obviously correct
> solution we can reconsider later, backporting to GCC 8.2+ is possible as well.

I was going to suggest a more conservative approach of only doing it when there is
no padding correction, since it seems to me that the only targets that have an issue
are those which use padding corrections.

But I'll revert the patch then.

> 
> Thanks,
> Richard.
Tamar Christina April 6, 2018, 1:17 p.m. UTC | #6
Patch has been reverted as r259169 as requested.

Sorry for the breakage,
Tamar.

> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, April 6, 2018 12:36
> To: Eric Botcazou <ebotcazou@adacore.com>
> Cc: gcc-patches@gcc.gnu.org; Alan Modra <amodra@gmail.com>; Tamar
> Christina <Tamar.Christina@arm.com>; nd <nd@arm.com>;
> law@redhat.com; ian@airs.com; bergner@vnet.ibm.com
> Subject: Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies
> 
> On Fri, 6 Apr 2018, Eric Botcazou wrote:
> 
> > > I wonder if it's best to revert the original regression causing
> > > patch and look for a proper solution in the GCC 9 timeframe?
> >
> > Probably the best course of action indeed, as changes in this area
> > need to be thoroughly tested on various targets to cover all the cases.
> 
> So please go ahead with that now.  If we wind up with an obviously correct
> solution we can reconsider later, backporting to GCC 8.2+ is possible as well.
> 
> Thanks,
> Richard.
diff mbox series

Patch

diff --git a/gcc/expr.c b/gcc/expr.c
index 00660293f72e5441a6421a280b04c57fca2922b8..7daeb8c91d758edf0b3dc37f6927380b6f3df877 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -2749,7 +2749,7 @@  copy_blkmode_to_reg (machine_mode mode_in, tree src)
 {
   int i, n_regs;
   unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
-  unsigned int bitsize;
+   unsigned int bitsize = 0;
   rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
   /* No current ABI uses variable-sized modes to pass a BLKmnode type.  */
   fixed_size_mode mode = as_a <fixed_size_mode> (mode_in);
@@ -2782,7 +2782,7 @@  copy_blkmode_to_reg (machine_mode mode_in, tree src)
 
   n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
   dst_words = XALLOCAVEC (rtx, n_regs);
-  bitsize = BITS_PER_WORD;
+
   if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE (src))))
     bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
 
@@ -2791,6 +2791,17 @@  copy_blkmode_to_reg (machine_mode mode_in, tree src)
        bitpos < bytes * BITS_PER_UNIT;
        bitpos += bitsize, xbitpos += bitsize)
     {
+      /* Find the largest integer mode that can be used to copy all or as
+	 many bits as possible of the structure.  */
+      opt_scalar_int_mode mode_iter;
+      FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
+      if (GET_MODE_BITSIZE (mode_iter.require ())
+			    <= ((bytes * BITS_PER_UNIT) - bitpos)
+	  && GET_MODE_BITSIZE (mode_iter.require ()) <= BITS_PER_WORD)
+	bitsize = GET_MODE_BITSIZE (mode_iter.require ());
+      else
+	break;
+
       /* We need a new destination pseudo each time xbitpos is
 	 on a word boundary and when xbitpos == padding_correction
 	 (the first time through).  */