diff mbox

Fix reload inheritance of post-modified addresses.

Message ID g48vyssuds.fsf@richards-desktop-2.stglab.manchester.uk.ibm.com
State New
Headers show

Commit Message

Richard Sandiford Jan. 10, 2011, 2:31 p.m. UTC
This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
Thanks to Dave Gilbert for narrowing down the failure to a particular
rtl instruction and a particular group of flags.

The original failure was seen on Thumb, but the brute-force testcase
in the patch triggers the bug for ARM as well.  

The problem occurs when both a post-modified address _and_ the
base register need reloads.  E.g.:

(insn 88 85 91 3 (set (reg:SF 313 [ *pointer4_5 ])
        (mem:SF (post_modify:SI (reg/v/f:SI 173 [ pointer4 ])
                (plus:SI (reg/v/f:SI 173 [ pointer4 ])
                    (const_int 12 [0xc]))) [2 *pointer4_5+0 S4 A32])) /tmp/postmod-2.c:40 630 {*movsf_vfp}
     (expr_list:REG_INC (reg/v/f:SI 173 [ pointer4 ])
        (nil)))
----
Reloads for insn # 88
Reload 0: reload_in (SI) = (reg/v/f:SI 173 [ pointer4 ])
	reload_out (SI) = (reg/v/f:SI 173 [ pointer4 ])
	CORE_REGS, RELOAD_OTHER (opnum = 1)
	reload_in_reg: (reg/v/f:SI 173 [ pointer4 ])
	reload_out_reg: (reg/v/f:SI 173 [ pointer4 ])
	reload_reg_rtx: (reg:SI 7 r7)
Reload 1: reload_in (SI) = (post_modify:SI (reg/v/f:SI 173 [ pointer4 ])
                                                    (plus:SI (reg/v/f:SI 173 [ pointer4 ])
                                                        (const_int 12 [0xc])))
	reload_out (VOID) = (post_modify:SI (reg/v/f:SI 173 [ pointer4 ])
                                                    (plus:SI (reg/v/f:SI 173 [ pointer4 ])
                                                        (const_int 12 [0xc])))
	CORE_REGS, RELOAD_FOR_INPUT (opnum = 1), inc by 4
	reload_in_reg: (post_modify:SI (reg/v/f:SI 173 [ pointer4 ])
                                                    (plus:SI (reg/v/f:SI 173 [ pointer4 ])
                                                        (const_int 12 [0xc])))
	reload_reg_rtx: (reg:SI 1 r1)

The inner reload (0) comes from find_reloads_address_1, while the
outer reload (1) comes from the constraint matching code (post-modifies
aren't valid for VFP loads).

In this case, reload will think that both reload registers -- r7 and r1 --
can be inherited by later instructions that need register 173.  This is true
of r7, which holds the final modified value, but not of r1, which holds
the deferenced (i.e. unmodified) address.

The equivalence is recorded by this part of emit_reloads:

	  /* Maybe the spill reg contains a copy of reload_out.  */
	  if (rld[r].out != 0
	      && (REG_P (rld[r].out)
#ifdef AUTO_INC_DEC
		  || ! rld[r].out_reg
#endif
		  || REG_P (rld[r].out_reg)))
	    {
	      rtx reg;
	      enum machine_mode mode;
	      int regno, nregs;

	      reg = reload_reg_rtx_for_output[r];
	      mode = GET_MODE (reg);
	      regno = REGNO (reg);
	      nregs = hard_regno_nregs[regno][mode];
	      if (reload_regs_reach_end_p (regno, nregs, rld[r].opnum,
					   rld[r].when_needed))
		{
		  rtx out = (REG_P (rld[r].out)
			     ? rld[r].out
			     : rld[r].out_reg
			     ? rld[r].out_reg
/* AUTO_INC */		     : XEXP (rld[r].in_reg, 0));
		  [...record that REGNO == OUT...]

where (rather cryptically) !rld[r].out_reg means that the reloaded
value is an automodified address, and where OUT will then be the
base register.  The REGNO == OUT equivalence is OK for PRE_MODIFY
addresses, where the address reload register holds the final value,
but not for POST_MODIFY addresses, where it holds the original value.

reload_as_needed has code to fix this up after the fact for POST_INC
and POST_DEC:

	  /* Likewise for regs altered by auto-increment in this insn.
	     REG_INC notes have been changed by reloading:
	     find_reloads_address_1 records substitutions for them,
	     which have been performed by subst_reloads above.  */
	  for (i = n_reloads - 1; i >= 0; i--)
	    {
	      rtx in_reg = rld[i].in_reg;
	      if (in_reg)
		{
		  enum rtx_code code = GET_CODE (in_reg);
		  /* PRE_INC / PRE_DEC will have the reload register ending up
		     with the same value as the stack slot, but that doesn't
		     hold true for POST_INC / POST_DEC.  Either we have to
		     convert the memory access to a true POST_INC / POST_DEC,
		     or we can't use the reload register for inheritance.  */
		  if ((code == POST_INC || code == POST_DEC)
		      && TEST_HARD_REG_BIT (reg_reloaded_valid,
					    REGNO (rld[i].reg_rtx))
		      /* Make sure it is the inc/dec pseudo, and not
			 some other (e.g. output operand) pseudo.  */
		      && ((unsigned) reg_reloaded_contents[REGNO (rld[i].reg_rtx)]
			  == REGNO (XEXP (in_reg, 0))))
		    [...reinstate a POST_{INC,DEC} or invalidate the
			equivalence...]

But this code wouldn't handle the testcase even if POST_MODIFY
were added to the list.  The code runs after subst_reloads,
so XEXP (in_reg, 0) would be the reload register while
reg_reloaded_contents[REGNO (rld[i].reg_rtx)] would be the reloaded
pseudo register.  We have pretty much lost track of the original
operand by this time.

The problem is that (post_dec X) and (post_inc X) are handled in a very

Comments

Ian Bolton Jan. 12, 2011, 10:29 a.m. UTC | #1
Hi Richard,

> This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
> Thanks to Dave Gilbert for narrowing down the failure to a particular
> rtl instruction and a particular group of flags.

Your patch also fixes this:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47166

Thanks to Bernd for debugging this.

This patch is needed on trunk and 4.5 branch, so I hope it can be
reviewed soon.

Cheers,
Ian
Bernd Schmidt Jan. 12, 2011, 12:22 p.m. UTC | #2
On 01/12/2011 11:29 AM, Ian Bolton wrote:
> Hi Richard,
> 
>> This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
>> Thanks to Dave Gilbert for narrowing down the failure to a particular
>> rtl instruction and a particular group of flags.
> 
> Your patch also fixes this:
> 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47166
> 
> Thanks to Bernd for debugging this.

Actually that's a slightly different patch - I had to disable this code
for PRE_MODIFY as well.


Bernd
Richard Sandiford Jan. 12, 2011, 2:31 p.m. UTC | #3
Bernd Schmidt <bernds@codesourcery.com> writes:
> On 01/12/2011 11:29 AM, Ian Bolton wrote:
>> Hi Richard,
>> 
>>> This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
>>> Thanks to Dave Gilbert for narrowing down the failure to a particular
>>> rtl instruction and a particular group of flags.
>> 
>> Your patch also fixes this:
>> 
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47166
>> 
>> Thanks to Bernd for debugging this.
>
> Actually that's a slightly different patch - I had to disable this code
> for PRE_MODIFY as well.

Ah. :-)  What went wrong with PRE_MODIFY?  That big comment was trying to
justify why PRE_MODIFY was actually OK...

Richard
Bernd Schmidt Jan. 12, 2011, 2:36 p.m. UTC | #4
On 01/12/2011 03:31 PM, Richard Sandiford wrote:
> Bernd Schmidt <bernds@codesourcery.com> writes:
>> On 01/12/2011 11:29 AM, Ian Bolton wrote:
>>> Hi Richard,
>>>
>>>> This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
>>>> Thanks to Dave Gilbert for narrowing down the failure to a particular
>>>> rtl instruction and a particular group of flags.
>>>
>>> Your patch also fixes this:
>>>
>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47166
>>>
>>> Thanks to Bernd for debugging this.
>>
>> Actually that's a slightly different patch - I had to disable this code
>> for PRE_MODIFY as well.
> 
> Ah. :-)  What went wrong with PRE_MODIFY?  That big comment was trying to
> justify why PRE_MODIFY was actually OK...

One problem was that inc_for_reload returned the wrong insn. It
generated a sequence
  r1 = r1 + 280
  r3 = r1

with r1 being the reload reg for the inner pseudo, r3 the reload reg for
the PRE_MODIFY. It returns the add insn, causing us to set
spill_reg_store for R3 to an insn that sets R1. If that's fixed, the
other problem is that spill_reg_stored_to is set incorrectly. Presumably
it should be set to R3 here, but rather than add more interdependencies
I decided it would be best just to skip this code for PRE_MODIFY as well.


Bernd
Bernd Schmidt Jan. 12, 2011, 2:39 p.m. UTC | #5
On 01/12/2011 03:36 PM, Bernd Schmidt wrote:
> On 01/12/2011 03:31 PM, Richard Sandiford wrote:
>>
>> Ah. :-)  What went wrong with PRE_MODIFY?  That big comment was trying to
>> justify why PRE_MODIFY was actually OK...
> 
> One problem was that inc_for_reload returned the wrong insn. It
> generated a sequence
>   r1 = r1 + 280
>   r3 = r1
> 
> with r1 being the reload reg for the inner pseudo, r3 the reload reg for
> the PRE_MODIFY. It returns the add insn, causing us to set
> spill_reg_store for R3 to an insn that sets R1. If that's fixed, the
> other problem is that spill_reg_stored_to is set incorrectly. Presumably
> it should be set to R3 here, but rather than add more interdependencies
> I decided it would be best just to skip this code for PRE_MODIFY as well.

BTW, I think we should be running pass_auto_inc_dec after reload and
just delete all of that code.


Bernd
Richard Sandiford Jan. 12, 2011, 3:33 p.m. UTC | #6
Bernd Schmidt <bernds@codesourcery.com> writes:
> On 01/12/2011 03:31 PM, Richard Sandiford wrote:
>> Bernd Schmidt <bernds@codesourcery.com> writes:
>>> On 01/12/2011 11:29 AM, Ian Bolton wrote:
>>>> Hi Richard,
>>>>
>>>>> This patch fixes a miscompilation of SPEC2006 gromacs with ARM+VFP.
>>>>> Thanks to Dave Gilbert for narrowing down the failure to a particular
>>>>> rtl instruction and a particular group of flags.
>>>>
>>>> Your patch also fixes this:
>>>>
>>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47166
>>>>
>>>> Thanks to Bernd for debugging this.
>>>
>>> Actually that's a slightly different patch - I had to disable this code
>>> for PRE_MODIFY as well.
>> 
>> Ah. :-)  What went wrong with PRE_MODIFY?  That big comment was trying to
>> justify why PRE_MODIFY was actually OK...
>
> One problem was that inc_for_reload returned the wrong insn. It
> generated a sequence
>   r1 = r1 + 280
>   r3 = r1
>
> with r1 being the reload reg for the inner pseudo, r3 the reload reg for
> the PRE_MODIFY. It returns the add insn, causing us to set
> spill_reg_store for R3 to an insn that sets R1. If that's fixed, the
> other problem is that spill_reg_stored_to is set incorrectly. Presumably
> it should be set to R3 here, but rather than add more interdependencies
> I decided it would be best just to skip this code for PRE_MODIFY as well.

Ah, right.  In that case I'll defer to your patch.

Richard
diff mbox

Patch

different way from (post_modify X Y) in situations where X isn't a suitable
base register, and hasn't been allocated a suitable base register.
In the case of POST_INC and POST_DEC, find_reloads_address_1 reloads
the whole POST_INC or POST_DEC.  In the case of POST_MODIFY,
find_reloads_address_1 just reloads X.

That is, for POST_INC and POST_DEC, an outer reload is used to handle
two cases:

(1) when X isn't a suitable register
(2) when X _is_ a suitable register, but the constraints don't allow
    auto-inc addresses.

The reload_as_needed code above then tries to reinstate the POST_INC or
POST_DEC if possible (presumably only of use for (1)).  It invalidates
the equivalence if this fails.

In contrast, POST_MODIFY uses an inner reload for (1) and an outer reload
for (2).  And it has to do that because Y might need reloading too.

Thus, AFAICT, the kind of inner+outer reload seen in the testcase can
only occur with {PRE,POST}_MODIFY, not with {PRE,POST}_{INC,DEC}.

I don't really like the way the POST_INC and POST_DEC code seems to
deliberately record an invalid equivalence and then fix it up later,
but it seems to have worked.  I'm reluctant to run the risk of
pessimising it (or introducing new bugs) by handling it in the same
way as POST_MODIFY.  I'll give it a try though if that's preferred.

As things stand, I think the patch below is the correct fix.

Tested on arm-linux-gnueabi using --with-float=softfp.  Also sanity-
checked by bootstrapping & regression testing on x86_64-linux-gnu.
OK to install?

Richard


gcc/
	* reload1.c (emit_reload_insns): Don't record equivalences for
	POST_MODIFY addresses.

gcc/testsuite/
	* gcc.c-torture/execute/postmod-1.c: New test.

Index: gcc/reload1.c
===================================================================
--- gcc/reload1.c	2011-01-10 09:28:36.000000000 +0000
+++ gcc/reload1.c	2011-01-10 14:21:07.000000000 +0000
@@ -8086,10 +8086,41 @@  emit_reload_insns (struct insn_chain *ch
 	  /* Maybe the spill reg contains a copy of reload_out.  */
 	  if (rld[r].out != 0
 	      && (REG_P (rld[r].out)
-#ifdef AUTO_INC_DEC
-		  || ! rld[r].out_reg
-#endif
-		  || REG_P (rld[r].out_reg)))
+		  || (rld[r].out_reg
+		      ? REG_P (rld[r].out_reg)
+		      /* The reload value is an auto-modification of
+			 some kind.  PRE_INC, POST_INC, PRE_DEC and
+			 POST_DEC reloads can occur in two situations:
+
+			 (1) if the operand isn't a base register
+			 (2) if the operand is a base register but the
+			     constraints don't allow auto-modified addresses.
+
+			 Neither situation should generate reloads of
+			 XEXP (rld[r].out, 0).
+
+			 For these four rtx codes, we record an equivalence
+			 between the reload register and the operand on the
+			 optimistic assumption that we can make the
+			 equivalence hold.  reload_as_needed must then
+			 either make it hold or invalidate the equivalence.
+
+			 PRE_MODIFY and POST_MODIFY addresses only occur
+			 for (2) above; (1) is handled by reloading the
+			 base register instead.  This makes it hard for
+			 reload_as_needed to fix things up in the same
+			 way as for *_INC and *_DEC, because the fixup
+			 code runs after the reload substitutions have
+			 been made.  We can't therefore make the same
+			 optimistic assumption in this case.
+
+			 inc_for_reload makes sure that PRE_MODIFY
+			 reloads leave the reload register holding the
+			 final _modified_ value, and that POST_MODIFY
+			 reloads leave the reload register holding the
+			 original _unmodified_ value.  We can only record
+			 an equivalence for the former case.  */
+		      : GET_CODE (rld[r].out) != POST_MODIFY)))
 	    {
 	      rtx reg;
 	      enum machine_mode mode;
Index: gcc/testsuite/gcc.c-torture/execute/postmod-1.c
===================================================================
--- /dev/null	2010-12-14 12:47:12.274544604 +0000
+++ gcc/testsuite/gcc.c-torture/execute/postmod-1.c	2011-01-10 13:52:05.000000000 +0000
@@ -0,0 +1,62 @@ 
+#define DECLARE_ARRAY(A) array##A[0x10]
+#define DECLARE_COUNTER(A) counter##A = 0
+#define DECLARE_POINTER(A) *pointer##A = array##A + x
+/* Create a loop that allows post-modification of pointerA, followed by
+   a use of the post-modified address.  */
+#define BEFORE(A) counter##A += *pointer##A, pointer##A += 3
+#define AFTER(A) counter##A += pointer##A[x]
+
+/* Set up the arrays so that one iteration of the loop sets the counter
+   to 3.0f.  */
+#define INIT_ARRAY(A) array##A[1] = 1.0f, array##A[5] = 2.0f
+
+/* Check that the loop worked correctly for all values.  */
+#define CHECK_ARRAY(A) exit_code |= (counter##A != 3.0f)
+
+/* Having 6 copies triggered the bug for ARM and Thumb.  */
+#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
+
+/* Each addendA should be allocated a register.  */
+#define INIT_VOLATILE(A) addend##A = vol
+#define ADD_VOLATILE(A) vol += addend##A
+
+/* Having 5 copies triggered the bug for ARM and Thumb.  */
+#define MANY2(A) A (0), A (1), A (2), A (3), A (4)
+
+float MANY (DECLARE_ARRAY);
+float MANY (DECLARE_COUNTER);
+
+volatile int stop = 1;
+volatile int vol;
+
+void __attribute__((noinline))
+foo (int x)
+{
+  float MANY (DECLARE_POINTER);
+  int i;
+
+  do
+    {
+      MANY (BEFORE);
+      MANY (AFTER);
+      /* Create an inner loop that should ensure the code above
+	 has registers free for reload inheritance.  */
+      {
+	int MANY2 (INIT_VOLATILE);
+	for (i = 0; i < 10; i++)
+	  MANY2 (ADD_VOLATILE);
+      }
+    }
+  while (!stop);
+}
+
+int
+main (void)
+{
+  int exit_code = 0;
+
+  MANY (INIT_ARRAY);
+  foo (1);
+  MANY (CHECK_ARRAY);
+  return exit_code;
+}