diff mbox

PR 69577: Invalid RA of destination subregs

Message ID 87lh73hu6w.fsf@e105548-lin.cambridge.arm.com
State New
Headers show

Commit Message

Richard Sandiford Feb. 2, 2016, 5:56 p.m. UTC
[Resending without disclaimer, sorry]

Uros Bizjak <ubizjak@gmail.com> writes:
> On Tue, Feb 2, 2016 at 5:54 PM, Kyrill Tkachov
> <kyrylo.tkachov@foss.arm.com> wrote:
>> Hi Richard,
>>
>>
>> On 02/02/16 14:56, Richard Sandiford wrote:
>>>
>>> In PR 69577 we have:
>>>
>>>    A: (set (reg:V2TI X) ...)
>>>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>
>>> X gets allocated to an AVX register, as usual for V2TI.  The problem is
>>> that the movti for B doesn't then preserve the other half of X, even
>>> though the subreg semantics are supposed to guarantee that.
>>>
>>> If instead the same value had been set by:
>>>
>>>    A': (set (subreg:TI (reg:V2TI X) 16) ...)
>>>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>
>>> the subreg in A' would have prevented the use of AVX registers for X,
>>> since you can't directly access the high part.
>>>
>>> IMO these are really the same thing.  An alternative way to view it
>>> is that the original sequence is equivalent to:
>>>
>>>    A: (set (reg:V2TI X) ...)
>>>    B1: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>    B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16))
>>>
>>> in which B2 is a no-op and therefore implicit.  The handling ought
>>> to be the same regardless of whether there is an rtl insn that
>>> explicitly assigns to (subreg:TI (reg:V2TI X) 16).
>>>
>>> This patch implements that idea.  Hopefully the comments explain
>>> what's going on.
>>>
>>> Tested on x86_64-linux-gnu so far.  Will test on aarch64-linux-gnu and
>>> arm-linux-gnueabihf as well.  OK to install if the additional testing
>>> succeeds?
>>
>>
>> For me this patch causes an ICE when building libgcc during an
>> aarch64-none-elf build.
>> It's a segfault with the trace:
>> 0xb0ac2a crash_signal
>>         $SRC/gcc/toplev.c:335
>> 0xa7cfd7 init_subregs_of_mode()
>>         $SRC/gcc/reginfo.c:1345
>> 0x96fc4b init_costs
>>         $SRC/gcc/ira-costs.c:2187
>> 0x97419e ira_set_pseudo_classes(bool, _IO_FILE*)
>>         $SRC/gcc/ira-costs.c:2237
>> 0x106fd1e alloc_global_sched_pressure_data
>>         $SRC/gcc/haifa-sched.c:7244
>> 0x106fd1e sched_init()
>>         $SRC/gcc/haifa-sched.c:7394
>> 0x107109a haifa_sched_init()
>>         $SRC/gcc/haifa-sched.c:7406
>> 0xab37ac schedule_insns()
>>         $SRC/gcc/sched-rgn.c:3504
>> 0xab3f5b rest_of_handle_sched
>>         $SRC/gcc/sched-rgn.c:3717
>> 0xab3f5b execute
>>         $SRC/gcc/sched-rgn.c:3825
>
> Also on x86_64-linux-gnu when building -m32 multilib:
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x0000000000d28264 in init_subregs_of_mode () at
> /home/uros/gcc-svn/trunk/gcc/reginfo.c:1345
> 1345            FOR_EACH_INSN_DEF (def, insn)
> (gdb) p insn
> $1 = (rtx_insn *) 0x7fffef9f4d40
> (gdb) p debug_rtx (insn)
> (code_label 60 31 39 10 9 "" [3 uses])
> $2 = void
> (gdb) p def
> $3 = (df_ref) 0x0

Bah, sorry.  I test with --enable-checking=yes,rtl,df, and it turns out
that df checking masks this kind of problem.  -m32 builds (and tests)
fine with it but not without.

Here's the patch again with the obvious fix.  Retesting now with just
--enable-checking=yes,rtl.

Thanks,
Richard


gcc/
	PR rtl-optimization/69577
	* reginfo.c (record_subregs_of_mode): Add a partial_def parameter.
	(find_subregs_of_mode): Update accordingly.  Iterate over partial
	definitions.

gcc/testsuite/
	PR rtl-optimization/69577
	* gcc.target/i386/pr69577.c: New test.

Comments

Richard Sandiford Feb. 3, 2016, 10:35 p.m. UTC | #1
Richard Sandiford <richard.sandiford@arm.com> writes:
> Uros Bizjak <ubizjak@gmail.com> writes:
>> On Tue, Feb 2, 2016 at 5:54 PM, Kyrill Tkachov
>> <kyrylo.tkachov@foss.arm.com> wrote:
>>> Hi Richard,
>>>
>>>
>>> On 02/02/16 14:56, Richard Sandiford wrote:
>>>>
>>>> In PR 69577 we have:
>>>>
>>>>    A: (set (reg:V2TI X) ...)
>>>>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>>
>>>> X gets allocated to an AVX register, as usual for V2TI.  The problem is
>>>> that the movti for B doesn't then preserve the other half of X, even
>>>> though the subreg semantics are supposed to guarantee that.
>>>>
>>>> If instead the same value had been set by:
>>>>
>>>>    A': (set (subreg:TI (reg:V2TI X) 16) ...)
>>>>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>>
>>>> the subreg in A' would have prevented the use of AVX registers for X,
>>>> since you can't directly access the high part.
>>>>
>>>> IMO these are really the same thing.  An alternative way to view it
>>>> is that the original sequence is equivalent to:
>>>>
>>>>    A: (set (reg:V2TI X) ...)
>>>>    B1: (set (subreg:TI (reg:V2TI X) 0) ...)
>>>>    B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16))
>>>>
>>>> in which B2 is a no-op and therefore implicit.  The handling ought
>>>> to be the same regardless of whether there is an rtl insn that
>>>> explicitly assigns to (subreg:TI (reg:V2TI X) 16).
>>>>
>>>> This patch implements that idea.  Hopefully the comments explain
>>>> what's going on.
>>>>
>>>> Tested on x86_64-linux-gnu so far.  Will test on aarch64-linux-gnu and
>>>> arm-linux-gnueabihf as well.  OK to install if the additional testing
>>>> succeeds?
>>>
>>>
>>> For me this patch causes an ICE when building libgcc during an
>>> aarch64-none-elf build.
>>> It's a segfault with the trace:
>>> 0xb0ac2a crash_signal
>>>         $SRC/gcc/toplev.c:335
>>> 0xa7cfd7 init_subregs_of_mode()
>>>         $SRC/gcc/reginfo.c:1345
>>> 0x96fc4b init_costs
>>>         $SRC/gcc/ira-costs.c:2187
>>> 0x97419e ira_set_pseudo_classes(bool, _IO_FILE*)
>>>         $SRC/gcc/ira-costs.c:2237
>>> 0x106fd1e alloc_global_sched_pressure_data
>>>         $SRC/gcc/haifa-sched.c:7244
>>> 0x106fd1e sched_init()
>>>         $SRC/gcc/haifa-sched.c:7394
>>> 0x107109a haifa_sched_init()
>>>         $SRC/gcc/haifa-sched.c:7406
>>> 0xab37ac schedule_insns()
>>>         $SRC/gcc/sched-rgn.c:3504
>>> 0xab3f5b rest_of_handle_sched
>>>         $SRC/gcc/sched-rgn.c:3717
>>> 0xab3f5b execute
>>>         $SRC/gcc/sched-rgn.c:3825
>>
>> Also on x86_64-linux-gnu when building -m32 multilib:
>>
>> Program received signal SIGSEGV, Segmentation fault.
>> 0x0000000000d28264 in init_subregs_of_mode () at
>> /home/uros/gcc-svn/trunk/gcc/reginfo.c:1345
>> 1345            FOR_EACH_INSN_DEF (def, insn)
>> (gdb) p insn
>> $1 = (rtx_insn *) 0x7fffef9f4d40
>> (gdb) p debug_rtx (insn)
>> (code_label 60 31 39 10 9 "" [3 uses])
>> $2 = void
>> (gdb) p def
>> $3 = (df_ref) 0x0
>
> Bah, sorry.  I test with --enable-checking=yes,rtl,df, and it turns out
> that df checking masks this kind of problem.  -m32 builds (and tests)
> fine with it but not without.
>
> Here's the patch again with the obvious fix.  Retesting now with just
> --enable-checking=yes,rtl.

Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabihf.
OK to install?

Thanks,
Richard

> gcc/
> 	PR rtl-optimization/69577
> 	* reginfo.c (record_subregs_of_mode): Add a partial_def parameter.
> 	(find_subregs_of_mode): Update accordingly.  Iterate over partial
> 	definitions.
>
> gcc/testsuite/
> 	PR rtl-optimization/69577
> 	* gcc.target/i386/pr69577.c: New test.
>
> diff --git a/gcc/reginfo.c b/gcc/reginfo.c
> index 6814eed..ccf53bf 100644
> --- a/gcc/reginfo.c
> +++ b/gcc/reginfo.c
> @@ -1244,8 +1244,16 @@ simplifiable_subregs (const subreg_shape &shape)
>  static HARD_REG_SET **valid_mode_changes;
>  static obstack valid_mode_changes_obstack;
>  
> +/* Restrict the choice of register for SUBREG_REG (SUBREG) based
> +   on information about SUBREG.
> +
> +   If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner
> +   register and we want to ensure that the other parts of the inner
> +   register are correctly preserved.  If !PARTIAL_DEF we need to
> +   ensure that SUBREG itself can be formed.  */
> +
>  static void
> -record_subregs_of_mode (rtx subreg)
> +record_subregs_of_mode (rtx subreg, bool partial_def)
>  {
>    unsigned int regno;
>  
> @@ -1256,15 +1264,41 @@ record_subregs_of_mode (rtx subreg)
>    if (regno < FIRST_PSEUDO_REGISTER)
>      return;
>  
> +  subreg_shape shape (shape_of_subreg (subreg));
> +  if (partial_def)
> +    {
> +      /* The number of independently-accessible SHAPE.outer_mode values
> +	 in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE.
> +	 We need to check that the assignment will preserve all the other
> +	 SIZE-byte chunks in the inner register besides the one that
> +	 includes SUBREG.
> +
> +	 In practice it is enough to check whether an equivalent
> +	 SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed.
> +	 If the underlying registers are small enough, both subregs will
> +	 be valid.  If the underlying registers are too large, one of the
> +	 subregs will be invalid.
> +
> +	 This relies on the fact that we've already been passed
> +	 SUBREG with PARTIAL_DEF set to false.  */
> +      unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode),
> +			       GET_MODE_SIZE (shape.outer_mode));
> +      gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode));
> +      if (shape.offset >= size)
> +	shape.offset -= size;
> +      else
> +	shape.offset += size;
> +    }
> +
>    if (valid_mode_changes[regno])
>      AND_HARD_REG_SET (*valid_mode_changes[regno],
> -		      simplifiable_subregs (shape_of_subreg (subreg)));
> +		      simplifiable_subregs (shape));
>    else
>      {
>        valid_mode_changes[regno]
>  	= XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET);
>        COPY_HARD_REG_SET (*valid_mode_changes[regno],
> -			 simplifiable_subregs (shape_of_subreg (subreg)));
> +			 simplifiable_subregs (shape));
>      }
>  }
>  
> @@ -1277,7 +1311,7 @@ find_subregs_of_mode (rtx x)
>    int i;
>  
>    if (code == SUBREG)
> -    record_subregs_of_mode (x);
> +    record_subregs_of_mode (x, false);
>  
>    /* Time for some deep diving.  */
>    for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
> @@ -1305,7 +1339,14 @@ init_subregs_of_mode (void)
>    FOR_EACH_BB_FN (bb, cfun)
>      FOR_BB_INSNS (bb, insn)
>        if (NONDEBUG_INSN_P (insn))
> -        find_subregs_of_mode (PATTERN (insn));
> +	{
> +	  find_subregs_of_mode (PATTERN (insn));
> +	  df_ref def;
> +	  FOR_EACH_INSN_DEF (def, insn)
> +	    if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL)
> +		&& df_read_modify_subreg_p (DF_REF_REG (def)))
> +	      record_subregs_of_mode (DF_REF_REG (def), true);
> +	}
>  }
>  
>  const HARD_REG_SET *
> diff --git a/gcc/testsuite/gcc.target/i386/pr69577.c b/gcc/testsuite/gcc.target/i386/pr69577.c
> new file mode 100644
> index 0000000..d680539
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr69577.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */
> +
> +typedef unsigned int u32;
> +typedef unsigned __int128 u128;
> +typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));
> +
> +u128 __attribute__ ((noinline, noclone))
> +foo (u32 u32_0, v32u128 v32u128_0)
> +{
> +  v32u128_0[0] >>= u32_0;
> +  v32u128_0 += (v32u128) {u32_0, 0};
> +  return u32_0 + v32u128_0[0] + v32u128_0[1];
> +}
> +
> +int
> +main()
> +{
> +  u128 x = foo (1, (v32u128) {1, 4});
> +  if (x != 6)
> +    __builtin_abort ();
> +  return 0;
> +}
Richard Henderson Feb. 4, 2016, 2:27 a.m. UTC | #2
On 02/03/2016 04:56 AM, Richard Sandiford wrote:
>
> gcc/
> 	PR rtl-optimization/69577
> 	* reginfo.c (record_subregs_of_mode): Add a partial_def parameter.
> 	(find_subregs_of_mode): Update accordingly.  Iterate over partial
> 	definitions.
>
> gcc/testsuite/
> 	PR rtl-optimization/69577
> 	* gcc.target/i386/pr69577.c: New test.

Ok.


r~
diff mbox

Patch

diff --git a/gcc/reginfo.c b/gcc/reginfo.c
index 6814eed..ccf53bf 100644
--- a/gcc/reginfo.c
+++ b/gcc/reginfo.c
@@ -1244,8 +1244,16 @@  simplifiable_subregs (const subreg_shape &shape)
 static HARD_REG_SET **valid_mode_changes;
 static obstack valid_mode_changes_obstack;
 
+/* Restrict the choice of register for SUBREG_REG (SUBREG) based
+   on information about SUBREG.
+
+   If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner
+   register and we want to ensure that the other parts of the inner
+   register are correctly preserved.  If !PARTIAL_DEF we need to
+   ensure that SUBREG itself can be formed.  */
+
 static void
-record_subregs_of_mode (rtx subreg)
+record_subregs_of_mode (rtx subreg, bool partial_def)
 {
   unsigned int regno;
 
@@ -1256,15 +1264,41 @@  record_subregs_of_mode (rtx subreg)
   if (regno < FIRST_PSEUDO_REGISTER)
     return;
 
+  subreg_shape shape (shape_of_subreg (subreg));
+  if (partial_def)
+    {
+      /* The number of independently-accessible SHAPE.outer_mode values
+	 in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE.
+	 We need to check that the assignment will preserve all the other
+	 SIZE-byte chunks in the inner register besides the one that
+	 includes SUBREG.
+
+	 In practice it is enough to check whether an equivalent
+	 SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed.
+	 If the underlying registers are small enough, both subregs will
+	 be valid.  If the underlying registers are too large, one of the
+	 subregs will be invalid.
+
+	 This relies on the fact that we've already been passed
+	 SUBREG with PARTIAL_DEF set to false.  */
+      unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode),
+			       GET_MODE_SIZE (shape.outer_mode));
+      gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode));
+      if (shape.offset >= size)
+	shape.offset -= size;
+      else
+	shape.offset += size;
+    }
+
   if (valid_mode_changes[regno])
     AND_HARD_REG_SET (*valid_mode_changes[regno],
-		      simplifiable_subregs (shape_of_subreg (subreg)));
+		      simplifiable_subregs (shape));
   else
     {
       valid_mode_changes[regno]
 	= XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET);
       COPY_HARD_REG_SET (*valid_mode_changes[regno],
-			 simplifiable_subregs (shape_of_subreg (subreg)));
+			 simplifiable_subregs (shape));
     }
 }
 
@@ -1277,7 +1311,7 @@  find_subregs_of_mode (rtx x)
   int i;
 
   if (code == SUBREG)
-    record_subregs_of_mode (x);
+    record_subregs_of_mode (x, false);
 
   /* Time for some deep diving.  */
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
@@ -1305,7 +1339,14 @@  init_subregs_of_mode (void)
   FOR_EACH_BB_FN (bb, cfun)
     FOR_BB_INSNS (bb, insn)
       if (NONDEBUG_INSN_P (insn))
-        find_subregs_of_mode (PATTERN (insn));
+	{
+	  find_subregs_of_mode (PATTERN (insn));
+	  df_ref def;
+	  FOR_EACH_INSN_DEF (def, insn)
+	    if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL)
+		&& df_read_modify_subreg_p (DF_REF_REG (def)))
+	      record_subregs_of_mode (DF_REF_REG (def), true);
+	}
 }
 
 const HARD_REG_SET *
diff --git a/gcc/testsuite/gcc.target/i386/pr69577.c b/gcc/testsuite/gcc.target/i386/pr69577.c
new file mode 100644
index 0000000..d680539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr69577.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */
+
+typedef unsigned int u32;
+typedef unsigned __int128 u128;
+typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));
+
+u128 __attribute__ ((noinline, noclone))
+foo (u32 u32_0, v32u128 v32u128_0)
+{
+  v32u128_0[0] >>= u32_0;
+  v32u128_0 += (v32u128) {u32_0, 0};
+  return u32_0 + v32u128_0[0] + v32u128_0[1];
+}
+
+int
+main()
+{
+  u128 x = foo (1, (v32u128) {1, 4});
+  if (x != 6)
+    __builtin_abort ();
+  return 0;
+}