diff mbox

Fix regcprop noop move handling (PR rtl-optimization/69896)

Message ID 20160222212606.GS3017@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Feb. 22, 2016, 9:26 p.m. UTC
Hi!

The following testcase is miscompiled, because prepare_shrink_wrap
attempts to copyprop_hardreg_forward_1 the first bb.  We see
DImode rbx being copied to DImode r11, and then we have (dead since
postreload) an assignment of SImode r11d to SImode ebx, and later on
some uses of DImode r11.  copyprop_hardreg_forward_1 notes the oldest
reg is rbx, and replaces in the SImode move r11d with ebx (that is fine),
and later on the DImode uses of r11 with rbx.  The problem is that
we now have a DImode rbx setter, then SImode noop move of ebx to ebx
(which by definition means the upper bits are undefined), and then DImode
rbx use.  If the noop move is DCEd soon enough, that wouldn't be a problem,
but before that happens, regrename is performed and we get the last
use of DImode rbx replaced with rcx and the ebx, ebx noop move changed int
ecx = ebx SImode move.  That of course doesn't work.

The problem is that copyprop_hardreg_forward_1 ignores noop_p moves (most
likely in the hope that they will be soon optimized away).  That is fine
if they use as wide mode as we have recorded for the register, but if it is
narrower, we can choose to either remove the noop move, or adjust the
regcprop data structure.  The patch below chooses to do both of these,
the first one if DCE would remove the noop move, the latter otherwise.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-02-22  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/69896
	* regcprop.c: Include cfgrtl.h.
	(copyprop_hardreg_forward_1): If noop_p insn uses narrower
	than remembered mode, either delete it (if noop_move_p), or
	treat like copy_p but not noop_p instruction.

	* gcc.dg/pr69896.c: New test.


	Jakub

Comments

Jeff Law Feb. 25, 2016, 6:29 a.m. UTC | #1
On 02/22/2016 02:26 PM, Jakub Jelinek wrote:
> Hi!
>
> The following testcase is miscompiled, because prepare_shrink_wrap
> attempts to copyprop_hardreg_forward_1 the first bb.  We see
> DImode rbx being copied to DImode r11, and then we have (dead since
> postreload) an assignment of SImode r11d to SImode ebx, and later on
> some uses of DImode r11.  copyprop_hardreg_forward_1 notes the oldest
> reg is rbx, and replaces in the SImode move r11d with ebx (that is fine),
> and later on the DImode uses of r11 with rbx.  The problem is that
> we now have a DImode rbx setter, then SImode noop move of ebx to ebx
> (which by definition means the upper bits are undefined), and then DImode
> rbx use.  If the noop move is DCEd soon enough, that wouldn't be a problem,
> but before that happens, regrename is performed and we get the last
> use of DImode rbx replaced with rcx and the ebx, ebx noop move changed int
> ecx = ebx SImode move.  That of course doesn't work.
>
> The problem is that copyprop_hardreg_forward_1 ignores noop_p moves (most
> likely in the hope that they will be soon optimized away).  That is fine
> if they use as wide mode as we have recorded for the register, but if it is
> narrower, we can choose to either remove the noop move, or adjust the
> regcprop data structure.  The patch below chooses to do both of these,
> the first one if DCE would remove the noop move, the latter otherwise.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-02-22  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR rtl-optimization/69896
> 	* regcprop.c: Include cfgrtl.h.
> 	(copyprop_hardreg_forward_1): If noop_p insn uses narrower
> 	than remembered mode, either delete it (if noop_move_p), or
> 	treat like copy_p but not noop_p instruction.
>
> 	* gcc.dg/pr69896.c: New test.
One could argue we should have DCE'd the nop move before we get here, 
but even if we did, making regcprop safe WRT narrower nop-moves is the 
right thing to do IMHO.

OK.

Jeff
diff mbox

Patch

--- gcc/regcprop.c.jj	2016-01-04 14:55:53.000000000 +0100
+++ gcc/regcprop.c	2016-02-22 16:47:49.587425032 +0100
@@ -32,6 +32,7 @@ 
 #include "addresses.h"
 #include "tree-pass.h"
 #include "rtl-iter.h"
+#include "cfgrtl.h"
 
 /* The following code does forward propagation of hard register copies.
    The object is to eliminate as many dependencies as possible, so that
@@ -739,9 +740,9 @@  static bool
 copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd)
 {
   bool anything_changed = false;
-  rtx_insn *insn;
+  rtx_insn *insn, *next;
 
-  for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
+  for (insn = BB_HEAD (bb); ; insn = next)
     {
       int n_ops, i, predicated;
       bool is_asm, any_replacements;
@@ -751,6 +752,7 @@  copyprop_hardreg_forward_1 (basic_block
       bool changed = false;
       struct kill_set_value_data ksvd;
 
+      next = NEXT_INSN (insn);
       if (!NONDEBUG_INSN_P (insn))
 	{
 	  if (DEBUG_INSN_P (insn))
@@ -1042,6 +1044,23 @@  copyprop_hardreg_forward_1 (basic_block
       bool noop_p = (copy_p
 		     && rtx_equal_p (SET_DEST (set), SET_SRC (set)));
 
+      /* If a noop move is using narrower mode than we have recorded,
+	 we need to either remove the noop move, or kill_set_value.  */
+      if (noop_p
+	  && (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set)))
+	      < GET_MODE_BITSIZE (vd->e[REGNO (SET_DEST (set))].mode)))
+	{
+	  if (noop_move_p (insn))
+	    {
+	      bool last = insn == BB_END (bb);
+	      delete_insn (insn);
+	      if (last)
+		break;
+	    }
+	  else
+	    noop_p = false;
+	}
+
       if (!noop_p)
 	{
 	  /* Notice stores.  */
--- gcc/testsuite/gcc.dg/pr69896.c.jj	2016-02-22 17:02:53.219068093 +0100
+++ gcc/testsuite/gcc.dg/pr69896.c	2016-02-22 16:59:43.000000000 +0100
@@ -0,0 +1,33 @@ 
+/* PR rtl-optimization/69896 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-w -O -fcaller-saves -fno-dse -frename-registers -fno-tree-ter" } */
+/* { dg-additional-options "-mno-sse" { target x86_64-*-* i?86-*-* } } */
+
+typedef unsigned short A;
+typedef unsigned short B __attribute__ ((vector_size (32)));
+typedef unsigned int C;
+typedef unsigned int D __attribute__ ((vector_size (32)));
+typedef unsigned long long E;
+typedef unsigned long long F __attribute__ ((vector_size (32)));
+typedef unsigned __int128 G;
+typedef unsigned __int128 H __attribute__ ((vector_size (32)));
+
+G __attribute__ ((noinline, noclone))
+foo (A a, C b, E c, G d, A e, C f, E g, G h, B i, D j, F k, H l, B m, D n, F o, H p)
+{
+  j /= (D) { -c, -c, ~h, 1, ~l[0], -m[0], p[0], 1} | 1;
+  l %= (H) o | 1;
+  l[1] = (l[1] << (e & 127)) | (l[1] >> (e & 127));
+  return j[6] + l[0] + l[1] + n[7] + o[0] + o[2] + o[3] + p[0] + p[1];
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (A) != 2 || sizeof (C) != 4 || sizeof (E) != 8 || sizeof (G) != 16)
+    return 0;
+  G x = foo (0, 1, 2, 3, 4, 5, 6, 7, (B) {}, (D) {}, (F) {}, (H) {}, (B) {}, (D) {}, (F) {}, (H) { 0xffffffffffffffffULL, 0x74a3e4aULL });
+  if ((E) x != 0x00000000074a3e49ULL || (E) (x >> 64) != 1)
+    __builtin_abort ();
+  return 0;
+}