diff mbox

Fix REE for vector modes (PR rtl-optimization/64286, take 2)

Message ID 20150113161136.GC1405@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Jan. 13, 2015, 4:11 p.m. UTC
On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote:
> On 01/12/15 12:59, Jakub Jelinek wrote:
> >Hi!
> >
> >As mentioned in the PR, giving up for all vector mode extensions
> >is unnecessary, but unlike scalar integer extensions, where the low part
> >of the extended value is the original value, for vectors this is not true,
> >thus the old value is lost.  Which means we can perform REE, but only if
> >all uses of the definition are the same (code+mode) extension.
> >
> >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> >2015-01-12  Jakub Jelinek  <jakub@redhat.com>
> >
> >	PR rtl-optimization/64286
> >	* ree.c (add_removable_extension): Don't add vector mode
> >	extensions if all uses of the source register aren't the same
> >	vector extensions.
> >
> >	* gcc.target/i386/avx2-pr64286.c: New test.
> Does it make sense to remove your change for 59754 in combine_reaching_defs?
> Shouldn't this patch handle that case as well?

You're right, this patch handles that too.

New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2015-01-13  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/64286
	* ree.c (combine_reaching_defs): Move part of comment earlier,
	remove !SCALAR_INT_MODE_P check.
	(add_removable_extension): Don't add vector mode
	extensions if all uses of the source register aren't the same
	vector extensions.

	* gcc.target/i386/avx2-pr64286.c: New test.



	Jakub

Comments

Jeff Law Jan. 13, 2015, 7:10 p.m. UTC | #1
On 01/13/15 09:11, Jakub Jelinek wrote:
> On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote:
>> On 01/12/15 12:59, Jakub Jelinek wrote:
>>> Hi!
>>>
>>> As mentioned in the PR, giving up for all vector mode extensions
>>> is unnecessary, but unlike scalar integer extensions, where the low part
>>> of the extended value is the original value, for vectors this is not true,
>>> thus the old value is lost.  Which means we can perform REE, but only if
>>> all uses of the definition are the same (code+mode) extension.
>>>
>>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>>>
>>> 2015-01-12  Jakub Jelinek  <jakub@redhat.com>
>>>
>>> 	PR rtl-optimization/64286
>>> 	* ree.c (add_removable_extension): Don't add vector mode
>>> 	extensions if all uses of the source register aren't the same
>>> 	vector extensions.
>>>
>>> 	* gcc.target/i386/avx2-pr64286.c: New test.
>> Does it make sense to remove your change for 59754 in combine_reaching_defs?
>> Shouldn't this patch handle that case as well?
>
> You're right, this patch handles that too.
>
> New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?
>
> 2015-01-13  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR rtl-optimization/64286
> 	* ree.c (combine_reaching_defs): Move part of comment earlier,
> 	remove !SCALAR_INT_MODE_P check.
> 	(add_removable_extension): Don't add vector mode
> 	extensions if all uses of the source register aren't the same
> 	vector extensions.
>
> 	* gcc.target/i386/avx2-pr64286.c: New test.
OK.  Thanks for taking care of this.  I can't seem to find time for 
doing any real debugging or bugfixing.

jeff
diff mbox

Patch

--- gcc/ree.c.jj	2015-01-12 21:29:07.023060045 +0100
+++ gcc/ree.c	2015-01-13 09:43:32.158449885 +0100
@@ -783,6 +783,17 @@  combine_reaching_defs (ext_cand *cand, c
        != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
   if (copy_needed)
     {
+      /* Considering transformation of
+	 (set (reg1) (expression))
+	 ...
+	 (set (reg2) (any_extend (reg1)))
+
+	 into
+
+	 (set (reg2) (any_extend (expression)))
+	 (set (reg1) (reg2))
+	 ...  */
+
       /* In theory we could handle more than one reaching def, it
 	 just makes the code to update the insn stream more complex.  */
       if (state->defs_list.length () != 1)
@@ -798,18 +809,6 @@  combine_reaching_defs (ext_cand *cand, c
       if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
 	return false;
 
-      /* Transformation of
-	 (set (reg1) (expression))
-	 (set (reg2) (any_extend (reg1)))
-	 into
-	 (set (reg2) (any_extend (expression)))
-	 (set (reg1) (reg2))
-	 is only valid for scalar integral modes, as it relies on the low
-	 subreg of reg1 to have the value of (expression), which is not true
-	 e.g. for vector modes.  */
-      if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
-	return false;
-
       machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
       rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
 
@@ -1027,6 +1026,7 @@  add_removable_extension (const_rtx expr,
 	 different extension.  FIXME: this obviously can be improved.  */
       for (def = defs; def; def = def->next)
 	if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
+	    && idx != -1U
 	    && (cand = &(*insn_list)[idx - 1])
 	    && cand->code != code)
 	  {
@@ -1038,6 +1038,57 @@  add_removable_extension (const_rtx expr,
 	      }
 	    return;
 	  }
+	/* For vector mode extensions, ensure that all uses of the
+	   XEXP (src, 0) register are the same extension (both code
+	   and to which mode), as unlike integral extensions the lowpart
+	   subreg of the sign/zero extended register is not equal
+	   to the original register, so we have to change all uses or
+	   none.  */
+	else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
+	  {
+	    if (idx == 0)
+	      {
+		struct df_link *ref_chain, *ref_link;
+
+		ref_chain = DF_REF_CHAIN (def->ref);
+		for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+		  {
+		    if (ref_link->ref == NULL
+			|| DF_REF_INSN_INFO (ref_link->ref) == NULL)
+		      {
+			idx = -1U;
+			break;
+		      }
+		    rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
+		    const_rtx use_set;
+		    if (use_insn == insn || DEBUG_INSN_P (use_insn))
+		      continue;
+		    if (!(use_set = single_set (use_insn))
+			|| !REG_P (SET_DEST (use_set))
+			|| GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
+			|| GET_CODE (SET_SRC (use_set)) != code
+			|| !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
+					 XEXP (src, 0)))
+		      {
+			idx = -1U;
+			break;
+		      }
+		  }
+		if (idx == -1U)
+		  def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
+	      }
+	    if (idx == -1U)
+	      {
+		if (dump_file)
+		  {
+		    fprintf (dump_file, "Cannot eliminate extension:\n");
+		    print_rtl_single (dump_file, insn);
+		    fprintf (dump_file,
+			     " because some vector uses aren't extension\n");
+		  }
+		return;
+	      }
+	  }
 
       /* Then add the candidate to the list and insert the reaching definitions
          into the definition map.  */
--- gcc/testsuite/gcc.target/i386/avx2-pr64286.c.jj	2015-01-13 09:33:53.986612033 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr64286.c	2015-01-13 09:33:53.986612033 +0100
@@ -0,0 +1,37 @@ 
+/* PR rtl-optimization/64286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <x86intrin.h>
+#include "avx2-check.h"
+
+__m128i v;
+__m256i w;
+
+__attribute__((noinline, noclone)) void
+foo (__m128i *p, __m128i *q)
+{
+  __m128i a = _mm_loadu_si128 (p);
+  __m128i b = _mm_xor_si128 (a, v);
+  w = _mm256_cvtepu8_epi16 (a);
+  *q = b;
+}
+
+static void
+avx2_test (void)
+{
+  v = _mm_set1_epi8 (0x40);
+  __m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+  __m128i d;
+  foo (&c, &d);
+  __m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49,
+			    0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41);
+  __m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9,
+				8, 7, 6, 5, 4, 3, 2, 1);
+  if (memcmp (&w, &f, sizeof (w)) != 0
+      || memcmp (&d, &e, sizeof (d)) != 0)
+    abort ();
+}