diff mbox series

Fix x86-64 zero_extended_scalar_load_operand caused wrong-code (PR target/82703)

Message ID 20171026214856.GX14653@tucnak
State New
Headers show
Series Fix x86-64 zero_extended_scalar_load_operand caused wrong-code (PR target/82703) | expand

Commit Message

Jakub Jelinek Oct. 26, 2017, 9:48 p.m. UTC
Hi!

The following testcase is miscompiled, because due to STV we end up
with a load from V2DFmode MEM for which get_pool_entry returns
a V1TImode vector.  maybe_get_pool_constant doesn't have code
to handle mode differences between what get_pool_constant returns
and the requested mode.  While it wouldn't be that hard to add,
this all is done properly in avoid_constant_pool_reference already,
which does everything maybe_get_pool_constant needs to do.

So, this patch just removes the maybe_get_pool_constant function and
uses avoid_constant_pool_reference instead.  I've looked into history
and the latter has been introduced in 2001, the former in 2002, but at
that point the latter didn't do any delegitimization (but has the
mode conversions at that point), while the former had hand-written
delegitimization.  But since then delegitimization has been added to
a_c_p_r and custom delegitimization changed into the full blown one in
m_g_p_c.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.x?

2017-10-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/82703
	* config/i386/i386-protos.h (maybe_get_pool_constant): Removed.
	* config/i386/i386.c (maybe_get_pool_constant): Removed.
	(ix86_split_to_parts): Use avoid_constant_pool_reference instead of
	maybe_get_pool_constant.
	* config/i386/predicates.md (zero_extended_scalar_load_operand):
	Likewise.

	* gcc.dg/pr82703.c: New test.


	Jakub

Comments

Uros Bizjak Oct. 27, 2017, 8 a.m. UTC | #1
On Thu, Oct 26, 2017 at 11:48 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> The following testcase is miscompiled, because due to STV we end up
> with a load from V2DFmode MEM for which get_pool_entry returns
> a V1TImode vector.  maybe_get_pool_constant doesn't have code
> to handle mode differences between what get_pool_constant returns
> and the requested mode.  While it wouldn't be that hard to add,
> this all is done properly in avoid_constant_pool_reference already,
> which does everything maybe_get_pool_constant needs to do.
>
> So, this patch just removes the maybe_get_pool_constant function and
> uses avoid_constant_pool_reference instead.  I've looked into history
> and the latter has been introduced in 2001, the former in 2002, but at
> that point the latter didn't do any delegitimization (but has the
> mode conversions at that point), while the former had hand-written
> delegitimization.  But since then delegitimization has been added to
> a_c_p_r and custom delegitimization changed into the full blown one in
> m_g_p_c.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.x?
>
> 2017-10-26  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/82703
>         * config/i386/i386-protos.h (maybe_get_pool_constant): Removed.
>         * config/i386/i386.c (maybe_get_pool_constant): Removed.
>         (ix86_split_to_parts): Use avoid_constant_pool_reference instead of
>         maybe_get_pool_constant.
>         * config/i386/predicates.md (zero_extended_scalar_load_operand):
>         Likewise.
>
>         * gcc.dg/pr82703.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386-protos.h.jj    2017-04-20 12:19:54.000000000 +0200
> +++ gcc/config/i386/i386-protos.h       2017-10-26 13:14:05.172571175 +0200
> @@ -282,8 +282,6 @@ extern bool i386_pe_type_dllexport_p (tr
>
>  extern int i386_pe_reloc_rw_mask (void);
>
> -extern rtx maybe_get_pool_constant (rtx);
> -
>  extern char internal_label_prefix[16];
>  extern int internal_label_prefix_len;
>
> --- gcc/config/i386/i386.c.jj   2017-09-30 10:26:43.000000000 +0200
> +++ gcc/config/i386/i386.c      2017-10-26 13:15:03.917891804 +0200
> @@ -19830,20 +19830,6 @@ ix86_expand_clear (rtx dest)
>    emit_insn (tmp);
>  }
>
> -/* X is an unchanging MEM.  If it is a constant pool reference, return
> -   the constant pool rtx, else NULL.  */
> -
> -rtx
> -maybe_get_pool_constant (rtx x)
> -{
> -  x = ix86_delegitimize_address (XEXP (x, 0));
> -
> -  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
> -    return get_pool_constant (x);
> -
> -  return NULL_RTX;
> -}
> -
>  void
>  ix86_expand_move (machine_mode mode, rtx operands[])
>  {
> @@ -25371,11 +25357,7 @@ ix86_split_to_parts (rtx operand, rtx *p
>    /* Optimize constant pool reference to immediates.  This is used by fp
>       moves, that force all constants to memory to allow combining.  */
>    if (MEM_P (operand) && MEM_READONLY_P (operand))
> -    {
> -      rtx tmp = maybe_get_pool_constant (operand);
> -      if (tmp)
> -       operand = tmp;
> -    }
> +    operand = avoid_constant_pool_reference (operand);
>
>    if (MEM_P (operand) && !offsettable_memref_p (operand))
>      {
> --- gcc/config/i386/predicates.md.jj    2017-04-20 12:19:54.000000000 +0200
> +++ gcc/config/i386/predicates.md       2017-10-26 13:16:27.399926361 +0200
> @@ -975,9 +975,9 @@ (define_predicate "zero_extended_scalar_
>    (match_code "mem")
>  {
>    unsigned n_elts;
> -  op = maybe_get_pool_constant (op);
> +  op = avoid_constant_pool_reference (op);
>
> -  if (!(op && GET_CODE (op) == CONST_VECTOR))
> +  if (GET_CODE (op) != CONST_VECTOR)
>      return false;
>
>    n_elts = CONST_VECTOR_NUNITS (op);
> --- gcc/testsuite/gcc.dg/pr82703.c.jj   2017-10-26 13:19:23.879885830 +0200
> +++ gcc/testsuite/gcc.dg/pr82703.c      2017-10-26 13:18:42.000000000 +0200
> @@ -0,0 +1,28 @@
> +/* PR target/82703 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-tree-sra -ftree-vectorize" } */
> +
> +__attribute__((noinline, noclone)) void
> +compare (const double *p, const double *q)
> +{
> +  for (int i = 0; i < 3; ++i)
> +    if (p[i] != q[i])
> +      __builtin_abort ();
> +}
> +
> +double vr[3] = { 4, 4, 4 };
> +
> +int
> +main ()
> +{
> +  double v1[3] = { 1, 2, 3 };
> +  double v2[3] = { 3, 2, 1 };
> +  double v3[3];
> +  __builtin_memcpy (v3, v1, sizeof (v1));
> +  for (int i = 0; i < 3; ++i)
> +    v3[i] += v2[i];
> +  for (int i = 0; i < 3; ++i)
> +    v1[i] += v2[i];
> +  compare (v3, vr);
> +  return 0;
> +}
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386-protos.h.jj	2017-04-20 12:19:54.000000000 +0200
+++ gcc/config/i386/i386-protos.h	2017-10-26 13:14:05.172571175 +0200
@@ -282,8 +282,6 @@  extern bool i386_pe_type_dllexport_p (tr
 
 extern int i386_pe_reloc_rw_mask (void);
 
-extern rtx maybe_get_pool_constant (rtx);
-
 extern char internal_label_prefix[16];
 extern int internal_label_prefix_len;
 
--- gcc/config/i386/i386.c.jj	2017-09-30 10:26:43.000000000 +0200
+++ gcc/config/i386/i386.c	2017-10-26 13:15:03.917891804 +0200
@@ -19830,20 +19830,6 @@  ix86_expand_clear (rtx dest)
   emit_insn (tmp);
 }
 
-/* X is an unchanging MEM.  If it is a constant pool reference, return
-   the constant pool rtx, else NULL.  */
-
-rtx
-maybe_get_pool_constant (rtx x)
-{
-  x = ix86_delegitimize_address (XEXP (x, 0));
-
-  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
-    return get_pool_constant (x);
-
-  return NULL_RTX;
-}
-
 void
 ix86_expand_move (machine_mode mode, rtx operands[])
 {
@@ -25371,11 +25357,7 @@  ix86_split_to_parts (rtx operand, rtx *p
   /* Optimize constant pool reference to immediates.  This is used by fp
      moves, that force all constants to memory to allow combining.  */
   if (MEM_P (operand) && MEM_READONLY_P (operand))
-    {
-      rtx tmp = maybe_get_pool_constant (operand);
-      if (tmp)
-	operand = tmp;
-    }
+    operand = avoid_constant_pool_reference (operand);
 
   if (MEM_P (operand) && !offsettable_memref_p (operand))
     {
--- gcc/config/i386/predicates.md.jj	2017-04-20 12:19:54.000000000 +0200
+++ gcc/config/i386/predicates.md	2017-10-26 13:16:27.399926361 +0200
@@ -975,9 +975,9 @@  (define_predicate "zero_extended_scalar_
   (match_code "mem")
 {
   unsigned n_elts;
-  op = maybe_get_pool_constant (op);
+  op = avoid_constant_pool_reference (op);
 
-  if (!(op && GET_CODE (op) == CONST_VECTOR))
+  if (GET_CODE (op) != CONST_VECTOR)
     return false;
 
   n_elts = CONST_VECTOR_NUNITS (op);
--- gcc/testsuite/gcc.dg/pr82703.c.jj	2017-10-26 13:19:23.879885830 +0200
+++ gcc/testsuite/gcc.dg/pr82703.c	2017-10-26 13:18:42.000000000 +0200
@@ -0,0 +1,28 @@ 
+/* PR target/82703 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-sra -ftree-vectorize" } */
+
+__attribute__((noinline, noclone)) void
+compare (const double *p, const double *q)
+{
+  for (int i = 0; i < 3; ++i)
+    if (p[i] != q[i])
+      __builtin_abort ();
+}
+
+double vr[3] = { 4, 4, 4 };
+
+int
+main ()
+{
+  double v1[3] = { 1, 2, 3 };
+  double v2[3] = { 3, 2, 1 };
+  double v3[3];
+  __builtin_memcpy (v3, v1, sizeof (v1));
+  for (int i = 0; i < 3; ++i)
+    v3[i] += v2[i];
+  for (int i = 0; i < 3; ++i)
+    v1[i] += v2[i];
+  compare (v3, vr);
+  return 0;
+}