diff mbox

[rs6000] Fix vec_shr define_expand

Message ID 55DC7837.1020404@linux.vnet.ibm.com
State New
Headers show

Commit Message

Pat Haugen Aug. 25, 2015, 2:14 p.m. UTC
The following patch fixes the vec_shr expander to do a shift instead of 
a rotate. CPU2006 benchmark 482.sphinx3 recently started failing due to 
this issue.  Bootstrapped and tested on ppc64/ppc64le with no new 
regressions. Ok for trunk? And ok for 4.9/5 (with equivalent change to 
vec_shl expander which exists in those releases) after bootstrap/regtest?

-Pat


2015-08-25  Pat Haugen  <pthaugen@us.ibm.com>

         * config/rs6000/vector.md (vec_shr_<mode>): Fix to do a shift
         instead of a rotate.


gcc/testsuite:
         * gcc.target/powerpc/vec-shr.c: New.

Comments

David Edelsohn Aug. 25, 2015, 3:04 p.m. UTC | #1
On Tue, Aug 25, 2015 at 10:14 AM, Pat Haugen
<pthaugen@linux.vnet.ibm.com> wrote:
> The following patch fixes the vec_shr expander to do a shift instead of a
> rotate. CPU2006 benchmark 482.sphinx3 recently started failing due to this
> issue.  Bootstrapped and tested on ppc64/ppc64le with no new regressions. Ok
> for trunk? And ok for 4.9/5 (with equivalent change to vec_shl expander
> which exists in those releases) after bootstrap/regtest?
>
> -Pat
>
>
> 2015-08-25  Pat Haugen  <pthaugen@us.ibm.com>
>
>         * config/rs6000/vector.md (vec_shr_<mode>): Fix to do a shift
>         instead of a rotate.
>
> gcc/testsuite:
>         * gcc.target/powerpc/vec-shr.c: New.

This is okay.

As Peter and I noticed:

+  zero_reg = gen_reg_rtx(<MODE>mode);

This needs a space after gen_reg_rtx.

Thanks, David
diff mbox

Patch

Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md	(revision 227041)
+++ gcc/config/rs6000/vector.md	(working copy)
@@ -977,6 +977,8 @@  (define_expand "movmisalign<mode>"
 ;; General shift amounts can be supported using vsro + vsr. We're
 ;; not expecting to see these yet (the vectorizer currently
 ;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as
+;; 'shift toward element 0', so it is a shr for LE and a shl for BE.
 (define_expand "vec_shr_<mode>"
   [(match_operand:VEC_L 0 "vlogical_operand" "")
    (match_operand:VEC_L 1 "vlogical_operand" "")
@@ -987,6 +989,7 @@  (define_expand "vec_shr_<mode>"
   rtx bitshift = operands[2];
   rtx shift;
   rtx insn;
+  rtx zero_reg, op1, op2;
   HOST_WIDE_INT bitshift_val;
   HOST_WIDE_INT byteshift_val;
 
@@ -996,19 +999,29 @@  (define_expand "vec_shr_<mode>"
   if (bitshift_val & 0x7)
     FAIL;
   byteshift_val = (bitshift_val >> 3);
+  zero_reg = gen_reg_rtx(<MODE>mode);
+  emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode));
   if (!BYTES_BIG_ENDIAN)
-    byteshift_val = 16 - byteshift_val;
+    {
+      byteshift_val = 16 - byteshift_val;
+      op1 = zero_reg;
+      op2 = operands[1];
+    }
+  else
+    {
+      op1 = operands[1];
+      op2 = zero_reg;
+    }
+
   if (TARGET_VSX && (byteshift_val & 0x3) == 0)
     {
       shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
-      insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
-				     shift);
+      insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift);
     }
   else
     {
       shift = gen_rtx_CONST_INT (QImode, byteshift_val);
-      insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
-					shift);
+      insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift);
     }
 
   emit_insn (insn);
Index: gcc/testsuite/gcc.target/powerpc/vec-shr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-shr.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-shr.c	(working copy)
@@ -0,0 +1,34 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <stdlib.h>
+
+typedef struct { double r, i; } complex;
+#define LEN 30
+complex c[LEN];
+double d[LEN];
+
+void
+foo (complex *c, double *d, int len1)
+{
+  int i;
+  for (i = 0; i < len1; i++)
+    {
+      c[i].r = d[i];
+      c[i].i = 0.0;
+    }
+}
+
+int
+main (void)
+{
+  int i;
+  for (i = 0; i < LEN; i++)
+    d[i] = (double) i;
+  foo (c, d, LEN);
+  for (i=0;i<LEN;i++)
+    if ((c[i].r != (double) i) || (c[i].i != 0.0))
+      abort ();
+  return 0;
+}
+