diff mbox

[rs6000] Correct vsx_set and vsx_extract patterns for little endian

Message ID 1384974052.8558.8.camel@oc8801110288.ibm.com
State New
Headers show

Commit Message

Bill Schmidt Nov. 20, 2013, 7 p.m. UTC
Hi,

This patch corrects the various vsx_set_* and vsx_extract_* patterns to
work correctly with little endian.  For the most part this requires the
usual "subtract from N-1" modification, where N is the number of
elements.

Extracting element zero for big endian V2DI or V2DF mode is optimized
using the scalar register equivalence.  Since we can similarly optimize
extraction of element one for little endian V2DI or V2DF mode, I added a
variant that does this.  I am not sure how useful this is, and we can
remove it if you like.

The existing testcase gcc.target/powerpc/pr48258-1.c fails when counting
the number of occurrences of xxsldwi.  It expects to see 6, but we
generate 9 of them for LE.  This is because there are three extracts of
element zero of a V4SF in the testcase.  The scalar equivalence allows
us to avoid the xxsldwi in BE but not in LE.  Therefore I've disabled
this test for little endian.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


gcc:

2013-11-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/vsx.md (vsx_set_<mode>): Adjust for little endian.
	(vsx_extract_<mode>): Likewise.
	(*vsx_extract_<mode>_one_le): New LE variant on
	*vsx_extract_<mode>_zero.
	(vsx_extract_v4sf): Adjust for little endian.


gcc/testsuite:

2013-11-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/pr48258-1.c: Skip for little endian.

Comments

Bill Schmidt Nov. 20, 2013, 7:16 p.m. UTC | #1
On Wed, 2013-11-20 at 13:00 -0600, Bill Schmidt wrote:
> Extracting element zero for big endian V2DI or V2DF mode is optimized
> using the scalar register equivalence.  Since we can similarly optimize
> extraction of element one for big endian V2DI or V2DF mode, I added a
                                ^
Oops.  Clearly I mean little endian here.

> variant that does this.  I am not sure how useful this is, and we can
> remove it if you like.
David Edelsohn Nov. 20, 2013, 9:27 p.m. UTC | #2
On Wed, Nov 20, 2013 at 2:00 PM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi,
>
> This patch corrects the various vsx_set_* and vsx_extract_* patterns to
> work correctly with little endian.  For the most part this requires the
> usual "subtract from N-1" modification, where N is the number of
> elements.
>
> Extracting element zero for big endian V2DI or V2DF mode is optimized
> using the scalar register equivalence.  Since we can similarly optimize
> extraction of element one for big endian V2DI or V2DF mode, I added a
> variant that does this.  I am not sure how useful this is, and we can
> remove it if you like.
>
> The existing testcase gcc.target/powerpc/pr48258-1.c fails when counting
> the number of occurrences of xxsldwi.  It expects to see 6, but we
> generate 9 of them for LE.  This is because there are three extracts of
> element zero of a V4SF in the testcase.  The scalar equivalence allows
> us to avoid the xxsldwi in BE but not in LE.  Therefore I've disabled
> this test for little endian.
>
> Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2013-11-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * config/rs6000/vsx.md (vsx_set_<mode>): Adjust for little endian.
>         (vsx_extract_<mode>): Likewise.
>         (*vsx_extract_<mode>_one_le): New LE variant on
>         *vsx_extract_<mode>_zero.
>         (vsx_extract_v4sf): Adjust for little endian.
>
>
> gcc/testsuite:
>
> 2013-11-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * gcc.target/powerpc/pr48258-1.c: Skip for little endian.

Okay.

And thanks for the optimization to extract element one for LE.

Thanks, David
diff mbox

Patch

Index: gcc/testsuite/gcc.target/powerpc/pr48258-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr48258-1.c	(revision 205053)
+++ gcc/testsuite/gcc.target/powerpc/pr48258-1.c	(working copy)
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */
 /* { dg-final { scan-assembler-times "xvaddsp" 3 } } */
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 205053)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -1497,9 +1497,10 @@ 
 		      UNSPEC_VSX_SET))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  if (INTVAL (operands[3]) == 0)
+  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
+  if (INTVAL (operands[3]) == idx_first)
     return \"xxpermdi %x0,%x2,%x1,1\";
-  else if (INTVAL (operands[3]) == 1)
+  else if (INTVAL (operands[3]) == 1 - idx_first)
     return \"xxpermdi %x0,%x1,%x2,0\";
   else
     gcc_unreachable ();
@@ -1514,8 +1515,12 @@ 
 			[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
+  int fldDM;
   gcc_assert (UINTVAL (operands[2]) <= 1);
-  operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
+  fldDM = INTVAL (operands[2]) << 1;
+  if (!BYTES_BIG_ENDIAN)
+    fldDM = 3 - fldDM;
+  operands[3] = GEN_INT (fldDM);
   return \"xxpermdi %x0,%x1,%x1,%3\";
 }
   [(set_attr "type" "vecperm")])
@@ -1535,6 +1540,21 @@ 
 	(const_string "fpload")))
    (set_attr "length" "4")])  
 
+;; Optimize extracting element 1 from memory for little endian
+(define_insn "*vsx_extract_<mode>_one_le"
+  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+	(vec_select:<VS_scalar>
+	 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+	 (parallel [(const_int 1)])))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN"
+  "lxsd%U1x %x0,%y1"
+  [(set (attr "type")
+      (if_then_else
+	(match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+	(const_string "fpload_ux")
+	(const_string "fpload")))
+   (set_attr "length" "4")])  
+
 ;; Extract a SF element from V4SF
 (define_insn_and_split "vsx_extract_v4sf"
   [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
@@ -1555,7 +1575,7 @@ 
   rtx op2 = operands[2];
   rtx op3 = operands[3];
   rtx tmp;
-  HOST_WIDE_INT ele = INTVAL (op2);
+  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
 
   if (ele == 0)
     tmp = op1;