diff mbox

[rs6000] Correct programmer access to vperm for little endian

Message ID 1384537519.8213.138.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt Nov. 15, 2013, 5:45 p.m. UTC
Hi,

A previous patch of mine was misguided.  It modified the altivec_vperm_*
patterns to use the little endian conversion trick of reversing the
input operands and complementing the permute control vector.

Looking at the Altivec manual, we really can't do this.  These patterns
need to be direct pass-throughs to the vperm instruction, as shown in
Figure 4-95 on page 130 of
http://www.freescale.com/files/32bit/doc/ref_manual/ALTIVECPIM.pdf.
Section 4.2 on page 49 confirms that big-endian byte ordering is to be
used with the Altivec instruction descriptions.

This patch reverts that specific change, cleans up some associated
commentary in another part, and modifies the one test case affected by
the change.  gcc.dg/vmx/3b-15.c performs the argument reversal and
control vector complementing in the source code, as all users will need
to do when porting code containing vec_perm calls to little endian.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


gcc:

2013-11-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X):
	Remove.
	(altivec_vperm_<mode>): Revert earlier little endian change.
	(*altivec_vperm_<mode>_internal): Remove.
	(altivec_vperm_<mode>_uns): Revert earlier little endian change.
	(*altivec_vperm_<mode>_uns_internal): Remove.
	* config/rs6000/vector.md (vec_realign_load_<mode>): Revise
	commentary.

gcc/testsuite:

2013-11-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.dg/vmx/3b-15.c: Revise for little endian.

Comments

David Edelsohn Nov. 15, 2013, 6:44 p.m. UTC | #1
On Fri, Nov 15, 2013 at 12:45 PM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi,
>
> A previous patch of mine was misguided.  It modified the altivec_vperm_*
> patterns to use the little endian conversion trick of reversing the
> input operands and complementing the permute control vector.
>
> Looking at the Altivec manual, we really can't do this.  These patterns
> need to be direct pass-throughs to the vperm instruction, as shown in
> Figure 4-95 on page 130 of
> http://www.freescale.com/files/32bit/doc/ref_manual/ALTIVECPIM.pdf.
> Section 4.2 on page 49 confirms that big-endian byte ordering is to be
> used with the Altivec instruction descriptions.
>
> This patch reverts that specific change, cleans up some associated
> commentary in another part, and modifies the one test case affected by
> the change.  gcc.dg/vmx/3b-15.c performs the argument reversal and
> control vector complementing in the source code, as all users will need
> to do when porting code containing vec_perm calls to little endian.
>
> Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2013-11-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X):
>         Remove.
>         (altivec_vperm_<mode>): Revert earlier little endian change.
>         (*altivec_vperm_<mode>_internal): Remove.
>         (altivec_vperm_<mode>_uns): Revert earlier little endian change.
>         (*altivec_vperm_<mode>_uns_internal): Remove.
>         * config/rs6000/vector.md (vec_realign_load_<mode>): Revise
>         commentary.
>
> gcc/testsuite:
>
> 2013-11-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * gcc.dg/vmx/3b-15.c: Revise for little endian.

Okay.

Thanks, David
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/vmx/3b-15.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/3b-15.c	(revision 204792)
+++ gcc/testsuite/gcc.dg/vmx/3b-15.c	(working copy)
@@ -3,7 +3,11 @@ 
 vector unsigned char
 f (vector unsigned char a, vector unsigned char b, vector unsigned char c)
 {
+#ifdef __BIG_ENDIAN__
   return vec_perm(a,b,c); 
+#else
+  return vec_perm(b,a,c);
+#endif
 }
 
 static void test()
@@ -12,8 +16,13 @@  static void test()
 					    8,9,10,11,12,13,14,15}),
 		     ((vector unsigned char){70,71,72,73,74,75,76,77,
 					    78,79,80,81,82,83,84,85}),
+#ifdef __BIG_ENDIAN__
 		     ((vector unsigned char){0x1,0x14,0x18,0x10,0x16,0x15,0x19,0x1a,
 					    0x1c,0x1c,0x1c,0x12,0x8,0x1d,0x1b,0xe})),
+#else
+                     ((vector unsigned char){0x1e,0xb,0x7,0xf,0x9,0xa,0x6,0x5,
+                                            0x3,0x3,0x3,0xd,0x17,0x2,0x4,0x11})),
+#endif
 		   ((vector unsigned char){1,74,78,70,76,75,79,80,82,82,82,72,8,83,81,14})),
 	"f");
 }
Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md	(revision 204792)
+++ gcc/config/rs6000/vector.md	(working copy)
@@ -966,8 +966,8 @@ 
     	      				 operands[2], operands[3]));
   else
     {
-      /* Avoid the "subtract from splat31" workaround for vperm since
-         we have changed lvsr to lvsl instead.  */
+      /* We have changed lvsr to lvsl, so to complete the transformation
+         of vperm for LE, we must swap the inputs.  */
       rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
                                    gen_rtvec (3, operands[2],
                                               operands[1], operands[3]),
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 204792)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -59,8 +59,6 @@ 
    UNSPEC_VSUMSWS
    UNSPEC_VPERM
    UNSPEC_VPERM_UNS
-   UNSPEC_VPERM_X
-   UNSPEC_VPERM_UNS_X
    UNSPEC_VRFIN
    UNSPEC_VCFUX
    UNSPEC_VCFSX
@@ -1393,91 +1391,21 @@ 
   "vrfiz %0,%1"
   [(set_attr "type" "vecfloat")])
 
-(define_insn_and_split "altivec_vperm_<mode>"
+(define_insn "altivec_vperm_<mode>"
   [(set (match_operand:VM 0 "register_operand" "=v")
 	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
 		    (match_operand:VM 2 "register_operand" "v")
 		    (match_operand:V16QI 3 "register_operand" "v")]
-		   UNSPEC_VPERM_X))]
-  "TARGET_ALTIVEC"
-  "#"
-  "!reload_in_progress && !reload_completed"
-  [(set (match_dup 0) (match_dup 4))]
-{
-  if (BYTES_BIG_ENDIAN)
-    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                  gen_rtvec (3, operands[1],
- 		                             operands[2], operands[3]),
-                                  UNSPEC_VPERM);
-  else
-    {
-      /* We want to subtract from 31, but we can't vspltisb 31 since
-         it's out of range.  -1 works as well because only the low-order
-         five bits of the permute control vector elements are used.  */
-      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
-                                         gen_rtx_CONST_INT (QImode, -1));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_move_insn (tmp, splat);
-      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
-      emit_move_insn (tmp, sel);
-      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                    gen_rtvec (3, operands[2],
-		 		               operands[1], tmp),
-		                    UNSPEC_VPERM);
-    }
-}
-  [(set_attr "type" "vecperm")])
-
-(define_insn "*altivec_vperm_<mode>_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
-		    (match_operand:VM 2 "register_operand" "v")
-		    (match_operand:V16QI 3 "register_operand" "+v")]
 		   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])
 
-(define_insn_and_split "altivec_vperm_<mode>_uns"
+(define_insn "altivec_vperm_<mode>_uns"
   [(set (match_operand:VM 0 "register_operand" "=v")
 	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
 		    (match_operand:VM 2 "register_operand" "v")
 		    (match_operand:V16QI 3 "register_operand" "v")]
-		   UNSPEC_VPERM_UNS_X))]
-  "TARGET_ALTIVEC"
-  "#"
-  "!reload_in_progress && !reload_completed"
-  [(set (match_dup 0) (match_dup 4))]
-{
-  if (BYTES_BIG_ENDIAN)
-    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                  gen_rtvec (3, operands[1],
-				             operands[2], operands[3]),
-                                  UNSPEC_VPERM_UNS);
-  else
-    {
-      /* We want to subtract from 31, but we can't vspltisb 31 since
-         it's out of range.  -1 works as well because only the low-order
-         five bits of the permute control vector elements are used.  */
-      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
-                                         gen_rtx_CONST_INT (QImode, -1));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_move_insn (tmp, splat);
-      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
-      emit_move_insn (tmp, sel);
-      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                    gen_rtvec (3, operands[2],
-				               operands[1], tmp),
-		                    UNSPEC_VPERM_UNS);
-    }
-}
-  [(set_attr "type" "vecperm")])
-
-(define_insn "*altivec_vperm_<mode>_uns_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
-		    (match_operand:VM 2 "register_operand" "v")
-		    (match_operand:V16QI 3 "register_operand" "+v")]
 		   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"