diff mbox

[rs6000] Add support for xxpermr and vpermr instructions

Message ID 56EAFE3D.1040802@linux.vnet.ibm.com
State New
Headers show

Commit Message

Kelvin Nilsen March 17, 2016, 6:58 p.m. UTC
This patch adds support for two new Power9 instructions, xxpermr and 
vpermr, providing more efficient vector permutation operations on
little-endian configurations. These new instructions are described in
the Power ISA 3.0 document.  Selection of the new instructions is
conditioned upon TARGET_P9_VECTOR and !VECTOR_ELT_ORDER_BIG.

The patch has been bootstrapped and tested on powerpc64le-unknown-linux-gnu
and powerpc64-unknown-linux-gnu with no regressions.  Is this ok for GCC 
7 when stage 1 opens?

(A previous version of this patch was distributed and approved, but 
further experience with testing of P9 fusion instructions revealed a 
problem with that particular code expansion.  So this new revision of 
the patch omits the fusion instruction generation pattern.)

Thanks.

gcc/testsuite/ChangeLog:

2016-03-17  Kelvin Nilsen  <kelvin@gcc.gnu.org>

	* gcc.target/powerpc/p9-permute.c: Generalize test to run on
         big-endian Power9 in addition to little-endian Power9.
	* gcc.target/powerpc/p9-vpermr.c: New test.


gcc/ChangeLog:

2016-03-17  Kelvin Nilsen  <kelvin@gcc.gnu.org>

	* config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec
         constant.
         (*altivec_vpermr_<mode>_internal): New insn.
	* config/rs6000/rs6000.c (rs6000_expand_vector_set): If
         !BYTES_BIG_ENDIAN and TARGET_P9_VECTOR, expand using template
         that translates into new xxpermr or vpermr instructions.
	(altivec_expand_vec_perm_le): If TARGET_P9_VECTOR, expand using
         template that translates into new xxpermr or vpermr
         instructions.

Comments

David Edelsohn March 18, 2016, 1:07 a.m. UTC | #1
On Thu, Mar 17, 2016 at 2:58 PM, Kelvin Nilsen
<kdnilsen@linux.vnet.ibm.com> wrote:
>
> This patch adds support for two new Power9 instructions, xxpermr and vpermr,
> providing more efficient vector permutation operations on
> little-endian configurations. These new instructions are described in
> the Power ISA 3.0 document.  Selection of the new instructions is
> conditioned upon TARGET_P9_VECTOR and !VECTOR_ELT_ORDER_BIG.
>
> The patch has bootstrapped and tested on powerpc64le-unknown-linux-gnu
> and powerpc64-unknown-linux-gnu with no regressions.  Is this ok for GCC 7
> when stage 1 opens?
>
> (A previous version of this patch was distributed and approved, but further
> experience with testing of P9 fusion instructions revealed a problem with
> that particular code expansion.  So this new revision of the patch omits the
> fusion instruction generation pattern.)
>
> Thanks.
>
> gcc/testsuite/ChangeLog:
>
> 2016-03-17  Kelvin Nilsen  <kelvin@gcc.gnu.org>
>
>         * gcc.target/powerpc/p9-permute.c: Generalize test to run on
>         big-endian Power9 in addition to little-endian Power9.
>         * gcc.target/powerpc/p9-vpermr.c: New test.
>
>
> gcc/ChangeLog:
>
> 2016-03-17  Kelvin Nilsen  <kelvin@gcc.gnu.org>
>
>         * config/rs6000/altivec.md: (UNSPEC_VPERMR): New unspec
>         constant.
>         (*altivecvpermr_<mode>_internal): New insn.
>         * config/rs6000/rs6000.c (rs6000_expand_vector_set): If
>         !BYTES_BIG_ENDIAN and TARGET_P9_VECTOR, expand using template
>         that translates into new xxpermr or vpermr instructions.
>         (altivec_expand_vec_perm_le): if TARGET_P9_VECTOR, expand using
>         template that translates into new xxpermr or vpermr
>         instructions.

This is okay for GCC 7.

Thanks, David
diff mbox

Patch

Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 233539)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -58,6 +58,7 @@ 
    UNSPEC_VSUM2SWS
    UNSPEC_VSUMSWS
    UNSPEC_VPERM
+   UNSPEC_VPERMR
    UNSPEC_VPERM_UNS
    UNSPEC_VRFIN
    UNSPEC_VCFUX
@@ -1962,6 +1963,19 @@ 
   [(set_attr "type" "vecperm")
    (set_attr "length" "4,4,8")])
 
+(define_insn "*altivec_vpermr_<mode>_internal"
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo")
+	(unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
+		    (match_operand:VM 2 "register_operand" "v,wo")
+		    (match_operand:V16QI 3 "register_operand" "v,wo")]
+		   UNSPEC_VPERMR))]
+  "TARGET_P9_VECTOR"
+  "@
+   vpermr %0,%1,%2,%3
+   xxpermr %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4")])
+
 (define_insn "altivec_vperm_v8hiv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo")
 	(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 233539)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -6553,19 +6553,27 @@  rs6000_expand_vector_set (rtx target, rtx val, int
 			UNSPEC_VPERM);
   else 
     {
-      /* Invert selector.  We prefer to generate VNAND on P8 so
-         that future fusion opportunities can kick in, but must
-         generate VNOR elsewhere.  */
-      rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
-      rtx iorx = (TARGET_P8_VECTOR
-		  ? gen_rtx_IOR (V16QImode, notx, notx)
-		  : gen_rtx_AND (V16QImode, notx, notx));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_insn (gen_rtx_SET (tmp, iorx));
-
-      /* Permute with operands reversed and adjusted selector.  */
-      x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
-			  UNSPEC_VPERM);
+      if (TARGET_P9_VECTOR)
+	x = gen_rtx_UNSPEC (mode,
+			    gen_rtvec (3, target, reg, 
+				       force_reg (V16QImode, x)),
+			    UNSPEC_VPERMR);
+      else
+	{
+	  /* Invert selector.  We prefer to generate VNAND on P8 so
+	     that future fusion opportunities can kick in, but must
+	     generate VNOR elsewhere.  */
+	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
+	  rtx iorx = (TARGET_P8_VECTOR
+		      ? gen_rtx_IOR (V16QImode, notx, notx)
+		      : gen_rtx_AND (V16QImode, notx, notx));
+	  rtx tmp = gen_reg_rtx (V16QImode);
+	  emit_insn (gen_rtx_SET (tmp, iorx));
+	  
+	  /* Permute with operands reversed and adjusted selector.  */
+	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
+			      UNSPEC_VPERM);
+	}
     }
 
   emit_insn (gen_rtx_SET (target, x));
@@ -33421,18 +33429,26 @@  altivec_expand_vec_perm_le (rtx operands[4])
   if (!REG_P (target))
     tmp = gen_reg_rtx (mode);
 
-  /* Invert the selector with a VNAND if available, else a VNOR.
-     The VNAND is preferred for future fusion opportunities.  */
-  notx = gen_rtx_NOT (V16QImode, sel);
-  iorx = (TARGET_P8_VECTOR
-	  ? gen_rtx_IOR (V16QImode, notx, notx)
-	  : gen_rtx_AND (V16QImode, notx, notx));
-  emit_insn (gen_rtx_SET (norreg, iorx));
+  if (TARGET_P9_VECTOR)
+    {
+      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), 
+			       UNSPEC_VPERMR);
+    }
+  else
+    {
+      /* Invert the selector with a VNAND if available, else a VNOR.
+	 The VNAND is preferred for future fusion opportunities.  */
+      notx = gen_rtx_NOT (V16QImode, sel);
+      iorx = (TARGET_P8_VECTOR
+	      ? gen_rtx_IOR (V16QImode, notx, notx)
+	      : gen_rtx_AND (V16QImode, notx, notx));
+      emit_insn (gen_rtx_SET (norreg, iorx));
+      
+      /* Permute with operands reversed and adjusted selector.  */
+      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
+			       UNSPEC_VPERM);
+    }
 
-  /* Permute with operands reversed and adjusted selector.  */
-  unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
-			   UNSPEC_VPERM);
-
   /* Copy into target, possibly by way of a register.  */
   if (!REG_P (target))
     {
Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-permute.c	(revision 233539)
+++ gcc/testsuite/gcc.target/powerpc/p9-permute.c	(working copy)
@@ -1,4 +1,4 @@ 
-/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-do compile { target { powerpc64*-*-* } } } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
 /* { dg-options "-mcpu=power9 -O2" } */
 
@@ -16,5 +16,6 @@  permute (vector long long *p, vector long long *q,
   return vec_perm (a, b, mask);
 }
 
+/* Expect xxpermr on little-endian, xxperm on big-endian.  */
 /* { dg-final { scan-assembler	   "xxperm" } } */
 /* { dg-final { scan-assembler-not "vperm"  } } */
Index: gcc/testsuite/gcc.target/powerpc/p9-vpermr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-vpermr.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-vpermr.c	(revision 234260)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector long long
+permute (vector long long *p, vector long long *q, vector unsigned char mask)
+{
+  vector long long a = *p;
+  vector long long b = *q;
+
+  /* Force a, b to be in altivec registers to select vpermr insn.  */
+  __asm__ (" # a: %x0, b: %x1" : "+v" (a), "+v" (b));
+
+  return vec_perm (a, b, mask);
+}
+
+/* { dg-final { scan-assembler	   "vpermr" } } */