diff mbox series

[rs6000] Add builtin support for vec_insert4b, vec_extract4b

Message ID 1518638907.7508.30.camel@us.ibm.com
State New
Headers show
Series [rs6000] Add builtin support for vec_insert4b, vec_extract4b | expand

Commit Message

Carl Love Feb. 14, 2018, 8:08 p.m. UTC
GCC maintainers:

Per Segher's comments on the first version of the patch, I split the
patch into two.  The first patch (this one) adds the ABI specified
vec_insert4b and vec_extract4b builtins.  It adds a runnable file to test
the ABI specified builtin instances.  Note, the runnable test file does
not test for illegal argument values, such as the const int second
argument > 12 or an argument of the wrong type.

Note, the rtl for vec_insert4b in vsx.md is a copy of the vec_vinsert4b
code with the name changed.  The rtl for vec_extract4b is new.

The second patch removes all of the non-ABI builtin support.

Additionally, I have addressed the other comments from Segher with
regards to formatting issues and rtl register specification.

This patch has been tested on:

  powerpc64le-unknown-linux-gnu (Power 8 LE)
  powerpc64le-unknown-linux-gnu (Power 9 LE)

with no regressions.

Let me know if the patch looks OK or not. Thanks.

The patch should also be ported to GCC 7 so we are in compliance with
the ABI.

                           Carl Love

-----------------------------------------------------------------------

    gcc/ChangeLog:

    2018-02-13  Carl Love  <cel@us.ibm.com>

        * config/rs6000/altivec.h: Add builtin names vec_extract4b
        vec_insert4b.
        * config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B
        definitions.
        * config/rs6000/rs6000-c.c: Add the definitions for
        P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B.
        * config/rs6000/rs6000.c (altivec_expand_builtin): Add
        P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements.
        * config/rs6000/vsx.md: Add define_insn extract4b.  Add define_expand
	definition for insert4b and define_insn *insert4b_internal.
        * doc/extend.texi: Add documentation for vec_extract4b and vec_insert4b.

    gcc/testsuite/ChangeLog:

    2018-02-13  Carl Love  <cel@us.ibm.com>
        * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file
        for the ABI definitions for vec_extract4b and vec_insert4b.
---
 gcc/config/rs6000/altivec.h                        |   2 +
 gcc/config/rs6000/rs6000-builtin.def               |   4 +
 gcc/config/rs6000/rs6000-c.c                       |   8 +
 gcc/config/rs6000/rs6000.c                         |   2 +
 gcc/config/rs6000/vsx.md                           |  41 +++++
 gcc/doc/extend.texi                                |   7 +
 .../gcc.target/powerpc/builtins-7-p9-runnable.c    | 169 +++++++++++++++++++++
 7 files changed, 233 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c

Comments

Segher Boessenkool Feb. 16, 2018, 2:26 p.m. UTC | #1
Hi!

On Wed, Feb 14, 2018 at 12:08:27PM -0800, Carl Love wrote:
> Per Segher's comments on the first version of the patch.  I split the
> patch into two.

Thanks, much easier to read.

>     2018-02-13  Carl Love  <cel@us.ibm.com>
> 
>         * config/rs6000/altivec.h: Add builtin names vec_extract4b
>         vec_insert4b.

	* config/rs6000/altivec.h (vec_extract4b, vec_insert4b): New.

(Similar for the rest of the changelog).

> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -5433,6 +5433,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
>      RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
>    { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
>      RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
> +  { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },

The old builtin uses unsigned int for the element number (but signed is
correct, yes).

Looks good.  Okay for trunk.  Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 684cb1990..3bce2ae39 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -435,6 +435,8 @@ 
 #define vec_vctzw __builtin_vec_vctzw
 #define vec_vextract4b __builtin_vec_vextract4b
 #define vec_vinsert4b __builtin_vec_vinsert4b
+#define vec_extract4b __builtin_vec_extract4b
+#define vec_insert4b __builtin_vec_insert4b
 #define vec_vprtyb __builtin_vec_vprtyb
 #define vec_vprtybd __builtin_vec_vprtybd
 #define vec_vprtybw __builtin_vec_vprtybw
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 86604da46..420d12e29 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2229,6 +2229,8 @@  BU_P9V_AV_2 (VEXTUWRX, "vextuwrx",		CONST,	vextuwrx)
 BU_P9V_VSX_2 (VEXTRACT4B,   "vextract4b",	CONST,	vextract4b)
 BU_P9V_VSX_3 (VINSERT4B,    "vinsert4b",	CONST,	vinsert4b)
 BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di",	CONST,	vinsert4b_di)
+BU_P9V_VSX_3 (INSERT4B,    "insert4b",		CONST,  insert4b)
+BU_P9V_VSX_2 (EXTRACT4B,   "extract4b", 	CONST,  extract4b)
 
 /* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA
    3.0 (power9).  */
@@ -2291,11 +2293,13 @@  BU_P9V_OVERLOAD_2 (XL_LEN_R,	"xl_len_r")
 BU_P9V_OVERLOAD_2 (VEXTULX,	"vextulx")
 BU_P9V_OVERLOAD_2 (VEXTURX,	"vexturx")
 BU_P9V_OVERLOAD_2 (VEXTRACT4B,	"vextract4b")
+BU_P9V_OVERLOAD_2 (EXTRACT4B,  "extract4b")
 
 /* ISA 3.0 Vector scalar overloaded 3 argument functions */
 BU_P9V_OVERLOAD_3 (STXVL,	"stxvl")
 BU_P9V_OVERLOAD_3 (XST_LEN_R,	"xst_len_r")
 BU_P9V_OVERLOAD_3 (VINSERT4B,	"vinsert4b")
+BU_P9V_OVERLOAD_3 (INSERT4B,    "insert4b")
 
 /* Overloaded CMPNE support was implemented prior to Power 9,
    so is not mentioned here.  */
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index a68be511c..56e66db98 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -5433,6 +5433,8 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
   { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
     RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+  { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
 
   { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH,
     RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
@@ -5492,6 +5494,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
 
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
   { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
     RS6000_BTI_V16QI, RS6000_BTI_V4SI,
     RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6a6801aad..f8d8b9687 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15730,6 +15730,7 @@  altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
 
     case P9V_BUILTIN_VEXTRACT4B:
     case P9V_BUILTIN_VEC_VEXTRACT4B:
+    case P9V_BUILTIN_VEC_EXTRACT4B:
       arg1 = CALL_EXPR_ARG (exp, 1);
       STRIP_NOPS (arg1);
 
@@ -15747,6 +15748,7 @@  altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case P9V_BUILTIN_VINSERT4B:
     case P9V_BUILTIN_VINSERT4B_DI:
     case P9V_BUILTIN_VEC_VINSERT4B:
+    case P9V_BUILTIN_VEC_INSERT4B:
       arg2 = CALL_EXPR_ARG (exp, 2);
       STRIP_NOPS (arg2);
 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 86efdced2..266923f98 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5204,6 +5204,47 @@ 
 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
 ;; endian version needs to adjust the byte number, and the V4SI element in
 ;; vinsert4b.
+(define_insn "extract4b"
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
+                    UNSPEC_XXEXTRACTUW))]
+  "TARGET_P9_VECTOR"
+{
+  /* Mirror the byte offset (12 - n) when element order is not big-endian.  */
+  if (!VECTOR_ELT_ORDER_BIG)
+    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
+  return "xxextractuw %x0,%x1,%2";
+})
+
+(define_expand "insert4b"
+  [(set (match_operand:V16QI 0 "vsx_register_operand")
+	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
+		       (match_operand:V16QI 2 "vsx_register_operand")
+		       (match_operand:QI 3 "const_0_to_12_operand")]
+		   UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+{
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx op1 = operands[1];
+      rtx v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
+      operands[1] = v4si_tmp;
+      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+    }
+})
+
+(define_insn "*insert4b_internal"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
+		       (match_operand:V16QI 2 "vsx_register_operand" "0")
+		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
+		   UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+  "xxinsertw %x0,%x1,%3"
+  [(set_attr "type" "vecperm")])
+
 (define_expand "vextract4b"
   [(set (match_operand:DI 0 "gpc_reg_operand")
 	(unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cb9df971a..13dbac42e 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19055,8 +19055,15 @@  vector int vec_vctzw (vector int);
 vector unsigned int vec_vctzw (vector int);
 
 long long vec_vextract4b (const vector signed char, const int);
+vector unsigned long long vec_extract4b (vector unsigned char,
+                                         const int);
+long long vec_extract4b (const vector signed char, const int);
 long long vec_vextract4b (const vector unsigned char, const int);
 
+vector unsigned char vec_insert4b (vector signed int, vector unsigned char,
+                                   const int);
+vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
+                                   const int);
 vector signed char vec_insert4b (vector int, vector signed char, const int);
 vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
                                    const int);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c
new file mode 100644
index 000000000..137b46b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c
@@ -0,0 +1,169 @@ 
+/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+#define TRUE 1
+#define FALSE 0
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define EXTRACT 0
+
+void abort (void);
+
+int result_wrong_ull (vector unsigned long long vec_expected,
+		      vector unsigned long long vec_actual)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    if (vec_expected[i] != vec_actual[i])
+      return TRUE;
+
+  return FALSE;
+}
+
+int result_wrong_uc (vector unsigned char vec_expected,
+		     vector unsigned char vec_actual)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    if (vec_expected[i] != vec_actual[i])
+      return TRUE;
+
+  return FALSE;
+}
+
+#ifdef DEBUG
+void print_ull (vector unsigned long long vec_expected,
+		vector unsigned long long vec_actual)
+{  /* Debug aid: print the expected elements, then the actual ones.  */
+  int i;
+
+  printf("expected unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_expected[i]);
+  /* The label names the element type printed; values are unsigned (%llu).  */
+  printf("\nactual unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_actual[i]);
+  printf("\n");
+}
+
+void print_uc (vector unsigned char vec_expected,
+	       vector unsigned char vec_actual)
+{
+  int i;
+
+  printf("expected unsigned char data\n");
+  for (i = 0; i < 16; i++)
+    printf(" %d,", vec_expected[i]);
+
+  printf("\nactual unsigned char data\n");
+  for (i = 0; i < 16; i++)
+    printf(" %d,", vec_actual[i]);
+  printf("\n");
+}
+#endif
+
+#if EXTRACT
+vector unsigned long long
+vext (vector unsigned char *vc)
+{
+  return vextract_si_vchar (*vc, 5);
+}
+#endif
+
+int main()
+{
+   vector signed int vsi_arg;
+   vector unsigned char vec_uc_arg, vec_uc_result, vec_uc_expected;
+   vector unsigned long long vec_ull_result, vec_ull_expected;
+   unsigned long long ull_result, ull_expected;
+
+   vec_uc_arg = (vector unsigned char){1, 2, 3, 4,
+				       5, 6, 7, 8,
+				       9, 10, 11, 12,
+				       13, 14, 15, 16};
+
+   vsi_arg = (vector signed int){0xA, 0xB, 0xC, 0xD};
+
+   vec_uc_expected = (vector unsigned char){0xC, 0, 0, 0,
+					    5, 6, 7, 8,
+					    9, 10, 11, 12,
+					    13, 14, 15, 16};
+   /* Test vec_insert4b() */
+   /* Insert into char 0 location */
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 0);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_insert4b pos 0, result does not match expected result\n");
+	print_uc (vec_uc_expected, vec_uc_result);
+#else
+        abort();
+#endif
+      }
+
+   /* insert into char 4 location */
+   vec_uc_expected = (vector unsigned char){1, 2, 3, 4,
+					    0xC, 0, 0, 0,
+					    9, 10, 11, 12,
+					    13, 14, 15, 16};
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 4);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_insert4b pos 4, result does not match expected result\n");
+	print_uc (vec_uc_expected, vec_uc_result);
+#else
+        abort();
+#endif
+      }
+
+   /* Test vec_extract4b() */
+   /* Extract 4b, from char 0 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+				       20, 0, 0, 0,
+				       30, 0, 0, 0,
+				       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 10};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 0);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_extract4b pos 0, result does not match expected result\n");
+	print_ull (vec_ull_expected, vec_ull_result);
+#else
+        abort();
+#endif
+      }
+
+   /* Extract 4b, from char 12 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+				       20, 0, 0, 0,
+				       30, 0, 0, 0,
+				       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 40};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 12);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+        printf("Error: vec_extract4b pos 12, result does not match expected result\n");
+	print_ull (vec_ull_expected, vec_ull_result);
+#else
+        abort();
+#endif
+      }
+}