diff mbox series

[rs6000] Add support to enable vmsumudm behind vec_msum builtin.

Message ID 1391c282d282cff26e3ad3bff82789020d33c2fe.camel@vnet.ibm.com
State New
Headers show
Series [rs6000] Add support to enable vmsumudm behind vec_msum builtin. | expand

Commit Message

will schmidt June 26, 2020, 6:20 p.m. UTC
Hi,

  Add support for the vmsumudm instruction and tie it into the vec_msum
  built-in to support the variants of that built-in using vector
  __int128 parameters.

  vector __uint128_t vec_msum (vector unsigned long long,
                               vector unsigned long long,
                               vector __uint128_t);
  vector __int128_t vec_msum (vector signed long long,
                              vector signed long long,
                              vector __int128_t);
    
Regtests currently running on assorted powerpc targets.

OK for trunk?

Thanks,
-Will


[gcc]

2020-06-18  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/altivec.h (vec_vmsumudm): New define.
	* config/rs6000/altivec.md (UNSPEC_VMSUMUDM): New unspec.
	(altivec_vmsumudm): New define_insn.
	* config/rs6000/rs6000-builtin.def (altivec_vmsumudm): New
	BU_ALTIVEC_3 entry. (vmsumudm): New BU_ALTIVEC_OVERLOAD_3 
	entry.
	* config/rs6000/rs6000-call.c (altivec_overloaded_builtins): 
	Add entries for ALTIVEC_BUILTIN_VMSUMUDM variants of vec_msum.

[testsuite]

2020-06-18  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* gcc.target/powerpc/builtins-msum-runnable.c: New test.
	* gcc.target/powerpc/vsx-builtin-msum.c: New test.

Comments

Segher Boessenkool June 26, 2020, 10:58 p.m. UTC | #1
Hi!

On Fri, Jun 26, 2020 at 01:20:43PM -0500, will schmidt wrote:
> 	* config/rs6000/altivec.h (vec_vmsumudm): New define.
> 	* config/rs6000/altivec.md (UNSPEC_VMSUMUDM): New unspec.
> 	(altivec_vmsumudm): New define_insn.
> 	* config/rs6000/rs6000-builtin.def (altivec_vmsumudm): New
> 	BU_ALTIVEC_3 entry. (vmsumudm): New BU_ALTIVEC_OVERLOAD_3 
> 	entry.

> +(define_insn "altivec_vmsumudm"
> +  [(set (match_operand:V1TI 0 "register_operand" "=v")
> +	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
> +		      (match_operand:V2DI 2 "register_operand" "v")
> +		      (match_operand:V1TI 3 "register_operand" "v")]
> +		     UNSPEC_VMSUMUDM))]
> +  "TARGET_P8_VECTOR"
> +  "vmsumudm %0,%1,%2,%3"
> +  [(set_attr "type" "veccomplex")])

I wonder if it would be better to actually describe what the insn does,
instead of using an unspec.  All similar insns are like this already of
course, it's not something that needs to be fixed right now.

TARGET_P8_VECTOR is wrong (it is ISA 3.0B).

> +The @code{vec_msum} functions perform a vector multiply-sum, returning
> +the result of arg1*arg2+arg3.  ISA 3.0 adds support for vec_msum returning
> +a vector int128 result.

Well, that doesn't describe the horizontal addition it does?  The muls
are widening, and two adjacent results are added (together with the
corresponding elt in arg3).

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-msum.c
> @@ -0,0 +1,25 @@
> +/* Verify that overloaded built-ins for vec_msum with __int128
> +   inputs generate the proper code.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power8  -O3" } */

This needs to change to power9 as well?

Thanks,


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index bb1524f..0d19939 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -159,10 +159,11 @@ 
 #define vec_vmsumuhm __builtin_vec_vmsumuhm
 #define vec_vmsummbm __builtin_vec_vmsummbm
 #define vec_vmsumubm __builtin_vec_vmsumubm
 #define vec_vmsumshs __builtin_vec_vmsumshs
 #define vec_vmsumuhs __builtin_vec_vmsumuhs
+#define vec_vmsumudm __builtin_vec_vmsumudm
 #define vec_vmulesb __builtin_vec_vmulesb
 #define vec_vmulesh __builtin_vec_vmulesh
 #define vec_vmuleuh __builtin_vec_vmuleuh
 #define vec_vmuleub __builtin_vec_vmuleub
 #define vec_vmulosh __builtin_vec_vmulosh
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2ce9227..0481642 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -19,10 +19,11 @@ 
 ;; <http://www.gnu.org/licenses/>.
 
 (define_c_enum "unspec"
   [UNSPEC_VCMPBFP
    UNSPEC_VMSUMU
+   UNSPEC_VMSUMUDM
    UNSPEC_VMSUMM
    UNSPEC_VMSUMSHM
    UNSPEC_VMSUMUHS
    UNSPEC_VMSUMSHS
    UNSPEC_VMHADDSHS
@@ -970,10 +971,20 @@ 
 		     UNSPEC_VMSUMU))]
   "TARGET_ALTIVEC"
   "vmsumu<VI_char>m %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmsumudm"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
+		      (match_operand:V2DI 2 "register_operand" "v")
+		      (match_operand:V1TI 3 "register_operand" "v")]
+		     UNSPEC_VMSUMUDM))]
+  "TARGET_P9_VECTOR"  ; vmsumudm is an ISA 3.0 instruction, not ISA 2.07.
+  "vmsumudm %0,%1,%2,%3"
+  [(set_attr "type" "veccomplex")])
+
 (define_insn "altivec_vmsumm<VI_char>m"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
 		      (match_operand:VIshort 2 "register_operand" "v")
                       (match_operand:V4SI 3 "register_operand" "v")]
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 363656e..ee0d787 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1140,10 +1140,11 @@  BU_ALTIVEC_3 (VMHADDSHS,      "vmhaddshs",      SAT,   	altivec_vmhaddshs)
 BU_ALTIVEC_3 (VMHRADDSHS,     "vmhraddshs",     SAT,   	altivec_vmhraddshs)
 BU_ALTIVEC_3 (VMLADDUHM,      "vmladduhm",      CONST, 	fmav8hi4)
 BU_ALTIVEC_3 (VMSUMUBM,       "vmsumubm",       CONST, 	altivec_vmsumubm)
 BU_ALTIVEC_3 (VMSUMMBM,       "vmsummbm",       CONST, 	altivec_vmsummbm)
 BU_ALTIVEC_3 (VMSUMUHM,       "vmsumuhm",       CONST, 	altivec_vmsumuhm)
+BU_ALTIVEC_3 (VMSUMUDM,       "vmsumudm",       CONST, 	altivec_vmsumudm)
 BU_ALTIVEC_3 (VMSUMSHM,       "vmsumshm",       CONST, 	altivec_vmsumshm)
 BU_ALTIVEC_3 (VMSUMUHS,       "vmsumuhs",       SAT,   	altivec_vmsumuhs)
 BU_ALTIVEC_3 (VMSUMSHS,       "vmsumshs",       SAT,   	altivec_vmsumshs)
 BU_ALTIVEC_3 (VNMSUBFP,       "vnmsubfp",       FP,    	nfmsv4sf4)
 BU_ALTIVEC_3 (VPERM_1TI,      "vperm_1ti",      CONST, 	altivec_vperm_v1ti)
@@ -1497,10 +1498,11 @@  BU_ALTIVEC_OVERLOAD_3 (SEL,        "sel")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMMBM,   "vmsummbm")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMSHM,   "vmsumshm")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMSHS,   "vmsumshs")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMUBM,   "vmsumubm")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMUHM,   "vmsumuhm")
+BU_ALTIVEC_OVERLOAD_3 (VMSUMUDM,   "vmsumudm")
 BU_ALTIVEC_OVERLOAD_3 (VMSUMUHS,   "vmsumuhs")
 
 /* Altivec DST overloaded builtins.  */
 BU_ALTIVEC_OVERLOAD_D (DST,	   "dst")
 BU_ALTIVEC_OVERLOAD_D (DSTT,	   "dstt")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 3a109fe..c91980d3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -3086,10 +3086,16 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI },
   { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUHM,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
   { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMSHM,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
+
+  { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUDM,
+    RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V1TI },
+  { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUDM,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V1TI },
+
   { ALTIVEC_BUILTIN_VEC_VMSUMSHM, ALTIVEC_BUILTIN_VMSUMSHM,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
   { ALTIVEC_BUILTIN_VEC_VMSUMUHM, ALTIVEC_BUILTIN_VMSUMUHM,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
   { ALTIVEC_BUILTIN_VEC_VMSUMMBM, ALTIVEC_BUILTIN_VMSUMMBM,
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 95f7192..d95974a 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20207,10 +20207,17 @@  bool scalar_test_data_class (double source, const int condition);
 bool scalar_test_data_class (__ieee128 source, const int condition);
 
 bool scalar_test_neg (float source);
 bool scalar_test_neg (double source);
 bool scalar_test_neg (__ieee128 source);
+
+vector __uint128_t vec_msum (vector unsigned long long,
+			     vector unsigned long long,
+			     vector __uint128_t);
+vector __int128_t vec_msum (vector signed long long,
+			    vector signed long long,
+			    vector __int128_t);
 @end smallexample
 
 The @code{scalar_extract_exp} and @code{scalar_extract_sig}
 functions require a 64-bit environment supporting ISA 3.0 or later.
 The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in
@@ -20226,10 +20233,13 @@  When supplied with a 128-bit @code{source} argument, the
 treated similarly.
 Note that the sign of the significand is not represented in the result
 returned from the @code{scalar_extract_sig} function.  Use the
 @code{scalar_test_neg} function to test the sign of its @code{double}
 argument.
+The @code{vec_msum} functions multiply corresponding elements of arg1 and
+arg2 (widening), then add adjacent products and the matching element of arg3.
+ISA 3.0 adds support for @code{vec_msum} returning a vector __int128 result.
 
 The @code{scalar_insert_exp}
 functions require a 64-bit environment supporting ISA 3.0 or later.
 When supplied with a 64-bit first argument, the
 @code{scalar_insert_exp} built-in function returns a double-precision
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-msum-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-msum-runnable.c
new file mode 100644
index 0000000..0fa5c31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-msum-runnable.c
@@ -0,0 +1,74 @@ 
+/* { dg-do run { target { p9vector_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int
+main()
+{
+  vector __uint128_t arg_uint128, result_uint128, expected_uint128;
+  vector __int128_t arg_int128, result_int128, expected_int128;
+  /* Unsigned variant; 128-bit values are built from two 64-bit halves.  */
+  arg_uint128[0] = 0x1627384950617243;
+  arg_uint128[0] = arg_uint128[0] << 64;
+  arg_uint128[0] |= 0x9405182930415263;
+  expected_uint128[0] = 0x1627384950617243;
+  expected_uint128[0] = expected_uint128[0] << 64;
+  expected_uint128[0] |= 0xb6b07e42a570e5fe;
+  vector unsigned long long arg_vull2 = {0x12345678,0x44445555};
+  vector unsigned long long arg_vull3 = {0x6789abcd,0x66667777};
+  result_uint128 = vec_msum (arg_vull2, arg_vull3, arg_uint128);
+
+  if (result_uint128[0] != expected_uint128[0])
+    {
+#ifdef DEBUG
+       printf("result_uint128[0] doesn't match expected_u128[0]\n");
+       printf("arg_vull2  %llx %llx \n",  arg_vull2[0], arg_vull2[1]);
+       printf("arg_vull3  %llx %llx \n",  arg_vull3[0], arg_vull3[1]);
+       printf("arg_uint128[0] =  %llx ", (unsigned long long) (arg_uint128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (arg_uint128[0] & 0xFFFFFFFFFFFFFFFF));
+
+       printf("result_uint128[0] =  %llx ", (unsigned long long) (result_uint128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (result_uint128[0] & 0xFFFFFFFFFFFFFFFF));
+
+       printf("expected_uint128[0] =  %llx ", (unsigned long long) (expected_uint128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (expected_uint128[0] & 0xFFFFFFFFFFFFFFFF));
+#else
+       abort();
+#endif
+    }
+  /* Signed variant; same procedure with vector signed long long inputs.  */
+  arg_int128[0] = 0x1627384950617283;
+  arg_int128[0] = arg_int128[0] << 64;
+  arg_int128[0] |= 0x9405182930415263;
+  expected_int128[0] = 0x1627384950617283;
+  expected_int128[0] = expected_int128[0] << 64;
+  expected_int128[0] |= 0xd99f35969c11cbfa;
+  vector signed long long arg_vll2 = { 0x567890ab, 0x1233456 };
+  vector signed long long arg_vll3 = { 0xcdef0123, 0x9873451 };
+  result_int128 = vec_msum (arg_vll2, arg_vll3, arg_int128);
+
+  if (result_int128[0] != expected_int128[0])
+    {
+#ifdef DEBUG
+       printf("result_int128[0] doesn't match expected128[0]\n");
+       printf("arg_int128[0] =  %llx ", (unsigned long long) (arg_int128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (arg_int128[0] & 0xFFFFFFFFFFFFFFFF));
+
+       printf("result_int128[0] =  %llx ", (unsigned long long) (result_int128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (result_int128[0] & 0xFFFFFFFFFFFFFFFF));
+
+       printf("expected_int128[0] =  %llx ", (unsigned long long) (expected_int128[0] >> 64));
+       printf(" %llx\n", (unsigned long long) (expected_int128[0] & 0xFFFFFFFFFFFFFFFF));
+#else
+       abort();
+#endif
+    }
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-msum.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-msum.c
new file mode 100644
index 0000000..f2f7395
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-msum.c
@@ -0,0 +1,25 @@ 
+/* Verify that overloaded built-ins for vec_msum with __int128
+   inputs generate the proper code (vmsumudm, an ISA 3.0 instruction).  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O3" } */
+
+#include <altivec.h>
+
+vector signed __int128
+test_msum_si (vector signed long long vsll_1, vector signed long long vsll_2,
+	   vector signed __int128 vsi128)
+{
+  return vec_msum (vsll_1, vsll_2, vsi128);
+}
+
+vector unsigned __int128
+test_msum_ui (vector unsigned long long vull_1, vector unsigned long long vull_2,
+	   vector unsigned __int128 vui128)
+{
+  return vec_msum (vull_1, vull_2, vui128);
+}
+
+/* { dg-final { scan-assembler-times "vmsumudm" 2 } } */
+