[rs6000] add vectorization to vec_mule and vec_mulo builtins

Message ID 1505146692.18797.52.camel@us.ibm.com
State New
Headers show
Series
  • [rs6000] add vectorization to vec_mule and vec_mulo builtins
Related show

Commit Message

Carl Love Sept. 11, 2017, 4:18 p.m.
GCC Maintainers:

The following patch re-adds the vectorization support for the vec_mule()
and vec_mulo() builtins to the tree.  The vectorization support was part
of the original patch, commit 249424, to add the builtin functionality.
But the vectorization part was pulled as part of commit 250295 due to an
underlying bug in the GCC vectorization support.  Bill Schmidt fixed the
underlying vectorization support in commit 251161 on 8/16/17 allowing
the vectorization support to be re-added for these builtins.

I have tested the patch on powerpc64le-unknown-linux-gnu (Power 8 LE),
powerpc64-unknown-linux-gnu (Power 8 BE) systems with no regressions.

Please let me know if the following patch is acceptable.  Thanks.

                        Carl Love
--------------------------------------------------------------------------------

gcc/ChangeLog:

2017-09-11  Carl Love  <cel@us.ibm.com>

        * config/rs6000/altivec.md (vec_widen_umult_even_v4si,
	vec_widen_smult_even_v4si, vec_widen_umult_odd_v4si,
	vec_widen_smult_odd_v4si): Add define expands for vmuleuw, vmulesw,
	vmulouw, vmulosw.
        * config/rs6000/rs6000-builtin.def (VMULEUW, VMULESW, VMULOUW,
        VMULOSW): Add definitions.
        * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
        ALTIVEC_BUILTIN_VMULESW, ALTIVEC_BUILTIN_VMULEUW,
        ALTIVEC_BUILTIN_VMULOSW, ALTIVEC_BUILTIN_VMULOUW entries.
        * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin,
        builtin_function_type): Add ALTIVEC_BUILTIN_* case statements.
---
 gcc/config/rs6000/altivec.md         | 54 ++++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-builtin.def |  8 +++---
 gcc/config/rs6000/rs6000-c.c         |  9 ++++++
 gcc/config/rs6000/rs6000.c           |  4 +++
 4 files changed, 71 insertions(+), 4 deletions(-)

Comments

Segher Boessenkool Sept. 12, 2017, 2:43 p.m. | #1
Hi Carl,

Just some nits:

On Mon, Sep 11, 2017 at 09:18:12AM -0700, Carl Love wrote:
> +(define_expand "vec_widen_smult_odd_v4si"
> +  [(use (match_operand:V2DI 0 "register_operand" ""))
> +   (use (match_operand:V4SI 1 "register_operand" ""))
> +   (use (match_operand:V4SI 2 "register_operand" ""))]

The "" here are superfluous.

> +  "TARGET_P8_VECTOR"
> +{
> +  if (VECTOR_ELT_ORDER_BIG)
> +    emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
> +				    operands[2]));
> +  else
> +    emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
> +				    operands[2]));
> +  DONE;
> +})

These lines fit without wrapping.

Looks fine otherwise, please commit!


Segher

Patch

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 0aa1e30..168cdb3 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1428,6 +1428,32 @@ 
   DONE;
 })
 
+(define_expand "vec_widen_umult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand"))
+   (use (match_operand:V4SI 1 "register_operand"))
+   (use (match_operand:V4SI 2 "register_operand"))]
+  "TARGET_P8_VECTOR"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand"))
+   (use (match_operand:V4SI 1 "register_operand"))
+   (use (match_operand:V4SI 2 "register_operand"))]
+  "TARGET_P8_VECTOR"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulesw (operands[0], operands[1], operands[2]));
+ else
+    emit_insn (gen_altivec_vmulosw (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 (define_expand "vec_widen_umult_odd_v16qi"
   [(use (match_operand:V8HI 0 "register_operand" ""))
    (use (match_operand:V16QI 1 "register_operand" ""))
@@ -1480,6 +1506,34 @@ 
   DONE;
 })
 
+(define_expand "vec_widen_umult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand"))
+   (use (match_operand:V4SI 1 "register_operand"))
+   (use (match_operand:V4SI 2 "register_operand"))]
+  "TARGET_P8_VECTOR"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_P8_VECTOR"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
+				    operands[2]));
+  else
+    emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
+				    operands[2]));
+  DONE;
+})
+
 (define_insn "altivec_vmuleub"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 850164a..18db576 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1031,14 +1031,14 @@  BU_ALTIVEC_2 (VMULEUB,	      "vmuleub",	CONST,	vec_widen_umult_even_v16qi)
 BU_ALTIVEC_2 (VMULESB,	      "vmulesb",	CONST,	vec_widen_smult_even_v16qi)
 BU_ALTIVEC_2 (VMULEUH,	      "vmuleuh",	CONST,	vec_widen_umult_even_v8hi)
 BU_ALTIVEC_2 (VMULESH,	      "vmulesh",	CONST,	vec_widen_smult_even_v8hi)
-BU_ALTIVEC_2 (VMULEUW,	      "vmuleuw",	CONST,  altivec_vmuleuw)
-BU_ALTIVEC_2 (VMULESW,	      "vmulesw",	CONST,  altivec_vmulesw)
+BU_ALTIVEC_2 (VMULEUW,	      "vmuleuw",	CONST,	vec_widen_umult_even_v4si)
+BU_ALTIVEC_2 (VMULESW,	      "vmulesw",	CONST,	vec_widen_smult_even_v4si)
 BU_ALTIVEC_2 (VMULOUB,	      "vmuloub",	CONST,	vec_widen_umult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOSB,	      "vmulosb",	CONST,	vec_widen_smult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOUH,	      "vmulouh",	CONST,	vec_widen_umult_odd_v8hi)
 BU_ALTIVEC_2 (VMULOSH,	      "vmulosh",	CONST,	vec_widen_smult_odd_v8hi)
-BU_ALTIVEC_2 (VMULOUW,	      "vmulouw",	CONST,  altivec_vmulouw)
-BU_ALTIVEC_2 (VMULOSW,	      "vmulosw",	CONST,  altivec_vmulosw)
+BU_ALTIVEC_2 (VMULOUW,	      "vmulouw",	CONST,	vec_widen_umult_odd_v4si)
+BU_ALTIVEC_2 (VMULOSW,	      "vmulosw",	CONST,	vec_widen_smult_odd_v4si)
 BU_ALTIVEC_2 (VNOR,	      "vnor",		CONST,	norv4si3)
 BU_ALTIVEC_2 (VOR,	      "vor",		CONST,	iorv4si3)
 BU_ALTIVEC_2 (VPKUHUM,	      "vpkuhum",	CONST,	altivec_vpkuhum)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index b2df850..fbab0a2 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2212,6 +2212,10 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULESH, ALTIVEC_BUILTIN_VMULESH,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_VMULEUW, ALTIVEC_BUILTIN_VMULEUW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_VMULESW, ALTIVEC_BUILTIN_VMULESW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUB,
     RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSB,
@@ -2233,6 +2237,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULOUB, ALTIVEC_BUILTIN_VMULOUB,
     RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+  { ALTIVEC_BUILTIN_VEC_VMULOUW, ALTIVEC_BUILTIN_VMULOUW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_VMULOSW, ALTIVEC_BUILTIN_VMULOSW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+
   { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V16QI,
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
   { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V8HI,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ecdf776..a5d5302 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16214,9 +16214,11 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     /* Even element flavors of vec_mul (signed). */
     case ALTIVEC_BUILTIN_VMULESB:
     case ALTIVEC_BUILTIN_VMULESH:
+    case ALTIVEC_BUILTIN_VMULESW:
     /* Even element flavors of vec_mul (unsigned).  */
     case ALTIVEC_BUILTIN_VMULEUB:
     case ALTIVEC_BUILTIN_VMULEUH:
+    case ALTIVEC_BUILTIN_VMULEUW:
       {
 	arg0 = gimple_call_arg (stmt, 0);
 	arg1 = gimple_call_arg (stmt, 1);
@@ -16229,9 +16231,11 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     /* Odd element flavors of vec_mul (signed).  */
     case ALTIVEC_BUILTIN_VMULOSB:
     case ALTIVEC_BUILTIN_VMULOSH:
+    case ALTIVEC_BUILTIN_VMULOSW:
     /* Odd element flavors of vec_mul (unsigned). */
     case ALTIVEC_BUILTIN_VMULOUB:
     case ALTIVEC_BUILTIN_VMULOUH:
+    case ALTIVEC_BUILTIN_VMULOUW:
       {
 	arg0 = gimple_call_arg (stmt, 0);
 	arg1 = gimple_call_arg (stmt, 1);