diff mbox

[rs6000] Add additional support for vec_subc, vec_sube, vec_subec builtins.

Message ID 1498854721.9317.4.camel@us.ibm.com
State New
Headers show

Commit Message

Carl Love June 30, 2017, 8:32 p.m. UTC
GCC Maintainers:

The following patch adds additional support for the vec_subc, vec_sube,
and vec_subec builtins.  It also adds a few missing tests for currently
supported builtins.  The patch has been tested on
powerpc64le-unknown-linux-gnu (Power 8 LE) and
powerpc64-unknown-linux-gnu (Power 8 BE).

Please let me know if the following patch is acceptable.  Thanks.

                        Carl Love

---------------------------------------------------------


gcc/ChangeLog:

2017-06-29  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-c.c: Add support for built-in functions.
	vector signed int vec_subc (vector signed int, vector signed int);
	vector signed __int128 vec_subc (vector signed __int128, vector signed __int128);
	vector unsigned __int128 vec_subc (vector unsigned __int128, vector unsigned __int128);
	vector signed int vec_sube (vector signed int, vector signed int, vector signed int);
	vector unsigned int vec_sube (vector unsigned int, vector unsigned int, vector unsigned int);
	vector signed __int128 vec_sube (vector signed __int128, vector signed __int128, vector signed __int128);
	vector unsigned __int128 vec_sube (vector unsigned __int128, vector unsigned __int128, vector unsigned __int128);
	vector signed int vec_subec (vector signed int, vector signed int, vector signed int);
	vector unsigned int vec_subec (vector unsigned int, vector unsigned int, vector unsigned int);
	vector signed __int128 vec_subec (vector signed __int128, vector signed __int128, vector signed __int128);
	vector unsigned __int128 vec_subec (vector unsigned __int128, vector unsigned __int128, vector unsigned __int128);
	* config/rs6000/rs6000.c (ALTIVEC_BUILTIN_VEC_SUBE, ALTIVEC_BUILTIN_VEC_SUBEC): Add
	def_builtins.
	* config/rs6000/rs6000-builtin.def (SUBE, SUBEC): Add
	BU_ALTIVEC_OVERLOAD_X definitions.
	* config/rs6000/altivec.h (vec_sube, vec_subec): Add builtin defines.
	* doc/extend.texi: Update the built-in documentation file for the new
	built-in functions.

gcc/testsuite/ChangeLog:

2017-06-29  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/p8vector-builtin-8.c (foo): Add test cases for
	the new vec_subc, vec_sube, vec_subec built-ins. Add the missing test
	cases for vec_addc, adde and addec builtins.
---
 gcc/config/rs6000/altivec.h                        |   2 +
 gcc/config/rs6000/rs6000-builtin.def               |   2 +
 gcc/config/rs6000/rs6000-c.c                       | 106 +++++++++++++++++----
 gcc/config/rs6000/rs6000.c                         |   4 +
 gcc/doc/extend.texi                                |  27 ++++++
 .../gcc.target/powerpc/p8vector-builtin-8.c        |  29 +++++-
 6 files changed, 148 insertions(+), 22 deletions(-)

Comments

Segher Boessenkool July 1, 2017, 12:56 a.m. UTC | #1
Hi Carl,

On Fri, Jun 30, 2017 at 01:32:01PM -0700, Carl Love wrote:
> 	vector unsigned __int128 vec_sube (vector unsigned __int128, vector unsigned __int128, vector unsigned __int128);

Many of the changelog lines are much too long.

> 	* gcc.target/powerpc/p8vector-builtin-8.c (foo): Add test cases for
> 	the new vec_subc, vec_sube, vec_subec built-ins. Add the missing test
> 	cases for vec_addc, adde and addec builtins.

Two spaces after a full stop.

> @@ -5855,13 +5864,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>        /* else, fall through and process the Power9 alternative below */
>      }
>  
> -  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
> +  if ((fcode == ALTIVEC_BUILTIN_VEC_ADDE)
> +      || (fcode == ALTIVEC_BUILTIN_VEC_SUBE))
>      {
>        /* vec_adde needs to be special cased because there is no instruction
>  	  for the {un}signed int version.  */
>        if (nargs != 3)
>  	{
> -	  error ("vec_adde only accepts 3 arguments");
> +	  error ("vec_adde and vec_sube only accepts 3 arguments");
>  	  return error_mark_node;
>  	}

Please only print the relevant name; see how this is handled for
ALTIVEC_BUILTIN_VEC_SPLATS and ALTIVEC_BUILTIN_VEC_PROMOTE, for example.

>  
> @@ -5884,14 +5894,24 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>  	{
>  	  /* For {un}signed ints,
>  	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
> +						   vec_and (carryv, 0x1)).
> +	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
>  						   vec_and (carryv, 0x1)).  */

s/0x1/1/ (yeah I realise this was here before, but let's stop the silliness).

> @@ -5919,13 +5951,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>  	}
>      }
>  
> -  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
> +  if ((fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
> +      || (fcode == ALTIVEC_BUILTIN_VEC_SUBEC))

No superfluous parentheses please, they don't help readability.

> +	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
> +		 {
> +		   tree VADDECUQ_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];

Line too long.  Just name the local variable something shorter?  You don't
need VADDECUQ in the name.

> +vector signed __int128 vec_sube (vector signed __int128,
> +                                 vector signed __int128,
> +                                 vector signed__int128);

Missing space in that last line.

Some of the above happen more than once, please check.

Okay for trunk with those issues fixed.  Thanks!


Segher
diff mbox

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 417e143..45bf615 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -191,6 +191,8 @@ 
 #define vec_unsignedo __builtin_vec_vunsignedo
 #define vec_vsubfp __builtin_vec_vsubfp
 #define vec_subc __builtin_vec_subc
+#define vec_sube __builtin_vec_sube
+#define vec_subec __builtin_vec_subec
 #define vec_vsubsws __builtin_vec_vsubsws
 #define vec_vsubshs __builtin_vec_vsubshs
 #define vec_vsubsbs __builtin_vec_vsubsbs
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 9bdc2b4..f1c8ae0 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1465,6 +1465,8 @@  BU_ALTIVEC_OVERLOAD_X (STVLX,	   "stvlx")
 BU_ALTIVEC_OVERLOAD_X (STVLXL,	   "stvlxl")
 BU_ALTIVEC_OVERLOAD_X (STVRX,	   "stvrx")
 BU_ALTIVEC_OVERLOAD_X (STVRXL,	   "stvrxl")
+BU_ALTIVEC_OVERLOAD_X (SUBE,	   "sube")
+BU_ALTIVEC_OVERLOAD_X (SUBEC,	   "subec")
 BU_ALTIVEC_OVERLOAD_X (VCFSX,	   "vcfsx")
 BU_ALTIVEC_OVERLOAD_X (VCFUX,	   "vcfux")
 BU_ALTIVEC_OVERLOAD_X (VSPLTB,	   "vspltb")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 0752ef9..46a99d5 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2950,8 +2950,17 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+
+  { ALTIVEC_BUILTIN_VEC_SUBC, ALTIVEC_BUILTIN_VSUBCUW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_SUBC, ALTIVEC_BUILTIN_VSUBCUW,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SUBC, P8V_BUILTIN_VSUBCUQ,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SUBC, P8V_BUILTIN_VSUBCUQ,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+
   { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS,
@@ -5855,13 +5864,14 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       /* else, fall through and process the Power9 alternative below */
     }
 
-  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+  if ((fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+      || (fcode == ALTIVEC_BUILTIN_VEC_SUBE))
     {
       /* vec_adde needs to be special cased because there is no instruction
 	  for the {un}signed int version.  */
       if (nargs != 3)
 	{
-	  error ("vec_adde only accepts 3 arguments");
+	  error ("vec_adde and vec_sube only accepts 3 arguments");
 	  return error_mark_node;
 	}
 
@@ -5884,14 +5894,24 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	{
 	  /* For {un}signed ints,
 	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 0x1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
 						   vec_and (carryv, 0x1)).  */
 	  case SImode:
 	    {
+	      tree add_sub_builtin;
+
 	      vec<tree, va_gc> *params = make_tree_vector ();
 	      vec_safe_push (params, arg0);
 	      vec_safe_push (params, arg1);
-	      tree add_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
-	      tree call = altivec_resolve_overloaded_builtin (loc, add_builtin,
+
+	      if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	      tree call = altivec_resolve_overloaded_builtin (loc,
+							      add_sub_builtin,
 							      params);
 	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
 	      tree ones_vector = build_vector_from_val (arg0_type, const1);
@@ -5900,16 +5920,28 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	      params = make_tree_vector ();
 	      vec_safe_push (params, call);
 	      vec_safe_push (params, and_expr);
-	      return altivec_resolve_overloaded_builtin (loc, add_builtin,
+	      return altivec_resolve_overloaded_builtin (loc, add_sub_builtin,
 							 params);
 	    }
 	  /* For {un}signed __int128s use the vaddeuqm instruction
 		directly.  */
 	  case TImode:
 	    {
-	      tree adde_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
-	      return altivec_resolve_overloaded_builtin (loc, adde_bii,
-							 arglist);
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		 {
+		   tree adde_bii
+		     = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
+		   return altivec_resolve_overloaded_builtin (loc, adde_bii,
+							      arglist);
+		 }
+
+	       else
+		 {
+		   tree sube_bii
+		     = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBEUQM];
+		   return altivec_resolve_overloaded_builtin (loc, sube_bii,
+							      arglist);
+		 }
 	    }
 
 	  /* Types other than {un}signed int and {un}signed __int128
@@ -5919,13 +5951,14 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	}
     }
 
-  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+  if ((fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+      || (fcode == ALTIVEC_BUILTIN_VEC_SUBEC))
     {
-      /* vec_addec needs to be special cased because there is no instruction
-	for the {un}signed int version.  */
+      /* vec_addec and vec_subec needs to be special cased because there is
+	 no instruction for the {un}signed int version.  */
       if (nargs != 3)
 	{
-	  error ("vec_addec only accepts 3 arguments");
+	  error ("vec_addec and vec_subec only accepts 3 arguments");
 	  return error_mark_node;
 	}
 
@@ -5956,19 +5989,37 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    /* Use save_expr to ensure that operands used more than once
 		that may have side effects (like calls) are only evaluated
 		once.  */
+	    tree addc_subc_builtin;
+	    tree add_sub_builtin;
+
 	    arg0 = save_expr (arg0);
 	    arg1 = save_expr (arg1);
 	    vec<tree, va_gc> *params = make_tree_vector ();
 	    vec_safe_push (params, arg0);
 	    vec_safe_push (params, arg1);
-	    tree addc_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
-	    tree call1 = altivec_resolve_overloaded_builtin (loc, addc_builtin,
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      addc_subc_builtin
+		= rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
+	    else
+	      addc_subc_builtin
+		= rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_overloaded_builtin (loc,
+							     addc_subc_builtin,
 							     params);
 	    params = make_tree_vector ();
 	    vec_safe_push (params, arg0);
 	    vec_safe_push (params, arg1);
-	    tree add_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
-	    tree call2 = altivec_resolve_overloaded_builtin (loc, add_builtin,
+
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	    else
+	      add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	    tree call2 = altivec_resolve_overloaded_builtin (loc,
+							     add_sub_builtin,
 							     params);
 	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
 	    tree ones_vector = build_vector_from_val (arg0_type, const1);
@@ -5977,7 +6028,7 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    params = make_tree_vector ();
 	    vec_safe_push (params, call2);
 	    vec_safe_push (params, and_expr);
-	    call2 = altivec_resolve_overloaded_builtin (loc, addc_builtin,
+	    call2 = altivec_resolve_overloaded_builtin (loc, addc_subc_builtin,
 							params);
 	    params = make_tree_vector ();
 	    vec_safe_push (params, call1);
@@ -5986,12 +6037,25 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    return altivec_resolve_overloaded_builtin (loc, or_builtin,
 						       params);
 	    }
-	  /* For {un}signed __int128s use the vaddecuq instruction.  */
+	  /* For {un}signed __int128s use the vaddecuq/vsubbecuq
+	     instructions.  */
 	  case TImode:
 	    {
-	    tree VADDECUQ_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
-	    return altivec_resolve_overloaded_builtin (loc, VADDECUQ_bii,
-						       arglist);
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+		 {
+		   tree VADDECUQ_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
+		   return altivec_resolve_overloaded_builtin (loc,
+							      VADDECUQ_bii,
+							      arglist);
+		 }
+
+	       else
+		 {
+		   tree VSUBECUQ_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBECUQ];
+		   return altivec_resolve_overloaded_builtin (loc,
+							      VSUBECUQ_bii,
+							      arglist);
+		 }
 	    }
 	  /* Types other than {un}signed int and {un}signed __int128
 		are errors.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 181794d..60d2f9f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -17581,6 +17581,10 @@  altivec_init_builtins (void)
 		ALTIVEC_BUILTIN_VEC_CMPNE);
   def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
 		ALTIVEC_BUILTIN_VEC_MUL);
+  def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
+		ALTIVEC_BUILTIN_VEC_SUBE);
+  def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
+		ALTIVEC_BUILTIN_VEC_SUBEC);
 
   /* Cell builtins.  */
   def_builtin ("__builtin_altivec_lvlx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f87c1f3..efe1239 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17103,7 +17103,34 @@  vector unsigned char vec_vsububm (vector unsigned char,
 vector unsigned char vec_vsububm (vector unsigned char,
                                   vector unsigned char);
 
+vector signed int vec_subc (vector signed int, vector signed int);
 vector unsigned int vec_subc (vector unsigned int, vector unsigned int);
+vector signed __int128 vec_subc (vector signed __int128,
+                                 vector signed __int128);
+vector unsigned __int128 vec_subc (vector unsigned __int128,
+                                   vector unsigned __int128);
+
+vector signed int vec_sube (vector signed int, vector signed int,
+                            vector signed int);
+vector unsigned int vec_sube (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector signed __int128 vec_sube (vector signed __int128,
+                                 vector signed __int128,
+                                 vector signed__int128);
+vector unsigned __int128 vec_sube (vector unsigned __int128,
+                                   vector unsigned __int128,
+                                   vector unsigned __int128);
+
+vector signed int vec_subec (vector signed int, vector signed int,
+                             vector signed int);
+vector unsigned int vec_subec (vector unsigned int, vector unsigned int,
+                               vector unsigned int);
+vector signed __int128 vec_subec (vector signed __int128,
+                                  vector signed __int128,
+                                  vector signed__int128);
+vector unsigned __int128 vec_subec (vector unsigned __int128,
+                                    vector unsigned __int128,
+                                    vector unsigned __int128);
 
 vector unsigned char vec_subs (vector bool char, vector unsigned char);
 vector unsigned char vec_subs (vector unsigned char, vector bool char);
diff --git a/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c b/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
index 3334774..4b0370b 100644
--- a/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
@@ -11,7 +11,8 @@  vector unsigned char      vuca, vucb, vucc;
 vector bool     char      vbca, vbcb;
 vector unsigned short     vusa, vusb;
 vector bool     short     vbsa, vbsb;
-vector unsigned int       vuia, vuib;
+vector signed int         vsia, vsib, vsic;
+vector unsigned int       vuia, vuib, vuic;
 vector bool     int       vbia, vbib;
 vector signed   long long vsla, vslb;
 vector unsigned long long vula, vulb, vulc;
@@ -24,6 +25,7 @@  void foo (vector unsigned char *vucr,
 	  vector bool char *vbcr,
 	  vector unsigned short *vusr,
 	  vector bool short *vbsr,
+	  vector signed int *vsir,
 	  vector unsigned int *vuir,
 	  vector bool int *vbir,
 	  vector unsigned long long *vulr,
@@ -32,10 +34,16 @@  void foo (vector unsigned char *vucr,
 	  vector unsigned __int128 *vuxr,
 	  vector double *vdr)
 {
+  *vsir++ = vec_addc (vsia, vsib);
+  *vuir++ = vec_addc (vuia, vuib);
   *vsxr++ = vec_addc (vsxa, vsxb);
   *vuxr++ = vec_addc (vuxa, vuxb);
+  *vsir++ = vec_adde (vsia, vsib, vsic);
+  *vuir++ = vec_adde (vuia, vuib, vuic);
   *vsxr++ = vec_adde (vsxa, vsxb, vsxc);
   *vuxr++ = vec_adde (vuxa, vuxb, vuxc);
+  *vsir++ = vec_addec (vsia, vsib, vsic);
+  *vuir++ = vec_addec (vuia, vuib, vuic);
   *vsxr++ = vec_addec (vsxa, vsxb, vsxc);
   *vuxr++ = vec_addec (vuxa, vuxb, vuxc);
   *vucr++ = vec_bperm (vuca, vucb);
@@ -60,11 +68,30 @@  void foo (vector unsigned char *vucr,
   *vuxr++ = vec_pmsum_be (vula, vulb);
   *vuir++ = vec_shasigma_be (vuia, 0, 1);
   *vulr++ = vec_shasigma_be (vula, 0, 1);
+  *vsir++ = vec_subc (vsia, vsib);
+  *vuir++ = vec_subc (vuia, vuib);
+  *vsxr++ = vec_subc (vsxa, vsxb);
+  *vuxr++ = vec_subc (vuxa, vuxb);
+  *vsir++ = vec_sube (vsia, vsib, vsic);
+  *vuir++ = vec_sube (vuia, vuib, vuic);
+  *vsxr++ = vec_sube (vsxa, vsxb, vsxc);
+  *vuxr++ = vec_sube (vuxa, vuxb, vuxc);
+  *vsir++ = vec_subec (vsia, vsib, vsic);
+  *vuir++ = vec_subec (vuia, vuib, vuic);
+  *vsxr++ = vec_subec (vsxa, vsxb, vsxc);
+  *vuxr++ = vec_subec (vuxa, vuxb, vuxc);
 }
 
 /* { dg-final { scan-assembler-times "vaddcuq" 2 } } */
 /* { dg-final { scan-assembler-times "vaddeuqm" 2 } } */
 /* { dg-final { scan-assembler-times "vaddecuq" 2 } } */
+/* { dg-final { scan-assembler-times "vaddcuw" 6 } } */
+/* { dg-final { scan-assembler-times "vadduwm" 4 } } */
+/* { dg-final { scan-assembler-times "vsubcuq" 2 } } */
+/* { dg-final { scan-assembler-times "vsubeuqm" 2 } } */
+/* { dg-final { scan-assembler-times "vsubecuq" 2 } } */
+/* { dg-final { scan-assembler-times "vsubcuw" 4 } } */
+/* { dg-final { scan-assembler-times "vsubuwm" 4 } } */
 /* { dg-final { scan-assembler-times "vbpermq" 2 } } */
 /* { dg-final { scan-assembler-times "xxleqv" 4 } } */
 /* { dg-final { scan-assembler-times "vgbbd" 1 } } */