diff mbox series

[3/6,ver,3] RS6000 add 128-bit Integer Operations part 1

Message ID a1ee5e16bacbe0373a64f52c90e40d9fa017dc2c.camel@us.ibm.com
State New
Headers show
Series [1/6,ver,3] rs6000, Fix arguments in altivec_vrlwmi and altivec_rlwdi builtins | expand

Commit Message

Carl Love Jan. 19, 2021, 10:33 p.m. UTC
Will, Segher:

This patch adds instruction and builtin support for 128-bit integer
divide, modulo, shift, and compare operations.

version 3:

  int_128bit-runnable.c: Removed ppc_native_128bit from
  dg-require-effective-target; it was missed in an earlier cleanup.
  Tested on Power 8BE, Power9, Power10.
		       
version 2:

  Fixed the references to 128-bit in ChangeLog that got missed in the
  last go round.

  Fixed missing spaces in emit_insn calls.

  Re-tested the patch on Power 9 with no regression errors.

                    Carl Love

----------------------------------------------------------------------

gcc/ChangeLog

2021-01-12  Carl Love  <cel@us.ibm.com>
	* config/rs6000/altivec.h (vec_signextq, vec_dive, vec_mod): Add define
	for new builtins.
	* config/rs6000/altivec.md (UNSPEC_VMULEUD, UNSPEC_VMULESD,
	UNSPEC_VMULOUD, UNSPEC_VMULOSD): New unspecs.
	(altivec_eqv1ti, altivec_gtv1ti, altivec_gtuv1ti, altivec_vmuleud,
	altivec_vmuloud, altivec_vmulesd, altivec_vmulosd, altivec_vrlq,
	altivec_vrlqmi, altivec_vrlqmi_inst, altivec_vrlqnm,
	altivec_vrlqnm_inst, altivec_vslq, altivec_vsrq, altivec_vsraq,
	altivec_vcmpequt_p, altivec_vcmpgtst_p, altivec_vcmpgtut_p): New
	define_insn.
	(vec_widen_umult_even_v2di, vec_widen_smult_even_v2di,
	vec_widen_umult_odd_v2di, vec_widen_smult_odd_v2di, altivec_vrlqmi,
	altivec_vrlqnm): New define_expands.
	* config/rs6000/rs6000-builtin.def (VCMPEQUT_P, VCMPGTST_P,
	VCMPGTUT_P): Add macro expansions.
	(BU_P10V_AV_P): Add builtin predicate definition.
	(VCMPGTUT, VCMPGTST, VCMPEQUT, CMPNET, CMPGE_1TI,
	CMPGE_U1TI, CMPLE_1TI, CMPLE_U1TI, VNOR_V1TI_UNS, VNOR_V1TI, VCMPNET_P,
	VCMPAET_P, VSIGNEXTSD2Q, VMULEUD, VMULESD, VMULOUD, VMULOSD, VRLQ,
	VSLQ, VSRQ, VSRAQ, VRLQNM, DIV_V1TI, UDIV_V1TI, DIVES_V1TI, DIVEU_V1TI,
	MODS_V1TI, MODU_V1TI, VRLQMI): New macro expansions.
	(VRLQ, VSLQ, VSRQ, VSRAQ, DIVE, MOD, SIGNEXT): New overload expansions.
	* config/rs6000/rs6000-call.c (P10V_BUILTIN_VCMPEQUT,
	P10V_BUILTIN_CMPGE_1TI, P10V_BUILTIN_CMPGE_U1TI,
	P10V_BUILTIN_VCMPGTUT, P10V_BUILTIN_VCMPGTST,
	P10V_BUILTIN_CMPLE_1TI, P10V_BUILTIN_CMPLE_U1TI,
	P10V_BUILTIN_DIV_V1TI, P10V_BUILTIN_UDIV_V1TI,
	P10V_BUILTIN_VMULESD, P10V_BUILTIN_VMULEUD,
	P10V_BUILTIN_VMULOSD, P10V_BUILTIN_VMULOUD,
	P10V_BUILTIN_VNOR_V1TI, P10V_BUILTIN_VNOR_V1TI_UNS,
	P10V_BUILTIN_VRLQ, P10V_BUILTIN_VRLQMI,
	P10V_BUILTIN_VRLQNM, P10V_BUILTIN_VSLQ,
	P10V_BUILTIN_VSRQ, P10V_BUILTIN_VSRAQ,
	P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P,
	P10V_BUILTIN_VCMPEQUT_P, P10V_BUILTIN_VCMPGTUT_P,
	P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_CMPNET,
	P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P,
	P10V_BUILTIN_VSIGNEXTSD2Q, P10V_BUILTIN_DIVES_V1TI,
	P10V_BUILTIN_MODS_V1TI, P10V_BUILTIN_MODU_V1TI):
	New overloaded definitions.
	(rs6000_gimple_fold_builtin) [P10V_BUILTIN_VCMPEQUT,
	P10_BUILTIN_CMPNET, P10_BUILTIN_CMPGE_1TI,
	P10_BUILTIN_CMPGE_U1TI, P10_BUILTIN_VCMPGTUT,
	P10_BUILTIN_VCMPGTST, P10_BUILTIN_CMPLE_1TI,
	P10_BUILTIN_CMPLE_U1TI]: New case statements.
	(rs6000_init_builtins) [bool_V1TI_type_node, int_ftype_int_v1ti_v1ti]:
	New assignments.
	(altivec_init_builtins): New E_V1TImode case statement.
	(builtin_function_type)[P10_BUILTIN_128BIT_VMULEUD,
	P10_BUILTIN_128BIT_VMULOUD, P10_BUILTIN_128BIT_DIVEU_V1TI,
	P10_BUILTIN_128BIT_MODU_V1TI, P10_BUILTIN_CMPGE_U1TI,
	P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPEQUT]: New case statements.
	* config/rs6000/rs6000.c (rs6000_handle_altivec_attribute)[E_TImode,
	E_V1TImode]: New case statements.
	* config/rs6000/rs6000.h (rs6000_builtin_type_index): New enum
	value RS6000_BTI_bool_V1TI.
	* config/rs6000/vector.md (vector_gtv1ti, vector_nltv1ti,
	vector_gtuv1ti, vector_nltuv1ti, vector_ngtv1ti, vector_ngtuv1ti,
	vector_eq_v1ti_p, vector_ne_v1ti_p, vector_ae_v1ti_p,
	vector_gt_v1ti_p, vector_gtu_v1ti_p, vrotlv1ti3, vashlv1ti3,
	vlshrv1ti3, vashrv1ti3): New define_expands.
	* config/rs6000/vsx.md (UNSPEC_VSX_DIVSQ, UNSPEC_VSX_DIVUQ,
	UNSPEC_VSX_DIVESQ, UNSPEC_VSX_DIVEUQ, UNSPEC_VSX_MODSQ,
	UNSPEC_VSX_MODUQ): New unspecs.
	(mulv2di3, vsx_div_v1ti, vsx_udiv_v1ti, vsx_dives_v1ti,
	vsx_diveu_v1ti, vsx_mods_v1ti, vsx_modu_v1ti, xxswapd_v1ti,
	vsx_sign_extend_v2di_v1ti): New define_insns.
	(vcmpnet): New define_expand.
	* doc/extend.texi: Add documentation for the new builtins vec_rl,
	vec_rlmi, vec_rlnm, vec_sl, vec_sr, vec_sra, vec_mule, vec_mulo,
	vec_div, vec_dive, vec_mod, vec_cmpeq, vec_cmpne, vec_cmpgt, vec_cmplt,
	vec_cmpge, vec_cmple, vec_all_eq, vec_all_ne, vec_all_gt, vec_all_lt,
	vec_all_ge, vec_all_le, vec_any_eq, vec_any_ne, vec_any_gt, vec_any_lt,
	vec_any_ge, vec_any_le.

gcc/testsuite/ChangeLog

2021-01-12  Carl Love  <cel@us.ibm.com>
	* gcc.target/powerpc/int_128bit-runnable.c: New test file.
---
 gcc/config/rs6000/altivec.h                   |    4 +
 gcc/config/rs6000/altivec.md                  |  241 ++
 gcc/config/rs6000/rs6000-builtin.def          |   53 +-
 gcc/config/rs6000/rs6000-call.c               |  142 +-
 gcc/config/rs6000/rs6000.c                    |    1 +
 gcc/config/rs6000/rs6000.h                    |    3 +-
 gcc/config/rs6000/vector.md                   |  191 ++
 gcc/config/rs6000/vsx.md                      |  107 +
 gcc/doc/extend.texi                           |  174 ++
 .../gcc.target/powerpc/int_128bit-runnable.c  | 2301 +++++++++++++++++
 10 files changed, 3214 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 460310a5132..3dedccca189 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -717,6 +717,10 @@  __altivec_scalar_pred(vec_any_nle,
 #define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0)
 
 #ifdef _ARCH_PWR10
+#define vec_signextq  __builtin_vec_vsignextq
+#define vec_dive __builtin_vec_dive
+#define vec_mod  __builtin_vec_mod
+
 /* May modify these macro definitions if future capabilities overload
    with support for different vector argument and result types.  */
 #define vec_cntlzm(a, b)	__builtin_altivec_vclzdm (a, b)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 4d08cca2228..cb83c5ce012 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -39,12 +39,16 @@ 
    UNSPEC_VMULESH
    UNSPEC_VMULEUW
    UNSPEC_VMULESW
+   UNSPEC_VMULEUD
+   UNSPEC_VMULESD
    UNSPEC_VMULOUB
    UNSPEC_VMULOSB
    UNSPEC_VMULOUH
    UNSPEC_VMULOSH
    UNSPEC_VMULOUW
    UNSPEC_VMULOSW
+   UNSPEC_VMULOUD
+   UNSPEC_VMULOSD
    UNSPEC_VPKPX
    UNSPEC_VPACK_SIGN_SIGN_SAT
    UNSPEC_VPACK_SIGN_UNS_SAT
@@ -629,6 +633,14 @@ 
   "vcmpequ<VI_char> %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "altivec_eqv1ti"
+  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(eq:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v")
+		 (match_operand:V1TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10"
+  "vcmpequq %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_gt<mode>"
   [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
 	(gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
@@ -637,6 +649,14 @@ 
   "vcmpgts<VI_char> %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "*altivec_gtv1ti"
+  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(gt:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v")
+		 (match_operand:V1TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10"
+  "vcmpgtsq %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_gtu<mode>"
   [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
 	(gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
@@ -645,6 +665,14 @@ 
   "vcmpgtu<VI_char> %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "*altivec_gtuv1ti"
+  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v")
+		  (match_operand:V1TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10"
+  "vcmpgtuq %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_eqv4sf"
   [(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
 	(eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v")
@@ -1688,6 +1716,19 @@ 
  DONE;
 })
 
+(define_expand "vec_widen_umult_even_v2di"
+  [(use (match_operand:V1TI 0 "register_operand"))
+   (use (match_operand:V2DI 1 "register_operand"))
+   (use (match_operand:V2DI 2 "register_operand"))]
+  "TARGET_POWER10"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
 (define_expand "vec_widen_smult_even_v4si"
   [(use (match_operand:V2DI 0 "register_operand"))
    (use (match_operand:V4SI 1 "register_operand"))
@@ -1701,6 +1742,19 @@ 
   DONE;
 })
 
+(define_expand "vec_widen_smult_even_v2di"
+  [(use (match_operand:V1TI 0 "register_operand"))
+   (use (match_operand:V2DI 1 "register_operand"))
+   (use (match_operand:V2DI 2 "register_operand"))]
+  "TARGET_POWER10"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2]));
+ else
+    emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 (define_expand "vec_widen_umult_odd_v16qi"
   [(use (match_operand:V8HI 0 "register_operand"))
    (use (match_operand:V16QI 1 "register_operand"))
@@ -1766,6 +1820,19 @@ 
   DONE;
 })
 
+(define_expand "vec_widen_umult_odd_v2di"
+  [(use (match_operand:V1TI 0 "register_operand"))
+   (use (match_operand:V2DI 1 "register_operand"))
+   (use (match_operand:V2DI 2 "register_operand"))]
+  "TARGET_POWER10"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 (define_expand "vec_widen_smult_odd_v4si"
   [(use (match_operand:V2DI 0 "register_operand"))
    (use (match_operand:V4SI 1 "register_operand"))
@@ -1779,6 +1846,19 @@ 
   DONE;
 })
 
+(define_expand "vec_widen_smult_odd_v2di"
+  [(use (match_operand:V1TI 0 "register_operand"))
+   (use (match_operand:V2DI 1 "register_operand"))
+   (use (match_operand:V2DI 2 "register_operand"))]
+  "TARGET_POWER10"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 (define_insn "altivec_vmuleub"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1860,6 +1940,15 @@ 
   "vmuleuw %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmuleud"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+       (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
+                     (match_operand:V2DI 2 "register_operand" "v")]
+                    UNSPEC_VMULEUD))]
+  "TARGET_POWER10"
+  "vmuleud %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
 (define_insn "altivec_vmulouw"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1869,6 +1958,15 @@ 
   "vmulouw %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmuloud"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+       (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
+                     (match_operand:V2DI 2 "register_operand" "v")]
+                    UNSPEC_VMULOUD))]
+  "TARGET_POWER10"
+  "vmuloud %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
 (define_insn "altivec_vmulesw"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1878,6 +1976,15 @@ 
   "vmulesw %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmulesd"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+       (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
+                     (match_operand:V2DI 2 "register_operand" "v")]
+                    UNSPEC_VMULESD))]
+  "TARGET_POWER10"
+  "vmulesd %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
 (define_insn "altivec_vmulosw"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1887,6 +1994,15 @@ 
   "vmulosw %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmulosd"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+       (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
+                     (match_operand:V2DI 2 "register_operand" "v")]
+                    UNSPEC_VMULOSD))]
+  "TARGET_POWER10"
+  "vmulosd %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
 ;; Vector pack/unpack
 (define_insn "altivec_vpkpx"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
@@ -1980,6 +2096,15 @@ 
   "vrl<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vrlq"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+                     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+;; rotate amount needs to be in bits[57:63] of operand2.
+  "vrlq %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl<VI_char>mi"
   [(set (match_operand:VIlong 0 "register_operand" "=v")
         (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
@@ -1990,6 +2115,34 @@ 
   "vrl<VI_char>mi %0,%1,%3"
   [(set_attr "type" "veclogical")])
 
+(define_expand "altivec_vrlqmi"
+  [(set (match_operand:V1TI 0 "vsx_register_operand")
+	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand")
+		      (match_operand:V1TI 2 "vsx_register_operand")
+		      (match_operand:V1TI 3 "vsx_register_operand")]
+		     UNSPEC_VRLMI))]
+  "TARGET_POWER10"
+{
+  /* Mask bit begin, end fields need to be in bits [41:55] of 128-bit operand2.
+     Shift amount needs to be put in bits[57:63] of 128-bit operand2. */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[3]));
+  emit_insn (gen_altivec_vrlqmi_inst (operands[0], operands[1], operands[2],
+				      tmp));
+  DONE;
+})
+
+(define_insn "altivec_vrlqmi_inst"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+		      (match_operand:V1TI 2 "vsx_register_operand" "0")
+		      (match_operand:V1TI 3 "vsx_register_operand" "v")]
+		     UNSPEC_VRLMI))]
+  "TARGET_POWER10"
+  "vrlqmi %0,%1,%3"
+  [(set_attr "type" "veclogical")])
+
 (define_insn "altivec_vrl<VI_char>nm"
   [(set (match_operand:VIlong 0 "register_operand" "=v")
         (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
@@ -1999,6 +2152,31 @@ 
   "vrl<VI_char>nm %0,%1,%2"
   [(set_attr "type" "veclogical")])
 
+(define_expand "altivec_vrlqnm"
+  [(set (match_operand:V1TI 0 "vsx_register_operand")
+	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand")
+		      (match_operand:V1TI 2 "vsx_register_operand")]
+		     UNSPEC_VRLNM))]
+  "TARGET_POWER10"
+{
+  /* Shift amount needs to be put in bits[57:63] of 128-bit operand2. */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+  emit_insn (gen_altivec_vrlqnm_inst (operands[0], operands[1], tmp));
+  DONE;
+})
+
+(define_insn "altivec_vrlqnm_inst"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+		     UNSPEC_VRLNM))]
+  "TARGET_POWER10"
+  ;; rotate and mask bits need to be in upper 64-bits of operand2.
+  "vrlqnm %0,%1,%2"
+  [(set_attr "type" "veclogical")])
+
 (define_insn "altivec_vsl"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
@@ -2043,6 +2221,15 @@ 
   "vsl<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vslq"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+  /* Shift amount needs to be in bits[57:63] of 128-bit operand. */
+  "vslq %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "*altivec_vsr<VI_char>"
   [(set (match_operand:VI2 0 "register_operand" "=v")
         (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
@@ -2051,6 +2238,15 @@ 
   "vsr<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vsrq"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+  /* Shift amount needs to be in bits[57:63] of 128-bit operand. */
+  "vsrq %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "*altivec_vsra<VI_char>"
   [(set (match_operand:VI2 0 "register_operand" "=v")
         (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
@@ -2059,6 +2255,15 @@ 
   "vsra<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vsraq"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+  /* Shift amount needs to be in bits[57:63] of 128-bit operand. */
+  "vsraq %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vsr"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
@@ -2619,6 +2824,18 @@ 
   "vcmpequ<VI_char>. %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "altivec_vcmpequt_p"
+  [(set (reg:CC CR6_REGNO)
+	(unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand" "v")
+			   (match_operand:V1TI 2 "altivec_register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(eq:V1TI (match_dup 1)
+		 (match_dup 2)))]
+  "TARGET_POWER10"
+  "vcmpequq. %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_vcmpgts<VI_char>_p"
   [(set (reg:CC CR6_REGNO)
 	(unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v")
@@ -2631,6 +2848,18 @@ 
   "vcmpgts<VI_char>. %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "*altivec_vcmpgtst_p"
+  [(set (reg:CC CR6_REGNO)
+	(unspec:CC [(gt:CC (match_operand:V1TI 1 "register_operand" "v")
+			   (match_operand:V1TI 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V1TI 0 "register_operand" "=v")
+	(gt:V1TI (match_dup 1)
+		 (match_dup 2)))]
+  "TARGET_POWER10"
+  "vcmpgtsq. %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_vcmpgtu<VI_char>_p"
   [(set (reg:CC CR6_REGNO)
 	(unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v")
@@ -2643,6 +2872,18 @@ 
   "vcmpgtu<VI_char>. %0,%1,%2"
   [(set_attr "type" "veccmpfx")])
 
+(define_insn "*altivec_vcmpgtut_p"
+  [(set (reg:CC CR6_REGNO)
+	(unspec:CC [(gtu:CC (match_operand:V1TI 1 "register_operand" "v")
+			    (match_operand:V1TI 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V1TI 0 "register_operand" "=v")
+	(gtu:V1TI (match_dup 1)
+		  (match_dup 2)))]
+  "TARGET_POWER10"
+  "vcmpgtuq. %0,%1,%2"
+  [(set_attr "type" "veccmpfx")])
+
 (define_insn "*altivec_vcmpeqfp_p"
   [(set (reg:CC CR6_REGNO)
 	(unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 842f07196de..623907216af 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1201,6 +1201,15 @@ 
 		     | RS6000_BTC_TERNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
+/* See the comment on BU_ALTIVEC_P.  */
+#define BU_P10V_AV_P(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_P (P10V_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_altivec_" NAME,		/* NAME */	\
+		    RS6000_BTM_P10,	 		/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_PREDICATE),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
 #define BU_P10V_AV_X(ENUM, NAME, ATTR)					\
   RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_altivec_" NAME,		/* NAME */	\
@@ -2821,6 +2830,10 @@  BU_P9V_AV_1 (VSIGNEXTSH2D,	"vsignextsh2d",		CONST,  vsx_sign_extend_hi_v2di)
 BU_P9V_AV_1 (VSIGNEXTSW2D,	"vsignextsw2d",		CONST,  vsx_sign_extend_si_v2di)
 
 /* Builtins for scalar instructions added in ISA 3.1 (power10).  */
+BU_P10V_AV_P (VCMPEQUT_P,	"vcmpequt_p",	CONST,	vector_eq_v1ti_p)
+BU_P10V_AV_P (VCMPGTST_P,	"vcmpgtst_p",	CONST,	vector_gt_v1ti_p)
+BU_P10V_AV_P (VCMPGTUT_P,	"vcmpgtut_p",	CONST,	vector_gtu_v1ti_p)
+
 BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
 BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
 BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
@@ -2841,7 +2854,38 @@  BU_P10V_VSX_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvm_v16qi)
 BU_P10V_VSX_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvm_v8hi)
 BU_P10V_VSX_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvm_v4si)
 BU_P10V_VSX_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvm_v2di)
-
+BU_P10V_AV_2 (VCMPGTUT,		"vcmpgtut",	CONST,	vector_gtuv1ti)
+BU_P10V_AV_2 (VCMPGTST,		"vcmpgtst",	CONST,	vector_gtv1ti)
+BU_P10V_AV_2 (VCMPEQUT,		"vcmpequt",	CONST,	eqvv1ti3)
+BU_P10V_AV_2 (CMPNET,		"vcmpnet",	CONST,	vcmpnet)
+BU_P10V_AV_2 (CMPGE_1TI,	"cmpge_1ti",    CONST,  vector_nltv1ti)
+BU_P10V_AV_2 (CMPGE_U1TI,	"cmpge_u1ti",   CONST,  vector_nltuv1ti)
+BU_P10V_AV_2 (CMPLE_1TI,	"cmple_1ti",    CONST,  vector_ngtv1ti)
+BU_P10V_AV_2 (CMPLE_U1TI,	"cmple_u1ti",   CONST,  vector_ngtuv1ti)
+BU_P10V_AV_2 (VNOR_V1TI_UNS,	"vnor_v1ti_uns",CONST,	norv1ti3)
+BU_P10V_AV_2 (VNOR_V1TI,	"vnor_v1ti",	CONST,	norv1ti3)
+BU_P10V_AV_2 (VCMPNET_P,	"vcmpnet_p",	CONST,	vector_ne_v1ti_p)
+BU_P10V_AV_2 (VCMPAET_P,	"vcmpaet_p",	CONST,	vector_ae_v1ti_p)
+
+BU_P10V_AV_1 (VSIGNEXTSD2Q,	"vsignext",     CONST,  vsx_sign_extend_v2di_v1ti)
+
+BU_P10V_AV_2 (VMULEUD,	"vmuleud",	CONST,	vec_widen_umult_even_v2di)
+BU_P10V_AV_2 (VMULESD,	"vmulesd",	CONST,	vec_widen_smult_even_v2di)
+BU_P10V_AV_2 (VMULOUD,	"vmuloud",	CONST,	vec_widen_umult_odd_v2di)
+BU_P10V_AV_2 (VMULOSD,	"vmulosd",	CONST,	vec_widen_smult_odd_v2di)
+BU_P10V_AV_2 (VRLQ,		"vrlq",		CONST,	vrotlv1ti3)
+BU_P10V_AV_2 (VSLQ,		"vslq",		CONST,	vashlv1ti3)
+BU_P10V_AV_2 (VSRQ,		"vsrq",		CONST,	vlshrv1ti3)
+BU_P10V_AV_2 (VSRAQ,		"vsraq",	CONST,	vashrv1ti3)
+BU_P10V_AV_2 (VRLQNM,	"vrlqnm",	CONST,	altivec_vrlqnm)
+BU_P10V_AV_2 (DIV_V1TI,	"div_1ti",      CONST,  vsx_div_v1ti)
+BU_P10V_AV_2 (UDIV_V1TI,	"udiv_1ti",     CONST,  vsx_udiv_v1ti)
+BU_P10V_AV_2 (DIVES_V1TI,	"dives",	CONST,	vsx_dives_v1ti)
+BU_P10V_AV_2 (DIVEU_V1TI,	"diveu",	CONST,	vsx_diveu_v1ti)
+BU_P10V_AV_2 (MODS_V1TI,	"mods",		CONST,	vsx_mods_v1ti)
+BU_P10V_AV_2 (MODU_V1TI,	"modu",		CONST,	vsx_modu_v1ti)
+
+BU_P10V_AV_3 (VRLQMI,	"vrlqmi",	CONST,	altivec_vrlqmi)
 BU_P10V_AV_3 (VEXTRACTBL, "vextdubvlx", CONST, vextractlv16qi)
 BU_P10V_AV_3 (VEXTRACTHL, "vextduhvlx", CONST, vextractlv8hi)
 BU_P10V_AV_3 (VEXTRACTWL, "vextduwvlx", CONST, vextractlv4si)
@@ -2948,6 +2992,12 @@  BU_P10_OVERLOAD_2 (CLRR, "clrr")
 BU_P10_OVERLOAD_2 (GNB, "gnb")
 BU_P10_OVERLOAD_4 (XXEVAL, "xxeval")
 BU_P10_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
+BU_P10_OVERLOAD_2 (VRLQ, "vrlq")
+BU_P10_OVERLOAD_2 (VSLQ, "vslq")
+BU_P10_OVERLOAD_2 (VSRQ, "vsrq")
+BU_P10_OVERLOAD_2 (VSRAQ, "vsraq")
+BU_P10_OVERLOAD_2 (DIVE, "dive")
+BU_P10_OVERLOAD_2 (MOD, "mod")
 
 BU_P10_OVERLOAD_3 (EXTRACTL, "extractl")
 BU_P10_OVERLOAD_3 (EXTRACTH, "extracth")
@@ -2967,6 +3017,7 @@  BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p")
 BU_P10_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros")
 BU_P10_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_all_ones")
 
+BU_P10_OVERLOAD_1 (SIGNEXT, "vsignextq")
 
 BU_P10_OVERLOAD_1 (MTVSRBM, "mtvsrbm")
 BU_P10_OVERLOAD_1 (MTVSRHM, "mtvsrhm")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 3af325317a1..e9ba4751cd4 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -837,6 +837,10 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
     RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
     RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
@@ -883,6 +887,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI,
     RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, 0},
+
+  { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_1TI,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0},
+  { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_U1TI,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0},
   { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB,
     RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB,
@@ -897,8 +907,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD,
     RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTUT,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD,
     RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTST,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
     RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
@@ -941,6 +955,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI,
     RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, 0},
+  { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_1TI,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0},
+  { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_U1TI,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0},
   { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB,
     RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB,
@@ -1069,6 +1088,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
   { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIV_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_UDIV_V1TI,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
+
   { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP,
     RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 },
   { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVUXDDP,
@@ -1922,6 +1947,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_MULE, P8V_BUILTIN_VMULEUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULESD,
+    RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULEUD,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB,
     RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB,
@@ -1945,6 +1975,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_MULO, P8V_BUILTIN_VMULOUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOSD,
+    RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOUD,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH,
@@ -1987,6 +2022,16 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI,
     RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS,
@@ -2248,6 +2293,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
     RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
@@ -2266,12 +2316,23 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI },
+  { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
   { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
   { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, 0 },
+  { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
+  { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
@@ -2288,6 +2349,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
     RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
   { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
@@ -2484,6 +2550,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
     RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
@@ -2512,6 +2583,11 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
     RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
@@ -4129,12 +4205,16 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTUT_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+  { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTST_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
   { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P,
@@ -4199,6 +4279,10 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI },
+  { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+  { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
   { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
   { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
@@ -4250,12 +4334,16 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTUT_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+  { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTST_P,
+    RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
@@ -4904,6 +4992,12 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW,
     RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI,
+    RS6000_BTI_V1TI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET,
+    RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
 
   /* The following 2 entries have been deprecated.  */
   { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P,
@@ -5004,6 +5098,10 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P,
     RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI,
     RS6000_BTI_bool_V2DI, 0 },
+  { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
+    RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
+    RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
 
   { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P,
     RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
@@ -5109,7 +5207,10 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P,
     RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI,
     RS6000_BTI_bool_V2DI, 0 },
-
+  { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
+    RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
+    RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P,
     RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P,
@@ -6036,6 +6137,21 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
  { P10_BUILTIN_VEC_XVTLSBB_ONES, P10V_BUILTIN_XVTLSBB_ONES,
     RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 },
 
+ { P10_BUILTIN_VEC_SIGNEXT, P10V_BUILTIN_VSIGNEXTSD2Q,
+    RS6000_BTI_V1TI, RS6000_BTI_V2DI, 0, 0 },
+
+ { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V1TI,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
+
+  { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V1TI,
+    RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V1TI,
+    RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+    RS6000_BTI_unsigned_V1TI, 0 },
+
   { RS6000_BUILTIN_NONE, RS6000_BUILTIN_NONE, 0, 0, 0, 0 }
 };
 
@@ -12530,12 +12646,14 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case ALTIVEC_BUILTIN_VCMPEQUH:
     case ALTIVEC_BUILTIN_VCMPEQUW:
     case P8V_BUILTIN_VCMPEQUD:
+    case P10V_BUILTIN_VCMPEQUT:
       fold_compare_helper (gsi, EQ_EXPR, stmt);
       return true;
 
     case P9V_BUILTIN_CMPNEB:
     case P9V_BUILTIN_CMPNEH:
     case P9V_BUILTIN_CMPNEW:
+    case P10V_BUILTIN_CMPNET:
       fold_compare_helper (gsi, NE_EXPR, stmt);
       return true;
 
@@ -12547,6 +12665,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPGE_U4SI:
     case VSX_BUILTIN_CMPGE_2DI:
     case VSX_BUILTIN_CMPGE_U2DI:
+    case P10V_BUILTIN_CMPGE_1TI:
+    case P10V_BUILTIN_CMPGE_U1TI:
       fold_compare_helper (gsi, GE_EXPR, stmt);
       return true;
 
@@ -12558,6 +12678,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case ALTIVEC_BUILTIN_VCMPGTUW:
     case P8V_BUILTIN_VCMPGTUD:
     case P8V_BUILTIN_VCMPGTSD:
+    case P10V_BUILTIN_VCMPGTUT:
+    case P10V_BUILTIN_VCMPGTST:
       fold_compare_helper (gsi, GT_EXPR, stmt);
       return true;
 
@@ -12569,6 +12691,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPLE_U4SI:
     case VSX_BUILTIN_CMPLE_2DI:
     case VSX_BUILTIN_CMPLE_U2DI:
+    case P10V_BUILTIN_CMPLE_1TI:
+    case P10V_BUILTIN_CMPLE_U1TI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;
 
@@ -13296,6 +13420,8 @@  rs6000_init_builtins (void)
 					    ? "__vector __bool long"
 					    : "__vector __bool long long",
 					    bool_long_long_type_node, 2);
+  bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128",
+					    intTI_type_node, 1);
   pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
 					     pixel_type_node, 8);
 
@@ -13481,6 +13607,10 @@  altivec_init_builtins (void)
     = build_function_type_list (integer_type_node,
 				integer_type_node, V2DI_type_node,
 				V2DI_type_node, NULL_TREE);
+  tree int_ftype_int_v1ti_v1ti
+    = build_function_type_list (integer_type_node,
+				integer_type_node, V1TI_type_node,
+				V1TI_type_node, NULL_TREE);
   tree void_ftype_v4si
     = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
   tree v8hi_ftype_void
@@ -13848,6 +13978,9 @@  altivec_init_builtins (void)
 	case E_VOIDmode:
 	  type = int_ftype_int_opaque_opaque;
 	  break;
+	case E_V1TImode:
+	  type = int_ftype_int_v1ti_v1ti;
+	  break;
 	case E_V2DImode:
 	  type = int_ftype_int_v2di_v2di;
 	  break;
@@ -14451,6 +14584,10 @@  builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case P10V_BUILTIN_XXGENPCVM_V8HI:
     case P10V_BUILTIN_XXGENPCVM_V4SI:
     case P10V_BUILTIN_XXGENPCVM_V2DI:
+    case P10V_BUILTIN_VMULEUD:
+    case P10V_BUILTIN_VMULOUD:
+    case P10V_BUILTIN_DIVEU_V1TI:
+    case P10V_BUILTIN_MODU_V1TI:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
@@ -14550,10 +14687,13 @@  builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case VSX_BUILTIN_CMPGE_U8HI:
     case VSX_BUILTIN_CMPGE_U4SI:
     case VSX_BUILTIN_CMPGE_U2DI:
+    case P10V_BUILTIN_CMPGE_U1TI:
     case ALTIVEC_BUILTIN_VCMPGTUB:
     case ALTIVEC_BUILTIN_VCMPGTUH:
     case ALTIVEC_BUILTIN_VCMPGTUW:
     case P8V_BUILTIN_VCMPGTUD:
+    case P10V_BUILTIN_VCMPGTUT:
+    case P10V_BUILTIN_VCMPEQUT:
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
       break;
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 67681d18150..0c51491cbe7 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -19855,6 +19855,7 @@  rs6000_handle_altivec_attribute (tree *node,
     case 'b':
       switch (mode)
 	{
+	case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
 	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
 	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
 	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index b05dd827b13..d6c3ba040be 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2330,7 +2330,6 @@  extern int frame_pointer_needed;
 #define RS6000_BTM_MMA		MASK_MMA	/* ISA 3.1 MMA.  */
 #define RS6000_BTM_P10		MASK_POWER10
 
-
 #define RS6000_BTM_COMMON	(RS6000_BTM_ALTIVEC			\
 				 | RS6000_BTM_VSX			\
 				 | RS6000_BTM_P8_VECTOR			\
@@ -2443,6 +2442,7 @@  enum rs6000_builtin_type_index
   RS6000_BTI_bool_V8HI,          /* __vector __bool short */
   RS6000_BTI_bool_V4SI,          /* __vector __bool int */
   RS6000_BTI_bool_V2DI,          /* __vector __bool long */
+  RS6000_BTI_bool_V1TI,          /* __vector __bool __int128 */
   RS6000_BTI_pixel_V8HI,         /* __vector __pixel */
   RS6000_BTI_long,	         /* long_integer_type_node */
   RS6000_BTI_unsigned_long,      /* long_unsigned_type_node */
@@ -2496,6 +2496,7 @@  enum rs6000_builtin_type_index
 #define bool_V8HI_type_node	      (rs6000_builtin_types[RS6000_BTI_bool_V8HI])
 #define bool_V4SI_type_node	      (rs6000_builtin_types[RS6000_BTI_bool_V4SI])
 #define bool_V2DI_type_node	      (rs6000_builtin_types[RS6000_BTI_bool_V2DI])
+#define bool_V1TI_type_node	      (rs6000_builtin_types[RS6000_BTI_bool_V1TI])
 #define pixel_V8HI_type_node	      (rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
 
 #define long_long_integer_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long_long])
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index e5191bd1424..0f252c915b0 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -685,6 +685,13 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vector_gtv1ti"
+  [(set (match_operand:V1TI 0 "vlogical_operand")
+	(gt:V1TI (match_operand:V1TI 1 "vlogical_operand")
+		 (match_operand:V1TI 2 "vlogical_operand")))]
+  "TARGET_POWER10"
+  "")
+
 ; >= for integer vectors: swap operands and apply not-greater-than
 (define_expand "vector_nlt<mode>"
   [(set (match_operand:VEC_I 3 "vlogical_operand")
@@ -697,6 +704,17 @@ 
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })
 
+(define_expand "vector_nltv1ti"
+  [(set (match_operand:V1TI 3 "vlogical_operand")
+	(gt:V1TI (match_operand:V1TI 2 "vlogical_operand")
+		 (match_operand:V1TI 1 "vlogical_operand")))
+   (set (match_operand:V1TI 0 "vlogical_operand")
+        (not:V1TI (match_dup 3)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+})
+
 (define_expand "vector_gtu<mode>"
   [(set (match_operand:VEC_I 0 "vint_operand")
 	(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand")
@@ -704,6 +722,13 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vector_gtuv1ti"
+  [(set (match_operand:V1TI 0 "altivec_register_operand")
+	(gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand")
+		  (match_operand:V1TI 2 "altivec_register_operand")))]
+  "TARGET_POWER10"
+  "")
+
 ; >= for integer vectors: swap operands and apply not-greater-than
 (define_expand "vector_nltu<mode>"
   [(set (match_operand:VEC_I 3 "vlogical_operand")
@@ -716,6 +741,17 @@ 
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })
 
+(define_expand "vector_nltuv1ti"
+  [(set (match_operand:V1TI 3 "vlogical_operand")
+	(gtu:V1TI (match_operand:V1TI 2 "vlogical_operand")
+		  (match_operand:V1TI 1 "vlogical_operand")))
+   (set (match_operand:V1TI 0 "vlogical_operand")
+	(not:V1TI (match_dup 3)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+})
+
 (define_expand "vector_geu<mode>"
   [(set (match_operand:VEC_I 0 "vint_operand")
 	(geu:VEC_I (match_operand:VEC_I 1 "vint_operand")
@@ -735,6 +771,17 @@ 
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })
 
+(define_expand "vector_ngtv1ti"
+  [(set (match_operand:V1TI 3 "vlogical_operand")
+	(gt:V1TI (match_operand:V1TI 1 "vlogical_operand")
+		 (match_operand:V1TI 2 "vlogical_operand")))
+   (set (match_operand:V1TI 0 "vlogical_operand")
+        (not:V1TI (match_dup 3)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+})
+
 (define_expand "vector_ngtu<mode>"
   [(set (match_operand:VEC_I 3 "vlogical_operand")
 	(gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
@@ -746,6 +793,17 @@ 
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })
 
+(define_expand "vector_ngtuv1ti"
+  [(set (match_operand:V1TI 3 "vlogical_operand")
+	(gtu:V1TI (match_operand:V1TI 1 "vlogical_operand")
+		  (match_operand:V1TI 2 "vlogical_operand")))
+   (set (match_operand:V1TI 0 "vlogical_operand")
+        (not:V1TI (match_dup 3)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+})
+
 ; There are 14 possible vector FP comparison operators, gt and eq of them have
 ; been expanded above, so just support 12 remaining operators here.
 
@@ -894,6 +952,18 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vector_eq_v1ti_p"
+  [(parallel
+    [(set (reg:CC CR6_REGNO)
+	  (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand")
+			     (match_operand:V1TI 2 "altivec_register_operand"))]
+		     UNSPEC_PREDICATE))
+     (set (match_operand:V1TI 0 "vlogical_operand")
+	  (eq:V1TI (match_dup 1)
+		   (match_dup 2)))])]
+  "TARGET_POWER10"
+  "")
+
 ;; This expansion handles the V16QI, V8HI, and V4SI modes in the
 ;; implementation of the vec_all_ne built-in functions on Power9.
 (define_expand "vector_ne_<mode>_p"
@@ -976,6 +1046,23 @@ 
   operands[3] = gen_reg_rtx (V2DImode);
 })
 
+(define_expand "vector_ne_v1ti_p"
+  [(parallel
+    [(set (reg:CC CR6_REGNO)
+	  (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand")
+			     (match_operand:V1TI 2 "altivec_register_operand"))]
+		     UNSPEC_PREDICATE))
+     (set (match_dup 3)
+	  (eq:V1TI (match_dup 1)
+		   (match_dup 2)))])
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(eq:SI (reg:CC CR6_REGNO)
+	       (const_int 0)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx (V1TImode);
+})
+
 ;; This expansion handles the V2DI mode in the implementation of the
 ;; vec_any_eq built-in function on Power9.
 ;;
@@ -1002,6 +1089,26 @@ 
   operands[3] = gen_reg_rtx (V2DImode);
 })
 
+(define_expand "vector_ae_v1ti_p"
+  [(parallel
+    [(set (reg:CC CR6_REGNO)
+	  (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand")
+			     (match_operand:V1TI 2 "altivec_register_operand"))]
+		     UNSPEC_PREDICATE))
+     (set (match_dup 3)
+	  (eq:V1TI (match_dup 1)
+		   (match_dup 2)))])
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(eq:SI (reg:CC CR6_REGNO)
+	       (const_int 0)))
+   (set (match_dup 0)
+	(xor:SI (match_dup 0)
+		(const_int 1)))]
+  "TARGET_POWER10"
+{
+  operands[3] = gen_reg_rtx (V1TImode);
+})
+
 ;; This expansion handles the V4SF and V2DF modes in the Power9
 ;; implementation of the vec_all_ne built-in functions.  Note that the
 ;; expansions for this pattern with these modes makes no use of power9-
@@ -1061,6 +1168,18 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vector_gt_v1ti_p"
+  [(parallel
+    [(set (reg:CC CR6_REGNO)
+	  (unspec:CC [(gt:CC (match_operand:V1TI 1 "vlogical_operand")
+			     (match_operand:V1TI 2 "vlogical_operand"))]
+		     UNSPEC_PREDICATE))
+     (set (match_operand:V1TI 0 "vlogical_operand")
+	  (gt:V1TI (match_dup 1)
+		   (match_dup 2)))])]
+  "TARGET_POWER10"
+  "")
+
 (define_expand "vector_ge_<mode>_p"
   [(parallel
     [(set (reg:CC CR6_REGNO)
@@ -1085,6 +1204,18 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vector_gtu_v1ti_p"
+  [(parallel
+    [(set (reg:CC CR6_REGNO)
+	  (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand")
+			      (match_operand:V1TI 2 "altivec_register_operand"))]
+		     UNSPEC_PREDICATE))
+     (set (match_operand:V1TI 0 "altivec_register_operand")
+	  (gtu:V1TI (match_dup 1)
+		    (match_dup 2)))])]
+  "TARGET_POWER10"
+  "")
+
 ;; AltiVec/VSX predicates.
 
 ;; This expansion is triggered during expansion of predicate built-in
@@ -1460,6 +1591,20 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "vrotlv1ti3"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+                     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  /* The rotate amount must be placed in bits[57:63] of 128-bit operand2.  */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+  emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
+  DONE;
+})
+
 ;; Expanders for rotatert to make use of vrotl
 (define_expand "vrotr<mode>3"
   [(set (match_operand:VEC_I 0 "vint_operand")
@@ -1481,6 +1626,21 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+;; No immediate version of this 128-bit instruction
+(define_expand "vashlv1ti3"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  /* The shift amount must be placed in bits[57:63] of 128-bit operand2.  */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+  emit_insn (gen_altivec_vslq (operands[0], operands[1], tmp));
+  DONE;
+})
+
 ;; Expanders for logical shift right on each vector element
 (define_expand "vlshr<mode>3"
   [(set (match_operand:VEC_I 0 "vint_operand")
@@ -1489,6 +1649,21 @@ 
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+;; No immediate version of this 128-bit instruction
+(define_expand "vlshrv1ti3"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  /* The shift amount must be placed in bits[57:63] of 128-bit operand2.  */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+  emit_insn (gen_altivec_vsrq (operands[0], operands[1], tmp));
+  DONE;
+})
+
 ;; Expanders for arithmetic shift right on each vector element
 (define_expand "vashr<mode>3"
   [(set (match_operand:VEC_I 0 "vint_operand")
@@ -1496,6 +1671,22 @@ 
 			(match_operand:VEC_I 2 "vint_operand")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
+
+;; No immediate version of this 128-bit instruction
+(define_expand "vashrv1ti3"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
+		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  /* The shift amount must be placed in bits[57:63] of 128-bit operand2.  */
+  rtx tmp = gen_reg_rtx (V1TImode);
+
+  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+  emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
+  DONE;
+})
+
 
 ;; Vector reduction expanders for VSX
 ; The (VEC_reduc:...
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e17b9c556d4..fd779435390 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -298,6 +298,12 @@ 
    UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
+   UNSPEC_VSX_DIVSQ
+   UNSPEC_VSX_DIVUQ
+   UNSPEC_VSX_DIVESQ
+   UNSPEC_VSX_DIVEUQ
+   UNSPEC_VSX_MODSQ
+   UNSPEC_VSX_MODUQ
    UNSPEC_VSX_MULSD
    UNSPEC_VSX_SIGN_EXTEND
    UNSPEC_VSX_XVCVBF16SPN
@@ -1752,6 +1758,70 @@ 
 }
   [(set_attr "type" "div")])
 
+;; 64-bit multiply
+(define_insn "mulv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(mult:V2DI (match_operand:V2DI 1 "register_operand" "v")
+		   (match_operand:V2DI 2 "register_operand" "v")))]
+  "TARGET_POWER10"
+  "vmulld %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+;; Vector 128-bit integer signed/unsigned divide, vdives/vdiveu and modulo
+(define_insn "vsx_div_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_DIVSQ))]
+  "TARGET_POWER10"
+  "vdivsq %0,%1,%2"
+  [(set_attr "type" "div")])
+
+(define_insn "vsx_udiv_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_DIVUQ))]
+  "TARGET_POWER10"
+  "vdivuq %0,%1,%2"
+  [(set_attr "type" "div")])
+
+(define_insn "vsx_dives_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_DIVESQ))]
+  "TARGET_POWER10"
+  "vdivesq %0,%1,%2"
+  [(set_attr "type" "div")])
+
+(define_insn "vsx_diveu_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_DIVEUQ))]
+  "TARGET_POWER10"
+  "vdiveuq %0,%1,%2"
+  [(set_attr "type" "div")])
+
+(define_insn "vsx_mods_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_MODSQ))]
+  "TARGET_POWER10"
+  "vmodsq %0,%1,%2"
+  [(set_attr "type" "div")])
+
+(define_insn "vsx_modu_v1ti"
+ [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
+                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
+                     UNSPEC_VSX_MODUQ))]
+  "TARGET_POWER10"
+  "vmoduq %0,%1,%2"
+  [(set_attr "type" "div")])
+
 ;; *tdiv* instruction returning the FG flag
 (define_expand "vsx_tdiv<mode>3_fg"
   [(set (match_dup 3)
@@ -3103,6 +3173,21 @@ 
   "xxpermdi %x0,%x1,%x1,2"
   [(set_attr "type" "vecperm")])
 
+;; Swap upper/lower 64-bit values in a 128-bit vector
+(define_insn "xxswapd_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(subreg:V1TI
+           (vec_select:V2DI
+             (subreg:V2DI
+                (match_operand:V1TI 1 "vsx_register_operand" "v") 0 )
+	     (parallel [(const_int 1)(const_int 0)]))
+           0))]
+  "TARGET_POWER10"
+;; AIX does not support extended mnemonic xxswapd.  Use the basic
+;; mnemonic xxpermdi instead.
+  "xxpermdi %x0,%x1,%x1,2"
+  [(set_attr "type" "vecperm")])
+
 (define_insn "xxgenpcvm_<mode>_internal"
   [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
 	(unspec:VSX_EXTRACT_I4
@@ -4787,6 +4872,15 @@ 
    (set_attr "type" "vecload")])
 
 
+;; ISA 3.1 vector extend sign support
+(define_insn "vsx_sign_extend_v2di_v1ti"
+  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
+	(unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")]
+		     UNSPEC_VSX_SIGN_EXTEND))]
+  "TARGET_POWER10"
+  "vextsd2q %0,%1"
+  [(set_attr "type" "vecexts")])
+
 ;; ISA 3.0 vector extend sign support
 
 (define_insn "vsx_sign_extend_qi_<mode>"
@@ -5502,6 +5596,19 @@ 
   "vcmpneb %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+;; Vector Compare Not Equal v1ti (specified/not+eq:)
+(define_expand "vcmpnet"
+  [(set (match_operand:V1TI 0 "altivec_register_operand")
+	(not:V1TI
+	  (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
+		   (match_operand:V1TI 2 "altivec_register_operand"))))]
+   "TARGET_POWER10"
+{
+  emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
+  emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
+  DONE;
+})
+
 ;; Vector Compare Not Equal or Zero Byte
 (define_insn "vcmpnezb"
   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index feaa4929697..f2efd73e12d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21662,6 +21662,180 @@  Generate PCV from specified Mask size, as if implemented by the
 immediate value is either 0, 1, 2 or 3.
 @findex vec_genpcvm
 
+@smallexample
+@exdent vector unsigned __int128 vec_rl (vector unsigned __int128,
+                                         vector unsigned __int128);
+@exdent vector signed __int128 vec_rl (vector signed __int128,
+                                       vector unsigned __int128);
+@end smallexample
+
+Returns the result of rotating the first input left by the number of bits
+specified in the most significant quad word of the second input truncated to
+7 bits (bits [125:131]).
+
+@smallexample
+@exdent vector unsigned __int128 vec_rlmi (vector unsigned __int128,
+                                           vector unsigned __int128,
+                                           vector unsigned __int128);
+@exdent vector signed __int128 vec_rlmi (vector signed __int128,
+                                         vector signed __int128,
+                                         vector unsigned __int128);
+@end smallexample
+
+Returns the result of rotating the first input and inserting it under mask
+into the second input.  The first bit in the mask and the last bit in the mask
+are obtained from the two 7-bit fields bits [108:115] and bits [117:123]
+respectively of the second input.  The shift is obtained from the third input
+in the 7-bit field [125:131] where all bits are counted from zero at the left.
+
+@smallexample
+@exdent vector unsigned __int128 vec_rlnm (vector unsigned __int128,
+                                           vector unsigned __int128,
+                                           vector unsigned __int128);
+@exdent vector signed __int128 vec_rlnm (vector signed __int128,
+                                         vector unsigned __int128,
+                                         vector unsigned __int128);
+@end smallexample
+
+Returns the result of rotating the first input and ANDing it with a mask.  The
+first bit in the mask and the last bit in the mask are obtained from the two
+7-bit fields bits [117:123] and bits [125:131] respectively of the second
+input.  The shift is obtained from the third input in the 7-bit field bits
+[125:131] where all bits are counted from zero at the left.
+
+@smallexample
+@exdent vector unsigned __int128 vec_sl(vector unsigned __int128, vector unsigned __int128);
+@exdent vector signed __int128 vec_sl(vector signed __int128, vector unsigned __int128);
+@end smallexample
+
+Returns the result of shifting the first input left by the number of bits
+specified in the most significant bits of the second input truncated to
+7 bits (bits [125:131]).
+
+@smallexample
+@exdent vector unsigned __int128 vec_sr(vector unsigned __int128, vector unsigned __int128);
+@exdent vector signed __int128 vec_sr(vector signed __int128, vector unsigned __int128);
+@end smallexample
+
+Returns the result of performing a logical right shift of the first argument
+by the number of bits specified in the most significant double word of the
+second input truncated to 7 bits (bits [125:131]).
+
+@smallexample
+@exdent vector unsigned __int128 vec_sra(vector unsigned __int128, vector unsigned __int128);
+@exdent vector signed __int128 vec_sra(vector signed __int128, vector unsigned __int128);
+@end smallexample
+
+Returns the result of performing arithmetic right shift of the first argument
+by the number of bits specified in the most significant bits of the
+second input truncated to 7 bits (bits [125:131]).
+
+@smallexample
+@exdent vector unsigned __int128 vec_mule (vector unsigned long long,
+                                           vector unsigned long long);
+@exdent vector signed __int128 vec_mule (vector signed long long,
+                                         vector signed long long);
+@end smallexample
+
+Returns a vector containing a 128-bit integer result of multiplying the even
+doubleword elements of the two inputs.
+
+@smallexample
+@exdent vector unsigned __int128 vec_mulo (vector unsigned long long,
+                                           vector unsigned long long);
+@exdent vector signed __int128 vec_mulo (vector signed long long,
+                                         vector signed long long);
+@end smallexample
+
+Returns a vector containing a 128-bit integer result of multiplying the odd
+doubleword elements of the two inputs.
+
+@smallexample
+@exdent vector unsigned __int128 vec_div (vector unsigned __int128,
+                                          vector unsigned __int128);
+@exdent vector signed __int128 vec_div (vector signed __int128,
+                                        vector signed __int128);
+@end smallexample
+
+Returns the result of dividing the first operand by the second operand. An
+attempt to divide any value by zero or to divide the most negative signed
+128-bit integer by negative one results in an undefined value.
+
+@smallexample
+@exdent vector unsigned __int128 vec_dive (vector unsigned __int128,
+                                           vector unsigned __int128);
+@exdent vector signed __int128 vec_dive (vector signed __int128,
+                                         vector signed __int128);
+@end smallexample
+
+The result is produced by shifting the first input left by 128 bits and
+dividing by the second.  If an attempt is made to divide by zero or the result
+is larger than 128 bits, the result is undefined.
+
+@smallexample
+@exdent vector unsigned __int128 vec_mod (vector unsigned __int128,
+                                          vector unsigned __int128);
+@exdent vector signed __int128 vec_mod (vector signed __int128,
+                                        vector signed __int128);
+@end smallexample
+
+The result is the modulo result of dividing the first input by the second
+input.
+
+The following builtins perform 128-bit vector comparisons.  The
+@code{vec_all_xx}, @code{vec_any_xx}, and @code{vec_cmpxx}, where @code{xx} is
+one of the operations @code{eq, ne, gt, lt, ge, le}, perform pairwise
+comparisons between the elements at the same positions within their two vector
+arguments.  The @code{vec_all_xx} function returns a non-zero value if and only
+if all pairwise comparisons are true.  The @code{vec_any_xx} function returns
+a non-zero value if and only if at least one pairwise comparison is true.  The
+@code{vec_cmpxx} function returns a vector of the same type as its two
+arguments, within which each element consists of all ones to denote that the
+specified logical comparison of the corresponding elements was true.
+Otherwise, the element of the returned vector contains all zeros.
+
+@smallexample
+vector bool __int128 vec_cmpeq (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmpeq (vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_cmpne (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmpne (vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_cmpgt (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmpgt (vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_cmplt (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmplt (vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_cmpge (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmpge (vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_cmple (vector signed __int128, vector signed __int128);
+vector bool __int128 vec_cmple (vector unsigned __int128, vector unsigned __int128);
+
+int vec_all_eq (vector signed __int128, vector signed __int128);
+int vec_all_eq (vector unsigned __int128, vector unsigned __int128);
+int vec_all_ne (vector signed __int128, vector signed __int128);
+int vec_all_ne (vector unsigned __int128, vector unsigned __int128);
+int vec_all_gt (vector signed __int128, vector signed __int128);
+int vec_all_gt (vector unsigned __int128, vector unsigned __int128);
+int vec_all_lt (vector signed __int128, vector signed __int128);
+int vec_all_lt (vector unsigned __int128, vector unsigned __int128);
+int vec_all_ge (vector signed __int128, vector signed __int128);
+int vec_all_ge (vector unsigned __int128, vector unsigned __int128);
+int vec_all_le (vector signed __int128, vector signed __int128);
+int vec_all_le (vector unsigned __int128, vector unsigned __int128);
+
+int vec_any_eq (vector signed __int128, vector signed __int128);
+int vec_any_eq (vector unsigned __int128, vector unsigned __int128);
+int vec_any_ne (vector signed __int128, vector signed __int128);
+int vec_any_ne (vector unsigned __int128, vector unsigned __int128);
+int vec_any_gt (vector signed __int128, vector signed __int128);
+int vec_any_gt (vector unsigned __int128, vector unsigned __int128);
+int vec_any_lt (vector signed __int128, vector signed __int128);
+int vec_any_lt (vector unsigned __int128, vector unsigned __int128);
+int vec_any_ge (vector signed __int128, vector signed __int128);
+int vec_any_ge (vector unsigned __int128, vector unsigned __int128);
+int vec_any_le (vector signed __int128, vector signed __int128);
+int vec_any_le (vector unsigned __int128, vector unsigned __int128);
+@end smallexample
+
+
 @node PowerPC Hardware Transactional Memory Built-in Functions
 @subsection PowerPC Hardware Transactional Memory Built-in Functions
 GCC provides two interfaces for accessing the Hardware Transactional
diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
new file mode 100644
index 00000000000..3f8892b39d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
@@ -0,0 +1,2301 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcpu=power10 -save-temps" } */
+/* { dg-require-effective-target power10_hw } */
+
+/* Check that the expected 128-bit instructions are generated if the processor
+   supports the 128-bit integer instructions. */
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvslq\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvsrq\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvsraq\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvrlq\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvrlqnm\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvrlqmi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvcmpuq\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvcmpsq\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvcmpequq\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvcmpequq.\M} 16 } } */
+/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvcmpgtsq.\M} 16 } } */
+/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 0 } } */
+/* { dg-final { scan-assembler-times {\mvcmpgtuq.\M} 16 } } */
+/* { dg-final { scan-assembler-times {\mvmuleud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmuloud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulesd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulosd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulld\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivsq\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivuq\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivesq\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveuq\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodsq\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmoduq\M} 1 } } */
+
+#include <altivec.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Debug helper: print VAL as high/low decimal halves, then as 128-bit hex.  */
+void print_i128(__int128_t val)
+{
+  printf(" %lld %llu (0x%llx %016llx)",
+	 (signed long long)(val >> 64),
+	 (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF),
+	 (unsigned long long)(val >> 64),
+	 (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
+}
+#endif
+
+void abort (void);
+
+int main ()
+{
+  int i, result_int;
+
+  __int128_t arg1, result;
+  __uint128_t uarg2;
+
+  vector signed long long int vec_arg1_di, vec_arg2_di;
+  vector signed long long int vec_result_di, vec_expected_result_di;
+  vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di;
+  vector unsigned long long int vec_uresult_di;
+  vector unsigned long long int vec_uexpected_result_di;
+  
+  __int128_t expected_result;
+  __uint128_t uexpected_result;
+
+  vector __int128 vec_arg1, vec_arg2, vec_result;
+  vector unsigned __int128 vec_uarg1, vec_uarg2, vec_uarg3, vec_uresult;
+  vector bool __int128  vec_result_bool;
+
+  /* sign extend double to 128-bit integer  */
+  vec_arg1_di[0] = 1000;
+  vec_arg1_di[1] = -123456;
+
+  expected_result = 1000;
+
+  vec_result = vec_signextq (vec_arg1_di);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_signextq ((long long) %lld) =  ",  vec_arg1_di[0]);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1_di[0] = -123456;
+  vec_arg1_di[1] = 1000;
+
+  expected_result = -123456;
+
+  vec_result = vec_signextq (vec_arg1_di);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_signextq ((long long) %lld) =  ",  vec_arg1_di[0]);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  /* test shift 128-bit integers.
+     Note, shift amount is given by the lower 7-bits of the shift amount. */
+  vec_arg1[0] = 3;
+  vec_uarg2[0] = 2;
+  expected_result = vec_arg1[0]*4;
+
+  vec_result = vec_sl (vec_arg1, vec_uarg2);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_sl(int128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" << %lld", vec_uarg2[0] & 0xFF);
+    printf(" = ");
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  arg1 = 3;
+  uarg2 = 4;
+  expected_result = arg1*16;
+
+  result = arg1 << uarg2;
+
+  if (result != expected_result) {
+#if DEBUG
+    printf("ERROR: int128 << uint128):  ");
+    print_i128(arg1);
+    printf(" << %lld", uarg2 & 0xFF);
+    printf(" = ");
+    print_i128(result);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 3;
+  vec_uarg2[0] = 2;
+  uexpected_result = vec_uarg1[0]*4;
+  
+  vec_uresult = vec_sl (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_sl(uint128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" << %lld", vec_uarg2[0] & 0xFF);
+    printf(" = ");
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12;
+  vec_uarg2[0] = 2;
+  expected_result = vec_arg1[0]/4;
+
+  vec_result = vec_sr (vec_arg1, vec_uarg2);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_sr(int128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" >> %lld", vec_uarg2[0] & 0xFF);
+    printf(" = ");
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 48;
+  vec_uarg2[0] = 2;
+  uexpected_result = vec_uarg1[0]/4;
+  
+  vec_uresult = vec_sr (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_sr(uint128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" >> %lld", vec_uarg2[0] & 0xFF);
+    printf(" = ");
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  arg1 = 48;
+  uarg2 = 4;
+  expected_result = arg1/16;
+
+  result = arg1 >> uarg2;
+
+  if (result != expected_result) {
+#if DEBUG
+    printf("ERROR: int128 >> uint128:  ");
+    print_i128(arg1);
+    printf(" >> %lld", uarg2 & 0xFF);
+    printf(" = ");
+    print_i128(result);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 0x1234567890ABCDEFULL;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL;
+  vec_uarg2[0] = 32;
+  expected_result = 0x0000000012345678ULL;
+  expected_result = (expected_result << 64) | 0x90ABCDEFAABBCCDDULL;
+
+  vec_result = vec_sra (vec_arg1, vec_uarg2);
+  
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_sra(int128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" >> %lld = \n", vec_uarg2[0]);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL;
+  vec_uarg2[0] = 48;
+  uexpected_result = 0xFFFFFFFFFFFFAABBLL;
+  uexpected_result = (uexpected_result << 64) | 0xCCDDEEFF11221234ULL;
+
+  vec_uresult = vec_sra (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_sra(uint128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" >> %lld = \n", vec_uarg2[0] & 0xFF);
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 0x1234567890ABCDEFULL;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL;
+  vec_uarg2[0] = 32;
+  expected_result = 0x90ABCDEFAABBCCDDULL;
+  expected_result = (expected_result << 64) | 0xEEFF112212345678ULL;
+
+  vec_result = vec_rl (vec_arg1, vec_uarg2);
+  
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_rl(int128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" >> %lld = \n", vec_uarg2[0]);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL;
+  vec_uarg2[0] = 48;
+  uexpected_result = 0x11221234567890ABULL;
+  uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEEFFULL;
+
+  vec_uresult = vec_rl (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_rl(uint128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" >> %lld = \n", vec_uarg2[0]);
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* vec_rlnm(arg1, arg2, arg3)
+     result - rotate each element of arg1 left by shift in element of arg2.
+       Then AND with mask whose  start/stop bits are specified in element of
+       arg3.  */
+  vec_arg1[0] = 0x1234567890ABCDEFULL;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL;
+  vec_uarg2[0] = 32;
+  vec_uarg3[0] = (32 << 8) | 95;
+  expected_result = 0xaabbccddULL;
+  expected_result = (expected_result << 64) | 0xeeff112200000000ULL;
+
+  vec_result = vec_rlnm (vec_arg1, vec_uarg2, vec_uarg3);
+  
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_rlnm(int128, uint128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" << %lld = \n", vec_uarg3[0] & 0xFF);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  
+
+  /* vec_rlnm(arg1, arg2, arg3)
+     result - rotate each element of arg1 left by shift in element of arg2;
+       then AND with mask whose  start/stop bits are specified in element of
+       arg3.  */
+  vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL;
+  vec_uarg2[0] = 48;
+  vec_uarg3[0] = (8 << 8) | 119;
+
+  uexpected_result = 0x00221234567890ABULL;
+  uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEE00ULL;
+
+  vec_uresult = vec_rlnm (vec_uarg1, vec_uarg2, vec_uarg3);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_rlnm(uint128, uint128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" << %lld = \n", vec_uarg3[0] & 0xFF);
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* vec_rlmi(arg1, arg2, arg3)
+     result - rotate each element of arg1 left and insert it into the
+       corresponding element of arg2 under the mask specified in arg3.  The
+       shift, mask start and mask end are all specified in arg3.  */
+  vec_arg1[0] = 0x1234567890ABCDEFULL;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL;
+  vec_arg2[0] = 0x000000000000DEADULL;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 0x0000BEEF00000000ULL;
+  vec_uarg3[0] = 96 << 16 | 127 << 8 | 32;
+  expected_result = 0x000000000000DEADULL;
+  expected_result = (expected_result << 64) | 0x0000BEEF12345678ULL;
+
+  vec_result = vec_rlmi (vec_arg1, vec_arg2, vec_uarg3);
+  
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_rlmi(int128, int128, uint128):  ");
+    print_i128(vec_arg1[0]);
+    printf(" << %lld = \n", vec_uarg2_di[1] & 0xFF);
+    print_i128(vec_result[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* vec_rlmi(arg1, arg2, arg3)
+     result - rotate each element of arg1 left and insert it into the
+       corresponding element of arg2 under the mask specified in arg3.  The
+       shift, mask start and mask end are all specified in arg3.  */
+  vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL;
+  vec_uarg2[0] = 0xDEAD000000000000ULL;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 0x000000000000BEEFULL;
+  vec_uarg3[0] = 16 << 16 | 111 << 8 | 48;
+  uexpected_result = 0xDEAD1234567890ABULL;
+  uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDBEEFULL;
+
+  vec_uresult = vec_rlmi (vec_uarg1, vec_uarg2, vec_uarg3);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_rlmi(uint128, unit128, uint128):  ");
+    print_i128(vec_uarg1[0]);
+    printf(" << %lld = \n", vec_uarg3[1] & 0xFF);
+    print_i128(vec_uresult[0]);
+    printf("\n does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* 128-bit compare tests, result is all 1's if true */
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1[0] = 2468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  uexpected_result = 0xFFFFFFFFFFFFFFFFULL;
+  uexpected_result = (uexpected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpgt (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: unsigned vec_cmpgt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpgt (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed vec_cmpgt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = -1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR:not equal signed vec_cmpeq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed equal vec_cmpeq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 12468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  not equal vec_cmpeq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: equal unsigned vec_cmpeq ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 12468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  not equal vec_cmpne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: equal unsigned vec_cmpne ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = -1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpne (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR:not equal signed vec_cmpne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmpne (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed equal vec_cmpne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 12468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 > arg2 vec_cmplt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 1234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 12468;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 < arg2 vec_cmplt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR:  unsigned arg1 = arg2 vec_cmplt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = -1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmplt (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 > arg2 vec_cmplt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = -1234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 12468;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmplt (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 < arg2 vec_cmplt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+  expected_result = 0x0ULL;
+
+  vec_result_bool = vec_cmplt (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_cmplt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+   
+  vec_uarg1[0] = 12468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 > arg2 vec_cmple ( ");
+    print_i128(vec_uarg1[0]);  /* Report the unsigned operands compared.  */
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 1234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 12468;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 < arg2 vec_cmple ( ");
+    print_i128(vec_uarg1[0]);  /* Report the unsigned operands compared.  */
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR:  unsigned arg1 = arg2 vec_cmple ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = -1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmple (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 > arg2 vec_cmple ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = -1234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 12468;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmple (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 < arg2 vec_cmple ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmple (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_cmple ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 12468;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 > arg2 vec_cmpge ( ");
+    print_i128(vec_uarg1[0]);  /* Report the unsigned operands compared.  */
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 1234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 12468;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: unsigned  arg1 < arg2 vec_cmpge ( ");
+    print_i128(vec_uarg1[0]);  /* Report the unsigned operands compared.  */
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR:  unsigned arg1 = arg2 vec_cmpge ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = 12468;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = -1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpge (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 > arg2 vec_cmpge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg1[0] = -1234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 12468;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  expected_result = 0x0;
+
+  vec_result_bool = vec_cmpge (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed  arg1 < arg2 vec_cmpge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+  expected_result = 0xFFFFFFFFFFFFFFFFULL;
+  expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL;
+
+  vec_result_bool = vec_cmpge (vec_arg1, vec_arg2);
+
+  if (vec_result_bool[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_cmpge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.");
+    print_i128(vec_result_bool[0]);
+    printf("\n Result does not match expected_result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+#if 1
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_eq (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_eq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_eq (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_eq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_eq (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_eq ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_eq (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_eq ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_ne (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_ne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_ne (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_ne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_ne (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_ne ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_ne (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_ne ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_lt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_lt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_lt (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_lt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_lt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_lt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_lt (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_lt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_le (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_le ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_le (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_le ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_le (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_le ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_le (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_le ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_gt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_gt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_gt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_gt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_gt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_gt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_gt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_gt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_all_ge (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_all_ge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_all_ge (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_all_ge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_all_ge (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_all_ge ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_all_ge (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_all_ge ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_eq (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_eq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_eq (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_eq ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_eq (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_eq ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_eq (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_eq ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_ne (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_ne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_ne (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_ne ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_ne (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_ne ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_ne (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_ne ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_lt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_lt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_lt (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_lt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_lt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_lt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_lt (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_lt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_gt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_gt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_gt (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_gt ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_gt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_gt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_gt (vec_uarg1, vec_uarg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_gt ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_le (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_le ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_le (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_le ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_le (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_le ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_le (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_le ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+  vec_arg1 = vec_arg2;
+
+  result_int = vec_any_ge (vec_arg1, vec_arg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 = arg2 vec_any_ge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1[0] = -234;
+  vec_arg1[0] = (vec_arg1[0] << 64) | 4567;
+  vec_arg2[0] = 1234;
+  vec_arg2[0] = (vec_arg2[0] << 64) | 4567;
+
+  result_int = vec_any_ge (vec_arg1, vec_arg2);
+
+  if (result_int) {
+#if DEBUG
+    printf("ERROR: signed arg1 != arg2 vec_any_ge ( ");
+    print_i128(vec_arg1[0]);
+    printf(", ");
+    print_i128(vec_arg2[0]);
+    printf(") failed.\n\n");
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+  vec_uarg1 = vec_uarg2;
+
+  result_int = vec_any_ge (vec_uarg1, vec_uarg2);
+
+  if (!result_int) {
+#if DEBUG
+    printf("ERROR: unsigned uarg1 = uarg2 vec_any_ge ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1[0] = 234;
+  vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567;
+  vec_uarg2[0] = 1234;
+  vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567;
+
+  result_int = vec_any_ge (vec_uarg1, vec_uarg2);
+
+  if (result_int) {  /* uarg1 < uarg2, so vec_any_ge must return false.  */
+#if DEBUG
+    printf("ERROR: unsigned uarg1 != uarg2 vec_any_ge ( ");
+    print_i128(vec_uarg1[0]);
+    printf(", ");
+    print_i128(vec_uarg2[0]);
+    printf(") failed.\n\n");
+#else
+    abort();
+#endif
+  }
+#endif
+
+  /* Vector multiply Even and Odd tests */
+  vec_arg1_di[0] = 200;
+  vec_arg1_di[1] = 400;
+  vec_arg2_di[0] = 1234;
+  vec_arg2_di[1] = 4567;
+  expected_result = vec_arg1_di[0] * vec_arg2_di[0];
+
+  vec_result = vec_mule (vec_arg1_di, vec_arg2_di);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_mule (signed, signed) failed.\n");
+    printf(" vec_arg1_di[0] = %lld\n", vec_arg1_di[0]);
+    printf(" vec_arg2_di[0] = %lld\n", vec_arg2_di[0]);
+    printf("Result = ");
+    print_i128(vec_result[0]);
+    printf("\nExpected Result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_arg1_di[0] = -200;
+  vec_arg1_di[1] = -400;
+  vec_arg2_di[0] = 1234;
+  vec_arg2_di[1] = 4567;
+  expected_result = vec_arg1_di[1] * vec_arg2_di[1];
+
+  vec_result = vec_mulo (vec_arg1_di, vec_arg2_di);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_mulo (signed, signed) failed.\n");
+    printf(" vec_arg1_di[1] = %lld\n", vec_arg1_di[1]);
+    printf(" vec_arg2_di[1] = %lld\n", vec_arg2_di[1]);
+    printf("Result = ");
+    print_i128(vec_result[0]);
+    printf("\nExpected Result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1_di[0] = 200;
+  vec_uarg1_di[1] = 400;
+  vec_uarg2_di[0] = 1234;
+  vec_uarg2_di[1] = 4567;
+  uexpected_result = vec_uarg1_di[0] * vec_uarg2_di[0];
+
+  vec_uresult = vec_mule (vec_uarg1_di, vec_uarg2_di);
+
+  if (vec_uresult[0] != uexpected_result) {  /* Expected: product of [0].  */
+#if DEBUG
+    printf("ERROR: vec_mule (unsigned, unsigned) failed.\n");
+    printf(" vec_uarg1_di[0] = %lld\n", vec_uarg1_di[0]);
+    printf(" vec_uarg2_di[0] = %lld\n", vec_uarg2_di[0]);
+    printf("Result = ");
+    print_i128(vec_uresult[0]);
+    printf("\nExpected Result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+  
+  vec_uarg1_di[0] = 200;
+  vec_uarg1_di[1] = 400;
+  vec_uarg2_di[0] = 1234;
+  vec_uarg2_di[1] = 4567;
+  uexpected_result = vec_uarg1_di[1] * vec_uarg2_di[1];
+
+  vec_uresult = vec_mulo (vec_uarg1_di, vec_uarg2_di);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_mulo (unsigned, unsigned) failed.\n");
+    printf(" vec_uarg1_di[0] = %lld\n", vec_uarg1_di[0]);
+    printf(" vec_uarg2_di[0] = %lld\n", vec_uarg2_di[0]);
+    printf("Result = ");
+    print_i128(vec_uresult[0]);
+    printf("\nExpected Result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* Vector Multiply Doubleword (element-wise, two long long lanes) */
+  vec_arg1_di[0] = 100;
+  vec_arg1_di[1] = -123456;
+
+  vec_arg2_di[0] = 123;
+  vec_arg2_di[1] = 1000;
+
+  vec_expected_result_di[0] = 12300;
+  vec_expected_result_di[1] = -123456000;
+
+  vec_result_di = vec_arg1_di * vec_arg2_di;
+
+  for (i = 0; i<2; i++) {
+    if (vec_result_di[i] != vec_expected_result_di[i]) {
+#if DEBUG
+      printf("ERROR: vector multipy [%d] ((long long) %lld) =  ", i,
+	     vec_result_di[i]);
+      printf("\n does not match expected_result [%d] = ((long long) %lld)", i,
+	     vec_expected_result_di[i]);
+      printf("\n\n");
+#else
+      abort();
+#endif
+    }
+  }
+
+  /* Vector Divide Quadword */
+  vec_arg1[0] = -12345678;
+  vec_arg2[0] = 2;
+  expected_result = -6172839;
+
+  vec_result = vec_div (vec_arg1, vec_arg2);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_div (signed, signed) failed.\n");
+    printf("vec_arg1[0] = ");
+    print_i128(vec_arg1[0]);
+    printf("\nvec_arg2[0] = ");
+    print_i128(vec_arg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_result[0]);
+    printf("\nExpected result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 24680;
+  vec_uarg2[0] = 4;
+  uexpected_result = 6170;
+
+  vec_uresult = vec_div (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_div (unsigned, unsigned) failed.\n");
+    printf("vec_uarg1[0] = ");
+    print_i128(vec_uarg1[0]);
+    printf("\nvec_uarg2[0] = ");
+    print_i128(vec_uarg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_uresult[0]);
+    printf("\nExpected result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* Vector Divide Extended Quadword */
+  vec_arg1[0] = -20;        // dividend has 128 zero bits concatenated onto it
+  vec_arg2[0] = 0x2000000000000000;
+  vec_arg2[0] = vec_arg2[0] << 64;
+  expected_result = -160;
+
+  vec_result = vec_dive (vec_arg1, vec_arg2);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_dive (signed, signed) failed.\n");
+    printf("vec_arg1[0] = ");
+    print_i128(vec_arg1[0]);
+    printf("\nvec_arg2[0] = ");
+    print_i128(vec_arg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_result[0]);
+    printf("\nExpected result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 20;        // dividend has 128 zero bits concatenated onto it
+  vec_uarg2[0] = 0x4000000000000000;
+  vec_uarg2[0] = vec_uarg2[0] << 64;
+  uexpected_result = 80;
+
+  vec_uresult = vec_dive (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_dive (unsigned, unsigned) failed.\n");
+    printf("vec_uarg1[0] = ");
+    print_i128(vec_uarg1[0]);
+    printf("\nvec_uarg2[0] = ");
+    print_i128(vec_uarg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_uresult[0]);
+    printf("\nExpected result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  /* Vector Modulo Quadword */
+  vec_arg1[0] = -12345675;
+  vec_arg2[0] = 2;
+  expected_result = -1;
+
+  vec_result = vec_mod (vec_arg1, vec_arg2);
+
+  if (vec_result[0] != expected_result) {
+#if DEBUG
+    printf("ERROR: vec_mod (signed, signed) failed.\n");
+    printf("vec_arg1[0] = ");
+    print_i128(vec_arg1[0]);
+    printf("\nvec_arg2[0] = ");
+    print_i128(vec_arg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_result[0]);
+    printf("\nExpected result = ");
+    print_i128(expected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  vec_uarg1[0] = 24685;
+  vec_uarg2[0] = 4;
+  uexpected_result = 1;
+
+  vec_uresult = vec_mod (vec_uarg1, vec_uarg2);
+
+  if (vec_uresult[0] != uexpected_result) {
+#if DEBUG
+    printf("ERROR: vec_mod (unsigned, unsigned) failed.\n");
+    printf("vec_uarg1[0] = ");
+    print_i128(vec_uarg1[0]);
+    printf("\nvec_uarg2[0] = ");
+    print_i128(vec_uarg2[0]);
+    printf("\nResult = ");
+    print_i128(vec_uresult[0]);
+    printf("\nExpected result = ");
+    print_i128(uexpected_result);
+    printf("\n\n");
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}