[2/5,ver4] RS6000: Add 128-bit Integer Operations

Message ID	c2f5e177527fcd8f7ba8b806032aa77b245a74ee.camel@us.ibm.com
State	New
Headers	show Return-Path: <gcc-patches-bounces@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 994393890434 Message-ID: <c2f5e177527fcd8f7ba8b806032aa77b245a74ee.camel@us.ibm.com> Subject: [PATCH 2/5 ver4] RS6000: Add 128-bit Integer Operations To: Segher Boessenkool <segher@kernel.crashing.org>, will schmidt <will_schmidt@vnet.ibm.com>, cel@us.ibm.com Date: Mon, 26 Apr 2021 09:36:12 -0700 In-Reply-To: <d660f874049b7fdb338ff33478c56b2259828ab1.camel@us.ibm.com> References: <d660f874049b7fdb338ff33478c56b2259828ab1.camel@us.ibm.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Precedence: list From: Carl Love via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: Carl Love <cel@us.ibm.com> Cc: Peter Bergner <bergner@vnet.ibm.com>, gcc-patches@gcc.gnu.org, dje.gcc@gmail.com Errors-To: gcc-patches-bounces@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces@gcc.gnu.org>
Series	RS6000: Add 128-bit Integer Operations \| expand [0/5,ver4] RS6000: Add 128-bit Integer Operations [1/5,ver4] RS6000: Add 128-bit Integer Operations [2/5,ver4] RS6000: Add 128-bit Integer Operations [3/5,ver4] RS6000: Add TI to TD (128-bit DFP) and TD to TI support [4/5,ver4] RS6000, Add test 128-bit shifts for just the int128 type. [5/5,ver4] RS6000: Conversions between 128-bit integer and floating point values.

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 961621a0841..314695a43ca 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -715,6 +715,9 @@ __altivec_scalar_pred(vec_any_nle, #define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) #ifdef _ARCH_PWR10 +#define vec_dive __builtin_vec_dive +#define vec_mod __builtin_vec_mod + /* May modify these macro definitions if future capabilities overload with support for different vector argument and result types. */ #define vec_cntlzm(a, b) __builtin_altivec_vclzdm (a, b) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 97dc9d2bda9..c4c82b33f8d 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -39,12 +39,16 @@ UNSPEC_VMULESH UNSPEC_VMULEUW UNSPEC_VMULESW + UNSPEC_VMULEUD + UNSPEC_VMULESD UNSPEC_VMULOUB UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH UNSPEC_VMULOUW UNSPEC_VMULOSW + UNSPEC_VMULOUD + UNSPEC_VMULOSD UNSPEC_VPKPX UNSPEC_VPACK_SIGN_SIGN_SAT UNSPEC_VPACK_SIGN_UNS_SAT @@ -627,6 +631,14 @@ "vcmpequ<VI_char> %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "altivec_eqv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpequq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_gt<mode>" [(set (match_operand:VI2 0 "altivec_register_operand" "=v") (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") @@ -635,6 +647,14 @@ "vcmpgts<VI_char> %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_gtv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (gt:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpgtsq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_gtu<mode>" [(set 
(match_operand:VI2 0 "altivec_register_operand" "=v") (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") @@ -643,6 +663,14 @@ "vcmpgtu<VI_char> %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_gtuv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpgtuq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_eqv4sf" [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") (eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") @@ -1693,6 +1721,19 @@ DONE; }) +(define_expand "vec_widen_umult_even_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_smult_even_v4si" [(use (match_operand:V2DI 0 "register_operand")) (use (match_operand:V4SI 1 "register_operand")) @@ -1706,6 +1747,19 @@ DONE; }) +(define_expand "vec_widen_smult_even_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_umult_odd_v16qi" [(use (match_operand:V8HI 0 "register_operand")) (use (match_operand:V16QI 1 "register_operand")) @@ -1771,6 +1825,19 @@ DONE; }) +(define_expand "vec_widen_umult_odd_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + 
(use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_smult_odd_v4si" [(use (match_operand:V2DI 0 "register_operand")) (use (match_operand:V4SI 1 "register_operand")) @@ -1784,6 +1851,19 @@ DONE; }) +(define_expand "vec_widen_smult_odd_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2])); + DONE; +}) + (define_insn "altivec_vmuleub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1865,6 +1945,15 @@ "vmuleuw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuleud" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULEUD))] + "TARGET_POWER10" + "vmuleud %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulouw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") @@ -1874,6 +1963,15 @@ "vmulouw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuloud" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULOUD))] + "TARGET_POWER10" + "vmuloud %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulesw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 
1 "register_operand" "v") @@ -1883,6 +1981,15 @@ "vmulesw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulesd" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULESD))] + "TARGET_POWER10" + "vmulesd %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulosw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") @@ -1892,6 +1999,15 @@ "vmulosw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulosd" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULOSD))] + "TARGET_POWER10" + "vmulosd %0,%1,%2" + [(set_attr "type" "veccomplex")]) + ;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") @@ -1985,6 +2101,15 @@ "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vrlq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +;; rotate amount needs to be in bits[57:63] of operand2. 
+ "vrlq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl<VI_char>mi" [(set (match_operand:VIlong 0 "register_operand" "=v") (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") @@ -1995,6 +2120,34 @@ "vrl<VI_char>mi %0,%1,%3" [(set_attr "type" "veclogical")]) +(define_expand "altivec_vrlqmi" + [(set (match_operand:V1TI 0 "vsx_register_operand") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand") + (match_operand:V1TI 2 "vsx_register_operand") + (match_operand:V1TI 3 "vsx_register_operand")] + UNSPEC_VRLMI))] + "TARGET_POWER10" +{ + /* Mask bit begin, end fields need to be in bits [41:55] of 128-bit operand2. + Shift amount needs to be put in bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[3])); + emit_insn (gen_altivec_vrlqmi_inst (operands[0], operands[1], operands[2], + tmp)); + DONE; +}) + +(define_insn "altivec_vrlqmi_inst" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "0") + (match_operand:V1TI 3 "vsx_register_operand" "v")] + UNSPEC_VRLMI))] + "TARGET_POWER10" + "vrlqmi %0,%1,%3" + [(set_attr "type" "veclogical")]) + (define_insn "altivec_vrl<VI_char>nm" [(set (match_operand:VIlong 0 "register_operand" "=v") (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") @@ -2004,6 +2157,31 @@ "vrl<VI_char>nm %0,%1,%2" [(set_attr "type" "veclogical")]) +(define_expand "altivec_vrlqnm" + [(set (match_operand:V1TI 0 "vsx_register_operand") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand") + (match_operand:V1TI 2 "vsx_register_operand")] + UNSPEC_VRLNM))] + "TARGET_POWER10" +{ + /* Shift amount needs to be put in bits[57:63] of 128-bit operand2. 
*/ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlqnm_inst (operands[0], operands[1], tmp)); + DONE; +}) + +(define_insn "altivec_vrlqnm_inst" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VRLNM))] + "TARGET_POWER10" + ;; rotate and mask bits need to be in upper 64 bits of operand2. + "vrlqnm %0,%1,%2" + [(set_attr "type" "veclogical")]) + (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2048,6 +2226,15 @@ "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vslq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount needs to be in bits[57:63] of 128-bit operand. */ + "vslq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsr<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") @@ -2056,6 +2243,15 @@ "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vsrq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount needs to be in bits[57:63] of 128-bit operand. 
*/ + "vsrq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsra<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") @@ -2064,6 +2260,15 @@ "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vsraq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount needs to be in bits[57:63] of 128-bit operand. */ + "vsraq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vsr" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2624,6 +2829,18 @@ "vcmpequ<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "altivec_vcmpequt_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (eq:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpequq. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpgts<VI_char>_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") @@ -2636,6 +2853,18 @@ "vcmpgts<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_vcmpgtst_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "register_operand" "=v") + (gt:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpgtsq. 
%0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpgtu<VI_char>_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") @@ -2648,6 +2877,18 @@ "vcmpgtu<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_vcmpgtut_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "register_operand" "=v") + (gtu:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpgtuq. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpeqfp_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v") diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 609bebdfd74..dba22825b79 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1269,6 +1269,15 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* See the comment on BU_ALTIVEC_P. */ +#define BU_P10V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P10V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P10, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_P10V_AV_X(ENUM, NAME, ATTR) \ RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_altivec_" NAME, /* NAME */ \ @@ -2880,6 +2889,10 @@ BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") /* Builtins for scalar instructions added in ISA 3.1 (power10). 
*/ +BU_P10V_AV_P (VCMPEQUT_P, "vcmpequt_p", CONST, vector_eq_v1ti_p) +BU_P10V_AV_P (VCMPGTST_P, "vcmpgtst_p", CONST, vector_gt_v1ti_p) +BU_P10V_AV_P (VCMPGTUT_P, "vcmpgtut_p", CONST, vector_gtu_v1ti_p) + BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged) BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm) BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm) @@ -2900,7 +2913,36 @@ BU_P10V_VSX_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvm_v16qi) BU_P10V_VSX_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvm_v8hi) BU_P10V_VSX_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvm_v4si) BU_P10V_VSX_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvm_v2di) - +BU_P10V_AV_2 (VCMPGTUT, "vcmpgtut", CONST, vector_gtuv1ti) +BU_P10V_AV_2 (VCMPGTST, "vcmpgtst", CONST, vector_gtv1ti) +BU_P10V_AV_2 (VCMPEQUT, "vcmpequt", CONST, eqvv1ti3) +BU_P10V_AV_2 (CMPNET, "vcmpnet", CONST, vcmpnet) +BU_P10V_AV_2 (CMPGE_1TI, "cmpge_1ti", CONST, vector_nltv1ti) +BU_P10V_AV_2 (CMPGE_U1TI, "cmpge_u1ti", CONST, vector_nltuv1ti) +BU_P10V_AV_2 (CMPLE_1TI, "cmple_1ti", CONST, vector_ngtv1ti) +BU_P10V_AV_2 (CMPLE_U1TI, "cmple_u1ti", CONST, vector_ngtuv1ti) +BU_P10V_AV_2 (VNOR_V1TI_UNS, "vnor_v1ti_uns",CONST, norv1ti3) +BU_P10V_AV_2 (VNOR_V1TI, "vnor_v1ti", CONST, norv1ti3) +BU_P10V_AV_2 (VCMPNET_P, "vcmpnet_p", CONST, vector_ne_v1ti_p) +BU_P10V_AV_2 (VCMPAET_P, "vcmpaet_p", CONST, vector_ae_v1ti_p) + +BU_P10V_AV_2 (VMULEUD, "vmuleud", CONST, vec_widen_umult_even_v2di) +BU_P10V_AV_2 (VMULESD, "vmulesd", CONST, vec_widen_smult_even_v2di) +BU_P10V_AV_2 (VMULOUD, "vmuloud", CONST, vec_widen_umult_odd_v2di) +BU_P10V_AV_2 (VMULOSD, "vmulosd", CONST, vec_widen_smult_odd_v2di) +BU_P10V_AV_2 (VRLQ, "vrlq", CONST, vrotlv1ti3) +BU_P10V_AV_2 (VSLQ, "vslq", CONST, vashlv1ti3) +BU_P10V_AV_2 (VSRQ, "vsrq", CONST, vlshrv1ti3) +BU_P10V_AV_2 (VSRAQ, "vsraq", CONST, vashrv1ti3) +BU_P10V_AV_2 (VRLQNM, "vrlqnm", CONST, altivec_vrlqnm) +BU_P10V_AV_2 (DIV_V1TI, "div_1ti", CONST, 
vsx_div_v1ti) +BU_P10V_AV_2 (UDIV_V1TI, "udiv_1ti", CONST, vsx_udiv_v1ti) +BU_P10V_AV_2 (DIVES_V1TI, "dives", CONST, vsx_dives_v1ti) +BU_P10V_AV_2 (DIVEU_V1TI, "diveu", CONST, vsx_diveu_v1ti) +BU_P10V_AV_2 (MODS_V1TI, "mods", CONST, vsx_mods_v1ti) +BU_P10V_AV_2 (MODU_V1TI, "modu", CONST, vsx_modu_v1ti) + +BU_P10V_AV_3 (VRLQMI, "vrlqmi", CONST, altivec_vrlqmi) BU_P10V_AV_3 (VEXTRACTBL, "vextdubvlx", CONST, vextractlv16qi) BU_P10V_AV_3 (VEXTRACTHL, "vextduhvlx", CONST, vextractlv8hi) BU_P10V_AV_3 (VEXTRACTWL, "vextduwvlx", CONST, vextractlv4si) @@ -3025,6 +3067,10 @@ BU_P10_OVERLOAD_2 (CLRR, "clrr") BU_P10_OVERLOAD_2 (GNB, "gnb") BU_P10_OVERLOAD_4 (XXEVAL, "xxeval") BU_P10_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm") +BU_P10_OVERLOAD_2 (VRLQ, "vrlq") +BU_P10_OVERLOAD_2 (VSLQ, "vslq") +BU_P10_OVERLOAD_2 (VSRQ, "vsrq") +BU_P10_OVERLOAD_2 (VSRAQ, "vsraq") BU_P10_OVERLOAD_3 (EXTRACTL, "extractl") BU_P10_OVERLOAD_3 (EXTRACTH, "extracth") diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index f5676255387..32a8af92458 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -843,6 +843,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -889,6 +893,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI, RS6000_BTI_bool_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0}, + + { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_U1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0}, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -903,8 +913,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTST, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -947,6 +961,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_U1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0}, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -1086,6 
+1105,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIV_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_UDIV_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1097,6 +1121,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1108,6 +1137,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 }, @@ -1973,6 +2007,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_MULE, P8V_BUILTIN_VMULEUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULESD, + RS6000_BTI_V1TI, RS6000_BTI_V2DI, 
RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULEUD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB, @@ -1996,6 +2035,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_MULO, P8V_BUILTIN_VMULOUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOSD, + RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOUD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH, @@ -2038,6 +2082,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS, @@ -2299,6 +2353,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -2317,12 +2376,23 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI }, + { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, 
ALTIVEC_BUILTIN_VSLB, @@ -2339,6 +2409,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2535,6 +2610,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2563,6 +2643,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, 
RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -4180,12 +4265,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTST_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, @@ -4250,6 +4339,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -4301,12 +4394,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTST_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, @@ -4955,6 +5052,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, + RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, /* The following 2 entries have been deprecated. 
*/ { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, @@ -5055,6 +5158,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, + RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, @@ -5160,7 +5267,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, - + { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, + RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P, @@ -12577,12 +12687,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case ALTIVEC_BUILTIN_VCMPEQUH: case ALTIVEC_BUILTIN_VCMPEQUW: case P8V_BUILTIN_VCMPEQUD: + case P10V_BUILTIN_VCMPEQUT: fold_compare_helper (gsi, EQ_EXPR, stmt); return true; case P9V_BUILTIN_CMPNEB: case P9V_BUILTIN_CMPNEH: case P9V_BUILTIN_CMPNEW: + case P10V_BUILTIN_CMPNET: fold_compare_helper (gsi, NE_EXPR, stmt); return true; @@ -12594,6 +12706,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case VSX_BUILTIN_CMPGE_U4SI: case VSX_BUILTIN_CMPGE_2DI: case VSX_BUILTIN_CMPGE_U2DI: + case P10V_BUILTIN_CMPGE_1TI: + case P10V_BUILTIN_CMPGE_U1TI: fold_compare_helper (gsi, GE_EXPR, stmt); return true; @@ -12605,6 +12719,8 @@ rs6000_gimple_fold_builtin 
(gimple_stmt_iterator *gsi) case ALTIVEC_BUILTIN_VCMPGTUW: case P8V_BUILTIN_VCMPGTUD: case P8V_BUILTIN_VCMPGTSD: + case P10V_BUILTIN_VCMPGTUT: + case P10V_BUILTIN_VCMPGTST: fold_compare_helper (gsi, GT_EXPR, stmt); return true; @@ -12616,6 +12732,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case VSX_BUILTIN_CMPLE_U4SI: case VSX_BUILTIN_CMPLE_2DI: case VSX_BUILTIN_CMPLE_U2DI: + case P10V_BUILTIN_CMPLE_1TI: + case P10V_BUILTIN_CMPLE_U1TI: fold_compare_helper (gsi, LE_EXPR, stmt); return true; @@ -13343,6 +13461,8 @@ rs6000_init_builtins (void) ? "__vector __bool long" : "__vector __bool long long", bool_long_long_type_node, 2); + bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128", + intTI_type_node, 1); pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", pixel_type_node, 8); @@ -13540,6 +13660,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V2DI_type_node, V2DI_type_node, NULL_TREE); + tree int_ftype_int_v1ti_v1ti + = build_function_type_list (integer_type_node, + integer_type_node, V1TI_type_node, + V1TI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -13907,6 +14031,9 @@ altivec_init_builtins (void) case E_VOIDmode: type = int_ftype_int_opaque_opaque; break; + case E_V1TImode: + type = int_ftype_int_v1ti_v1ti; + break; case E_V2DImode: type = int_ftype_int_v2di_v2di; break; @@ -14512,12 +14639,16 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case P10V_BUILTIN_XXGENPCVM_V2DI: case P10V_BUILTIN_DIVEU_V4SI: case P10V_BUILTIN_DIVEU_V2DI: + case P10V_BUILTIN_DIVEU_V1TI: case P10V_BUILTIN_DIVU_V4SI: case P10V_BUILTIN_DIVU_V2DI: + case P10V_BUILTIN_MODU_V1TI: case P10V_BUILTIN_MODU_V2DI: case P10V_BUILTIN_MODU_V4SI: case P10V_BUILTIN_MULHU_V2DI: case P10V_BUILTIN_MULHU_V4SI: + case P10V_BUILTIN_VMULEUD: + case P10V_BUILTIN_VMULOUD: h.uns_p[0] = 1; h.uns_p[1] = 
1; h.uns_p[2] = 1; @@ -14617,10 +14748,13 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case VSX_BUILTIN_CMPGE_U8HI: case VSX_BUILTIN_CMPGE_U4SI: case VSX_BUILTIN_CMPGE_U2DI: + case P10V_BUILTIN_CMPGE_U1TI: case ALTIVEC_BUILTIN_VCMPGTUB: case ALTIVEC_BUILTIN_VCMPGTUH: case ALTIVEC_BUILTIN_VCMPGTUW: case P8V_BUILTIN_VCMPGTUD: + case P10V_BUILTIN_VCMPGTUT: + case P10V_BUILTIN_VCMPEQUT: h.uns_p[1] = 1; h.uns_p[2] = 1; break; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 844fee88cf3..531a9c87243 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -20225,6 +20225,7 @@ rs6000_handle_altivec_attribute (tree *node, case 'b': switch (mode) { + case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break; case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break; case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break; case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 164d359b724..a7953fad965 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2332,7 +2332,6 @@ extern int frame_pointer_needed; #define RS6000_BTM_MMA MASK_MMA /* ISA 3.1 MMA. 
*/ #define RS6000_BTM_P10 MASK_POWER10 - #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ | RS6000_BTM_P8_VECTOR \ @@ -2445,6 +2444,7 @@ enum rs6000_builtin_type_index RS6000_BTI_bool_V8HI, /* __vector __bool short */ RS6000_BTI_bool_V4SI, /* __vector __bool int */ RS6000_BTI_bool_V2DI, /* __vector __bool long */ + RS6000_BTI_bool_V1TI, /* __vector __bool 128-bit */ RS6000_BTI_pixel_V8HI, /* __vector __pixel */ RS6000_BTI_long, /* long_integer_type_node */ RS6000_BTI_unsigned_long, /* long_unsigned_type_node */ @@ -2498,6 +2498,7 @@ enum rs6000_builtin_type_index #define bool_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V8HI]) #define bool_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V4SI]) #define bool_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V2DI]) +#define bool_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V1TI]) #define pixel_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_pixel_V8HI]) #define long_long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_long]) diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 3446b03d40d..55bbaa9c32f 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -685,6 +685,13 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gtv1ti" + [(set (match_operand:V1TI 0 "vlogical_operand") + (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand")))] + "TARGET_POWER10" + "") + ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nlt<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") @@ -697,6 +704,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_nltv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gt:V1TI (match_operand:V1TI 2 "vlogical_operand") + (match_operand:V1TI 1 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + 
"TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_gtu<mode>" [(set (match_operand:VEC_I 0 "vint_operand") (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand") @@ -704,6 +722,13 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gtuv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand") + (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand")))] + "TARGET_POWER10" + "") + ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nltu<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") @@ -716,6 +741,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_nltuv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gtu:V1TI (match_operand:V1TI 2 "vlogical_operand") + (match_operand:V1TI 1 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_geu<mode>" [(set (match_operand:VEC_I 0 "vint_operand") (geu:VEC_I (match_operand:VEC_I 1 "vint_operand") @@ -735,6 +771,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_ngtv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_ngtu<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand") @@ -746,6 +793,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_ngtuv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gtu:V1TI (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 
2 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + ; There are 14 possible vector FP comparison operators, gt and eq of them have ; been expanded above, so just support 12 remaining operators here. @@ -894,6 +952,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_eq_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "vlogical_operand") + (eq:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + ;; This expansion handles the V16QI, V8HI, and V4SI modes in the ;; implementation of the vec_all_ne built-in functions on Power9. (define_expand "vector_ne_<mode>_p" @@ -976,6 +1046,23 @@ operands[3] = gen_reg_rtx (V2DImode); }) +(define_expand "vector_ne_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V1TI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx (V1TImode); +}) + ;; This expansion handles the V2DI mode in the implementation of the ;; vec_any_eq built-in function on Power9. 
;; @@ -1002,6 +1089,26 @@ operands[3] = gen_reg_rtx (V2DImode); }) +(define_expand "vector_ae_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V1TI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx (V1TImode); +}) + ;; This expansion handles the V4SF and V2DF modes in the Power9 ;; implementation of the vec_all_ne built-in functions. Note that the ;; expansions for this pattern with these modes makes no use of power9- @@ -1061,6 +1168,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gt_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "vlogical_operand") + (gt:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + (define_expand "vector_ge_<mode>_p" [(parallel [(set (reg:CC CR6_REGNO) @@ -1085,6 +1204,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gtu_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "altivec_register_operand") + (gtu:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + ;; AltiVec/VSX predicates. 
;; This expansion is triggered during expansion of predicate built-in @@ -1460,6 +1591,20 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vrotlv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for rotatert to make use of vrotl (define_expand "vrotr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1481,6 +1626,21 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; No immediate version of this 128-bit instruction +(define_expand "vashlv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vslq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for logical shift right on each vector element (define_expand "vlshr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1489,6 +1649,21 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; No immediate version of this 128-bit instruction +(define_expand "vlshrv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. 
*/ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vsrq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for arithmetic shift right on each vector element (define_expand "vashr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1496,6 +1671,22 @@ (match_operand:VEC_I 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") + +;; No immediate version of this 128-bit instruction +(define_expand "vashrv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Vector reduction expanders for VSX ; The (VEC_reduc:... 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index bcb92be2f5c..ba539549024 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -302,6 +302,12 @@ UNSPEC_VSX_XXSPLTD UNSPEC_VSX_DIVSD UNSPEC_VSX_DIVUD + UNSPEC_VSX_DIVSQ + UNSPEC_VSX_DIVUQ + UNSPEC_VSX_DIVESQ + UNSPEC_VSX_DIVEUQ + UNSPEC_VSX_MODSQ + UNSPEC_VSX_MODUQ UNSPEC_VSX_MULSD UNSPEC_VSX_SIGN_EXTEND UNSPEC_VSX_XVCVBF16SPN @@ -1781,6 +1787,61 @@ } [(set_attr "type" "div")]) +;; Vector integer signed/unsigned divide +(define_insn "vsx_div_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVSQ))] + "TARGET_POWER10" + "vdivsq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_udiv_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVUQ))] + "TARGET_POWER10" + "vdivuq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_dives_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVESQ))] + "TARGET_POWER10" + "vdivesq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_diveu_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVEUQ))] + "TARGET_POWER10" + "vdiveuq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_mods_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_MODSQ))] + "TARGET_POWER10" + "vmodsq %0,%1,%2" + [(set_attr 
"type" "div")]) + +(define_insn "vsx_modu_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_MODUQ))] + "TARGET_POWER10" + "vmoduq %0,%1,%2" + [(set_attr "type" "div")]) + ;; *tdiv* instruction returning the FG flag (define_expand "vsx_tdiv<mode>3_fg" [(set (match_dup 3) @@ -3126,6 +3187,21 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) +;; Swap upper/lower 64-bit values in a 128-bit vector +(define_insn "xxswapd_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (subreg:V1TI + (vec_select:V2DI + (subreg:V2DI + (match_operand:V1TI 1 "vsx_register_operand" "v") 0 ) + (parallel [(const_int 1)(const_int 0)])) + 0))] + "TARGET_POWER10" +;; AIX does not support extended mnemonic xxswapd. Use the basic +;; mnemonic xxpermdi instead. + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + (define_insn "xxgenpcvm_<mode>_internal" [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa") (unspec:VSX_EXTRACT_I4 @@ -5525,6 +5601,19 @@ "vcmpneb %0,%1,%2" [(set_attr "type" "vecsimple")]) +;; Vector Compare Not Equal v1ti (specified/not+eq:) +(define_expand "vcmpnet" + [(set (match_operand:V1TI 0 "altivec_register_operand") + (not:V1TI + (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))))] + "TARGET_POWER10" +{ + /* Compare for equality with vcmpequq (altivec_eqv1ti), then invert the mask. */ emit_insn (gen_altivec_eqv1ti (operands[0], operands[1], operands[2])); + emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0])); + DONE; +}) + ;; Vector Compare Not Equal or Zero Byte (define_insn "vcmpnezb" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 3260f0639d2..d1c56edbaa8 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -20131,6 +20131,177 @@ Generate PCV from specified Mask size, as if implemented by the immediate value is 
either 0, 1, 2 or 3. @findex vec_genpcvm +@smallexample +@exdent vector unsigned __int128 vec_rl (vector unsigned __int128 A, + vector unsigned __int128 B); +@exdent vector signed __int128 vec_rl (vector signed __int128 A, + vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by rotating the corresponding element +of A left by the number of bits specified by the corresponding element of B. + + +@smallexample +@exdent vector unsigned __int128 vec_rlmi (vector unsigned __int128, + vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_rlmi (vector signed __int128, + vector signed __int128, + vector unsigned __int128); +@end smallexample + +Returns the result of rotating the first input and inserting it under mask +into the second input. The first bit in the mask and the last bit in the mask are +obtained from the two 7-bit fields bits [108:115] and bits [117:123] +respectively of the second input. The shift is obtained from the third input +in the 7-bit field [125:131] where all bits are counted from zero at the left. + +@smallexample +@exdent vector unsigned __int128 vec_rlnm (vector unsigned __int128, + vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_rlnm (vector signed __int128, + vector unsigned __int128, + vector unsigned __int128); +@end smallexample + +Returns the result of rotating the first input and ANDing it with a mask. The +first bit in the mask and the last bit in the mask are obtained from the two +7-bit fields bits [117:123] and bits [125:131] respectively of the second +input. The shift is obtained from the third input in the 7-bit field bits +[125:131] where all bits are counted from zero at the left. 
+ +@smallexample +@exdent vector unsigned __int128 vec_sl(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sl(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by shifting the corresponding element of +A left by the number of bits specified by the corresponding element of B. + +@smallexample +@exdent vector unsigned __int128 vec_sr(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sr(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by shifting the corresponding element of +A right by the number of bits specified by the corresponding element of B. + +@smallexample +@exdent vector unsigned __int128 vec_sra(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sra(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by arithmetic shifting the corresponding +element of A right by the number of bits specified by the corresponding element of B. + +@smallexample +@exdent vector unsigned __int128 vec_mule (vector unsigned long long, + vector unsigned long long); +@exdent vector signed __int128 vec_mule (vector signed long long, + vector signed long long); +@end smallexample + +Returns a vector containing a 128-bit integer result of multiplying the even +doubleword elements of the two inputs. + +@smallexample +@exdent vector unsigned __int128 vec_mulo (vector unsigned long long, + vector unsigned long long); +@exdent vector signed __int128 vec_mulo (vector signed long long, + vector signed long long); +@end smallexample + +Returns a vector containing a 128-bit integer result of multiplying the odd +doubleword elements of the two inputs. 
+ +@smallexample +@exdent vector unsigned __int128 vec_div (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_div (vector signed __int128, + vector signed __int128); +@end smallexample + +Returns the result of dividing the first operand by the second operand. An +attempt to divide any value by zero or to divide the most negative signed +128-bit integer by negative one results in an undefined value. + +@smallexample +@exdent vector unsigned __int128 vec_dive (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_dive (vector signed __int128, + vector signed __int128); +@end smallexample + +The result is produced by shifting the first input left by 128 bits and +dividing by the second. If an attempt is made to divide by zero or the result +is larger than 128 bits, the result is undefined. + +@smallexample +@exdent vector unsigned __int128 vec_mod (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_mod (vector signed __int128, + vector signed __int128); +@end smallexample + +The result is the modulo result of dividing the first input by the second +input. + +The following builtins perform 128-bit vector comparisons. The +@code{vec_all_xx}, @code{vec_any_xx}, and @code{vec_cmpxx}, where @code{xx} is +one of the operations @code{eq, ne, gt, lt, ge, le}, perform pairwise +comparisons between the elements at the same positions within their two vector +arguments. The @code{vec_all_xx} function returns a non-zero value if and only +if all pairwise comparisons are true. The @code{vec_any_xx} function returns +a non-zero value if and only if at least one pairwise comparison is true. The +@code{vec_cmpxx} function returns a @code{vector bool __int128} result, +within which each element consists of all ones to denote that the +specified logical comparison of the corresponding elements was true. 
+Otherwise, the element of the returned vector contains all zeros. + +@smallexample +vector bool __int128 vec_cmpeq (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpeq (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpne (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpne (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpgt (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpgt (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmplt (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmplt (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpge (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpge (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmple (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmple (vector unsigned __int128, vector unsigned __int128); + +int vec_all_eq (vector signed __int128, vector signed __int128); +int vec_all_eq (vector unsigned __int128, vector unsigned __int128); +int vec_all_ne (vector signed __int128, vector signed __int128); +int vec_all_ne (vector unsigned __int128, vector unsigned __int128); +int vec_all_gt (vector signed __int128, vector signed __int128); +int vec_all_gt (vector unsigned __int128, vector unsigned __int128); +int vec_all_lt (vector signed __int128, vector signed __int128); +int vec_all_lt (vector unsigned __int128, vector unsigned __int128); +int vec_all_ge (vector signed __int128, vector signed __int128); +int vec_all_ge (vector unsigned __int128, vector unsigned __int128); +int vec_all_le (vector signed __int128, vector signed __int128); +int vec_all_le (vector unsigned __int128, vector unsigned __int128); + +int vec_any_eq (vector signed __int128, vector signed __int128); +int vec_any_eq 
(vector unsigned __int128, vector unsigned __int128); +int vec_any_ne (vector signed __int128, vector signed __int128); +int vec_any_ne (vector unsigned __int128, vector unsigned __int128); +int vec_any_gt (vector signed __int128, vector signed __int128); +int vec_any_gt (vector unsigned __int128, vector unsigned __int128); +int vec_any_lt (vector signed __int128, vector signed __int128); +int vec_any_lt (vector unsigned __int128, vector unsigned __int128); +int vec_any_ge (vector signed __int128, vector signed __int128); +int vec_any_ge (vector unsigned __int128, vector unsigned __int128); +int vec_any_le (vector signed __int128, vector signed __int128); +int vec_any_le (vector unsigned __int128, vector unsigned __int128); +@end smallexample + + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c new file mode 100644 index 00000000000..042758c8684 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c @@ -0,0 +1,2263 @@ +/* { dg-do run } */ +/* { dg-options "-mcpu=power10 -save-temps" } */ +/* { dg-require-effective-target power10_hw } */ + +/* Check that the expected 128-bit instructions are generated if the processor + supports the 128-bit integer instructions. 
*/ +/* { dg-final { scan-assembler-times {\mvslq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsrq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsraq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlqnm\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlqmi\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvcmpuq\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mvcmpsq\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mvcmpequq\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mvcmpequq.\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtsq.\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtuq.\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvmuleud\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmuloud\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulesd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulosd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulld\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivsq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivuq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivesq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdiveuq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmodsq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmoduq\M} 1 } } */ + +#include <altivec.h> + +#define DEBUG 0 + +#if DEBUG +#include <stdio.h> +#include <stdlib.h> + + +void print_i128(__int128_t val) +{ + printf(" %lld %llu (0x%llx %llx)", + (signed long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF), + (unsigned long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); +} +#endif + +void abort (void); + +int main () +{ + int i, result_int; + + __int128_t arg1, result; + __uint128_t uarg2; + + vector 
signed long long int vec_arg1_di, vec_arg2_di; + vector signed long long int vec_result_di, vec_expected_result_di; + vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di; + vector unsigned long long int vec_uresult_di; + vector unsigned long long int vec_uexpected_result_di; + + __int128_t expected_result; + __uint128_t uexpected_result; + + vector __int128 vec_arg1, vec_arg2, vec_result; + vector unsigned __int128 vec_uarg1, vec_uarg2, vec_uarg3, vec_uresult; + vector bool __int128 vec_result_bool; + + /* test shift 128-bit integers. + Note, shift amount is given by the lower 7-bits of the shift amount. */ + vec_arg1[0] = 3; + vec_uarg2[0] = 2; + expected_result = vec_arg1[0]*4; + + vec_result = vec_sl (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sl(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + arg1 = 3; + uarg2 = 4; + expected_result = arg1*16; + + result = arg1 << uarg2; + + if (result != expected_result) { +#if DEBUG + printf("ERROR: int128 << uint128): "); + print_i128(arg1); + printf(" << %lld", uarg2 & 0xFF); + printf(" = "); + print_i128(result); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 3; + vec_uarg2[0] = 2; + uexpected_result = vec_uarg1[0]*4; + + vec_uresult = vec_sl (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_sl(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12; 
+ vec_uarg2[0] = 2; + expected_result = vec_arg1[0]/4; + + vec_result = vec_sr (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sr(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 48; + vec_uarg2[0] = 2; + uexpected_result = vec_uarg1[0]/4; + + vec_uresult = vec_sr (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_sr(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + arg1 = 48; + uarg2 = 4; + expected_result = arg1/16; + + result = arg1 >> uarg2; + + if (result != expected_result) { +#if DEBUG + printf("ERROR: int128 >> uint128: "); + print_i128(arg1); + printf(" >> %lld", uarg2 & 0xFF); + printf(" = "); + print_i128(result); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + expected_result = 0x0000000012345678ULL; + expected_result = (expected_result << 64) | 0x90ABCDEFAABBCCDDULL; + + vec_result = vec_sra (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sra(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = 
(vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + uexpected_result = 0xFFFFFFFFFFFFAABBLL; + uexpected_result = (uexpected_result << 64) | 0xCCDDEEFF11221234ULL; + + vec_uresult = vec_sra (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_sra(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld = \n", vec_uarg2[0] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + expected_result = 0x90ABCDEFAABBCCDDULL; + expected_result = (expected_result << 64) | 0xEEFF112212345678ULL; + + vec_result = vec_rl (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rl(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + uexpected_result = 0x11221234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEEFFULL; + + vec_uresult = vec_rl (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rl(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2. + Then AND with mask whose start/stop bits are specified in element of + arg3. 
*/ + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + vec_uarg3[0] = (32 << 8) | 95; + expected_result = 0xaabbccddULL; + expected_result = (expected_result << 64) | 0xeeff112200000000ULL; + + vec_result = vec_rlnm (vec_arg1, vec_uarg2, vec_uarg3); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rlnm(int128, uint128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld = \n", vec_uarg3[0] & 0xFF); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2; + then AND with mask whose start/stop bits are specified in element of + arg3. */ + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + vec_uarg3[0] = (8 << 8) | 119; + + uexpected_result = 0x00221234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEE00ULL; + + vec_uresult = vec_rlnm (vec_uarg1, vec_uarg2, vec_uarg3); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rlnm(uint128, uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld = \n", vec_uarg3[0] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlmi(R, A, B) + Result value: Each element of R is obtained by rotating the corresponding + element of A left by the number of bits specified by the corresponding element + of B. 
*/ + + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_arg2[0] = 0x000000000000DEADULL; + vec_arg2[0] = (vec_arg2[0] << 64) | 0x0000BEEF00000000ULL; + vec_uarg3[0] = 96 << 16 | 127 << 8 | 32; + expected_result = 0x000000000000DEADULL; + expected_result = (expected_result << 64) | 0x0000BEEF12345678ULL; + + vec_result = vec_rlmi (vec_arg1, vec_arg2, vec_uarg3); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rlmi(int128, int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld = \n", vec_uarg2_di[1] & 0xFF); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlmi(R, A, B) + Result value: Each element of R is obtained by rotating the corresponding + element of A left by the number of bits specified by the corresponding element + of B. */ + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 0xDEAD000000000000ULL; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 0x000000000000BEEFULL; + vec_uarg3[0] = 16 << 16 | 111 << 8 | 48; + uexpected_result = 0xDEAD1234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDBEEFULL; + + vec_uresult = vec_rlmi (vec_uarg1, vec_uarg2, vec_uarg3); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rlmi(uint128, unit128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld = \n", vec_uarg3[1] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* 128-bit compare tests, result is all 1's if true */ + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1[0] = 2468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + uexpected_result = 0xFFFFFFFFFFFFFFFFULL; + uexpected_result = 
(uexpected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpgt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != uexpected_result) { +#if DEBUG + printf("ERROR: unsigned vec_cmpgt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpgt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed vec_cmpgt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR:not equal signed vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2); + + if 
(vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed equal vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned not equal vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: equal unsigned vec_cmpeq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned not 
equal vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: equal unsigned vec_cmpne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpne (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR:not equal signed vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpne (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed equal vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + 
print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmplt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + 
vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != 
expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmple ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmple ( 
"); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmpge ( "); 
+ print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmpge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + 
printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + +#if 1 + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_eq (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_eq (vec_arg1, vec_arg2); + + if (result_int) { 
+#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_eq (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_eq (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_ne (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_ne (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_ne (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 
vec_all_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_ne (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_lt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_lt (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_lt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_lt (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_lt ( "); + 
print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + 
printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_ge (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_ge (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") 
failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_ge (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_ge (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_eq (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_eq (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_eq (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } 
+ + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_eq (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_ne (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_ne (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_ne (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_ne (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 
1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_lt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_lt (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_lt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_lt (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + 
vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = 
vec_uarg2; + + result_int = vec_any_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_ge (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_ge (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_ge (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + 
result_int = vec_any_ge (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } +#endif + + /* Vector multiply Even and Odd tests */ + vec_arg1_di[0] = 200; + vec_arg1_di[1] = 400; + vec_arg2_di[0] = 1234; + vec_arg2_di[1] = 4567; + expected_result = vec_arg1_di[0] * vec_arg2_di[0]; + + vec_result = vec_mule (vec_arg1_di, vec_arg2_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mule (signed, signed) failed.\n"); + printf(" vec_arg1_di[0] = %lld\n", vec_arg1_di[0]); + printf(" vec_arg2_di[0] = %lld\n", vec_arg2_di[0]); + printf("Result = "); + print_i128(vec_result[0]); + printf("\nExpected Result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1_di[0] = -200; + vec_arg1_di[1] = -400; + vec_arg2_di[0] = 1234; + vec_arg2_di[1] = 4567; + expected_result = vec_arg1_di[1] * vec_arg2_di[1]; + + vec_result = vec_mulo (vec_arg1_di, vec_arg2_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mulo (signed, signed) failed.\n"); + printf(" vec_arg1_di[1] = %lld\n", vec_arg1_di[1]); + printf(" vec_arg2_di[1] = %lld\n", vec_arg2_di[1]); + printf("Result = "); + print_i128(vec_result[0]); + printf("\nExpected Result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1_di[0] = 200; + vec_uarg1_di[1] = 400; + vec_uarg2_di[0] = 1234; + vec_uarg2_di[1] = 4567; + uexpected_result = vec_uarg1_di[0] * vec_uarg2_di[0]; + + vec_uresult = vec_mule (vec_uarg1_di, vec_uarg2_di); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mule (unsigned, unsigned) failed.\n"); + printf(" vec_uarg1_di[1] = %lld\n", vec_uarg1_di[1]); + printf(" vec_uarg2_di[1] = %lld\n", vec_uarg2_di[1]); + printf("Result = "); + 
print_i128(vec_uresult[0]); + printf("\nExpected Result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1_di[0] = 200; + vec_uarg1_di[1] = 400; + vec_uarg2_di[0] = 1234; + vec_uarg2_di[1] = 4567; + uexpected_result = vec_uarg1_di[1] * vec_uarg2_di[1]; + + vec_uresult = vec_mulo (vec_uarg1_di, vec_uarg2_di); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mulo (unsigned, unsigned) failed.\n"); + printf(" vec_uarg1_di[0] = %lld\n", vec_uarg1_di[0]); + printf(" vec_uarg2_di[0] = %lld\n", vec_uarg2_di[0]); + printf("Result = "); + print_i128(vec_uresult[0]); + printf("\nExpected Result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector Multiply Longword */ + vec_arg1_di[0] = 100; + vec_arg1_di[1] = -123456; + + vec_arg2_di[0] = 123; + vec_arg2_di[1] = 1000; + + vec_expected_result_di[0] = 12300; + vec_expected_result_di[1] = -123456000; + + vec_result_di = vec_arg1_di * vec_arg2_di; + + for (i = 0; i<2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) { +#if DEBUG + printf("ERROR: vector multipy [%d] ((long long) %lld) = ", i, + vec_result_di[i]); + printf("\n does not match expected_result [%d] = ((long long) %lld)", i, + vec_expected_result_di[i]); + printf("\n\n"); +#else + abort(); +#endif + } + } + + /* Vector Divide Quadword */ + vec_arg1[0] = -12345678; + vec_arg2[0] = 2; + expected_result = -6172839; + + vec_result = vec_div (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_div (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 24680; + vec_uarg2[0] = 4; + uexpected_result = 6170; + + vec_uresult = 
vec_div (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_div (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector Divide Extended Quadword */ + vec_arg1[0] = -20; // has 128-bit of zero concatenated onto it + vec_arg2[0] = 0x2000000000000000; + vec_arg2[0] = vec_arg2[0] << 64; + expected_result = -160; + + vec_result = vec_dive (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_dive (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 20; // has 128-bit of zero concatenated onto it + vec_uarg2[0] = 0x4000000000000000; + vec_uarg2[0] = vec_uarg2[0] << 64; + uexpected_result = 80; + + vec_uresult = vec_dive (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_dive (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector modulo quad word */ + vec_arg1[0] = -12345675; + vec_arg2[0] = 2; + expected_result = -1; + + vec_result = vec_mod (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mod (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + 
print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 24685; + vec_uarg2[0] = 4; + uexpected_result = 1; + + vec_uresult = vec_mod (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mod (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + return 0; +}

[2/5,ver4] RS6000: Add 128-bit Integer Operations

Commit Message

Comments

Patch