===================================================================
@@ -15127,6 +15127,37 @@ vector long long vec_vupklsw (vector int
vector unsigned long long vec_vupklsw (vector int);
@end smallexample
+If the ISA 2.07 additions to the vector/scalar (power8-vector)
+instruction set are available, the following additional functions are
+available for 64-bit targets. The option @option{-mvsx-timode} must
+be enabled to use these functions.
+
+@smallexample
+__int128_t vec_vaddcuq (__int128_t, __int128_t);
+__uint128_t vec_vaddcuq (__uint128_t, __uint128_t);
+
+__int128_t vec_vadduqm (__int128_t, __int128_t);
+__uint128_t vec_vadduqm (__uint128_t, __uint128_t);
+
+__int128_t vec_vaddecuq (__int128_t, __int128_t, __int128_t);
+__uint128_t vec_vaddecuq (__uint128_t, __uint128_t, __uint128_t);
+
+__int128_t vec_vaddeuqm (__int128_t, __int128_t, __int128_t);
+__uint128_t vec_vaddeuqm (__uint128_t, __uint128_t, __uint128_t);
+
+__int128_t vec_vsubecuq (__int128_t, __int128_t, __int128_t);
+__uint128_t vec_vsubecuq (__uint128_t, __uint128_t, __uint128_t);
+
+__int128_t vec_vsubeuqm (__int128_t, __int128_t, __int128_t);
+__uint128_t vec_vsubeuqm (__uint128_t, __uint128_t, __uint128_t);
+
+__int128_t vec_vsubcuq (__int128_t, __int128_t);
+__uint128_t vec_vsubcuq (__uint128_t, __uint128_t);
+
+__int128_t vec_vsubuqm (__int128_t, __int128_t);
+__uint128_t vec_vsubuqm (__uint128_t, __uint128_t);
+@end smallexample
+
If the cryptographic instructions are enabled (@option{-mcrypto} or
@option{-mcpu=power8}), the following builtins are enabled.
Property changes on: gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c
___________________________________________________________________
Added: svn:mergeinfo
Merged /trunk/gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c:r207736-208333
Property changes on: gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c
___________________________________________________________________
Added: svn:mergeinfo
Merged /trunk/gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c:r207736-208333
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do assemble { target { lp64 } } } */
-/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps" } */
+/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps -mcpu=power5" } */
typedef int TImode __attribute__ ((mode (TI)));
@@ -46,6 +46,12 @@ TImode r19 (void *x) { return *(TImode *
TImode r20 (void *x) { return *(TImode *) (x + 32748); }
/* test should really be == 616, see pr54110 */
+/* When TImode is allowed in VSX registers, the only allowable address mode
+   for TImode is a single indirect address, so that the value can be loaded
+   and stored in either GPR or VSX registers.  This affects the generated
+   code and would cause this test to fail when such an option is used.  Fall
+   back to power5 to test the code.  */
+
/* { dg-final { object-size text <= 700 } } */
/* { dg-final { scan-assembler-not "(st|l)fd" } } */
/* { dg-final { cleanup-saved-temps "timode_off" } } */
===================================================================
@@ -1,3 +1,16 @@
+2014-03-05 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/p8vector-int128-1.c: New file to test ISA
+ 2.07 128-bit add/subtract builtins.
+ * gcc.target/powerpc/p8vector-int128-2.c: Likewise.
+
+ * gcc.target/powerpc/timode_off.c: Restrict cpu type to power5,
+ because when TImode is allowed in VSX registers, the only allowable
+ address mode for TImode is a single indirect address, so that the
+ value can be loaded and stored in either GPR or VSX registers.
+ This affects the generated code, and it would cause this test to
+ fail when such an option is used.
+
2014-03-04 Michael Meissner <meissner@linux.vnet.ibm.com>
Clone branch, subversion id 208334.
===================================================================
@@ -1,3 +1,88 @@
+2014-03-05 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions):
+ Document vec_vaddcuq, vec_vadduqm, vec_vaddecuq, vec_vaddeuqm,
+ vec_vsubecuq, vec_vsubeuqm, vec_vsubcuq, vec_vsubuqm builtins adding
+ 128-bit integer add/subtract to ISA 2.07.
+
+ * config/rs6000/rs6000-protos.h (rs6000_move_128bit_ok_p): Add new
+ declaration.
+ (rs6000_split_128bit_ok_p): Likewise.
+ (rs6000_int128_builtin_fixup): Likewise.
+
+ * gcc/config/rs6000/rs6000-builtin.def (BU_P8V_AV_3): Add new
+ macros to support adding ISA 2.07 3 argument builtins.
+ (BU_P8V_OVERLOAD_3): Likewise.
+ (VADDCUQ): Add ISA 2.07 builtins to support 128-bit integer
+ add/subtract instructions, both as a normal builtin, and as an
+ overloaded builtin.
+ (VADDUQM): Likewise.
+ (VSUBCUQ): Likewise.
+ (VSUBUQM): Likewise.
+ (VADDECUQ): Likewise.
+ (VADDEUQM): Likewise.
+ (VSUBECUQ): Likewise.
+ (VSUBEUQM): Likewise.
+
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support
+ for ISA 2.07 overloaded builtins to do 128-bit add and subtract.
+
+ * config/rs6000/rs6000.c (rs6000_init_builtins): Initialize state
+ variables for using __int128_t and __uint128_t as arguments to
+ builtins.
+ (rs6000_move_128bit_ok_p): New function to validate TImode/PTImode
+ moves.
+ (rs6000_split_128bit_ok_p): New function to say when it is ok to
+ split TImode/PTImode moves.
+ (rs6000_int128_builtin_fixup): New function to convert int 128-bit
+ add/subtract from using TImode to using V2DImode to allow use of
+ the ISA 2.07 builtins when TImode is not allowed in VSX
+ registers.
+
+ * gcc/config/rs6000/rs6000.h (enum rs6000_builtin_type_index): Add
+ support to allow __int128_t and __uint128_t types as builtin
+ arguments.
+ (intTI_type_internal_node): Likewise.
+ (uintTI_type_internal_node): Likewise.
+
+ * gcc/config/rs6000/altivec.md (UNSPEC_VADDUQM): New unspec
+ literals to allow addition of the ISA 2.07 128-bit add/subtract
+ builtin functions.
+ (UNSPEC_VADDCUQ): Likewise.
+ (UNSPEC_VADDEUQM): Likewise.
+ (UNSPEC_VADDECUQ): Likewise.
+ (UNSPEC_VSUBUQM): Likewise.
+ (UNSPEC_VSUBCUQ): Likewise.
+ (UNSPEC_VSUBEUQM): Likewise.
+ (UNSPEC_VSUBECUQ): Likewise.
+ (VINT128): New iterator for 128-bit add/subtract builtins.
+ (altivec_vadduqm): New ISA 2.07 128-bit add/subtract builtins.
+ (altivec_vadduqm_<mode>): Likewise.
+ (altivec_vaddcuq): Likewise.
+ (altivec_vaddcuq_<mode>): Likewise.
+ (altivec_vaddeuqm): Likewise.
+ (altivec_vaddeuqm_<mode>): Likewise.
+ (altivec_vaddecuq): Likewise.
+ (altivec_vaddecuq_<mode>): Likewise.
+ (altivec_vsubuqm): Likewise.
+ (altivec_vsubuqm_<mode>): Likewise.
+ (altivec_vsubcuq): Likewise.
+ (altivec_vsubcuq_<mode>): Likewise.
+ (altivec_vsubeuqm): Likewise.
+ (altivec_vsubeuqm_<mode>): Likewise.
+ (altivec_vsubecuq): Likewise.
+ (altivec_vsubecuq_<mode>): Likewise.
+
+ * gcc/config/rs6000/altivec.h (vec_vadduqm): If ISA 2.07, add
+ support for 128-bit add/subtract builtins.
+ (vec_vaddcuq): Likewise.
+ (vec_vaddeuqm): Likewise.
+ (vec_vaddecuq): Likewise.
+ (vec_vsubuqm): Likewise.
+ (vec_vsubcuq): Likewise.
+ (vec_vsubeuqm): Likewise.
+ (vec_vsubecuq): Likewise.
+
2014-03-04 Michael Meissner <meissner@linux.vnet.ibm.com>
Clone branch, subversion id 208334.
===================================================================
@@ -51,6 +51,10 @@ extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
+extern bool rs6000_move_128bit_ok_p (rtx []);
+extern bool rs6000_split_128bit_ok_p (rtx []);
+extern void rs6000_int128_builtin_fixup (rtx [], int, enum machine_mode,
+ enum unspec);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
===================================================================
@@ -325,6 +325,14 @@
| RS6000_BTC_BINARY), \
CODE_FOR_ ## ICODE) /* ICODE */
+#define BU_P8V_AV_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_altivec_" NAME, /* NAME */ \
@@ -359,6 +367,14 @@
| RS6000_BTC_BINARY), \
CODE_FOR_nothing) /* ICODE */
+#define BU_P8V_OVERLOAD_3(ENUM, NAME) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
/* Crypto convenience macros. */
#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
@@ -1332,7 +1348,9 @@ BU_P8V_AV_1 (VPOPCNTD, "vpopcntd",
BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
/* 2 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VADDCUQ, "vaddcuq", CONST, altivec_vaddcuq)
BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
+BU_P8V_AV_2 (VADDUQM, "vadduqm", CONST, altivec_vadduqm)
BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
@@ -1347,7 +1365,9 @@ BU_P8V_AV_2 (VRLD, "vrld", CONST, vrot
BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
+BU_P8V_AV_2 (VSUBCUQ, "vsubcuq", CONST, altivec_vsubcuq)
BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+BU_P8V_AV_2 (VSUBUQM, "vsubuqm", CONST, altivec_vsubuqm)
BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
@@ -1370,6 +1390,12 @@ BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONS
BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+/* 3 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_3 (VADDEUQM, "vaddeuqm", CONST, altivec_vaddeuqm)
+BU_P8V_AV_3 (VADDECUQ, "vaddecuq", CONST, altivec_vaddecuq)
+BU_P8V_AV_3 (VSUBEUQM, "vsubeuqm", CONST, altivec_vsubeuqm)
+BU_P8V_AV_3 (VSUBECUQ, "vsubecuq", CONST, altivec_vsubecuq)
+
/* Vector comparison instructions added in ISA 2.07. */
BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
@@ -1399,7 +1425,9 @@ BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
BU_P8V_OVERLOAD_2 (EQV, "eqv")
BU_P8V_OVERLOAD_2 (NAND, "nand")
BU_P8V_OVERLOAD_2 (ORC, "orc")
+BU_P8V_OVERLOAD_2 (VADDCUQ, "vaddcuq")
BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
+BU_P8V_OVERLOAD_2 (VADDUQM, "vadduqm")
BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
@@ -1414,7 +1442,15 @@ BU_P8V_OVERLOAD_2 (VRLD, "vrld")
BU_P8V_OVERLOAD_2 (VSLD, "vsld")
BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
+BU_P8V_OVERLOAD_2 (VSUBCUQ, "vsubcuq")
BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
+BU_P8V_OVERLOAD_2 (VSUBUQM, "vsubuqm")
+
+/* ISA 2.07 vector overloaded 3 argument functions. */
+BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq")
+BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm")
+BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq")
+BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm")
/* 1 argument crypto functions. */
===================================================================
@@ -706,6 +706,10 @@ const struct altivec_builtin_types altiv
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
@@ -2327,6 +2331,10 @@ const struct altivec_builtin_types altiv
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
@@ -3726,6 +3734,11 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
+
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
@@ -3739,6 +3752,11 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
+
{ P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
@@ -3781,6 +3799,30 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI },
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI,
+ RS6000_BTI_UINTTI },
+
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI },
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI,
+ RS6000_BTI_UINTTI },
+
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI },
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI,
+ RS6000_BTI_UINTTI },
+
+  { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+    RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI },
+  { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+    RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI,
+    RS6000_BTI_UINTTI },
+
{ P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
@@ -3900,6 +3942,11 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
+
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
@@ -3913,6 +3960,11 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_INTTI, RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0 },
+
{ P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
{ P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
===================================================================
@@ -13662,6 +13662,8 @@ rs6000_init_builtins (void)
uintSI_type_internal_node = unsigned_intSI_type_node;
intDI_type_internal_node = intDI_type_node;
uintDI_type_internal_node = unsigned_intDI_type_node;
+ intTI_type_internal_node = intTI_type_node;
+ uintTI_type_internal_node = unsigned_intTI_type_node;
float_type_internal_node = float_type_node;
double_type_internal_node = double_type_node;
void_type_internal_node = void_type_node;
@@ -13674,6 +13676,8 @@ rs6000_init_builtins (void)
builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
builtin_mode_to_type[DImode][0] = intDI_type_node;
builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
+ builtin_mode_to_type[TImode][0] = intTI_type_node;
+ builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
builtin_mode_to_type[SFmode][0] = float_type_node;
builtin_mode_to_type[DFmode][0] = double_type_node;
builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
@@ -17209,6 +17213,67 @@ rs6000_output_move_128bit (rtx operands[
gcc_unreachable ();
}
+/* Return true if either operand of a 128-bit move is a gpc_reg_operand.  */
+bool
+rs6000_move_128bit_ok_p (rtx operands[])
+{
+  if (gpc_reg_operand (operands[0], GET_MODE (operands[0])))
+    return true;
+  return gpc_reg_operand (operands[1], GET_MODE (operands[0]));
+}
+
+/* Return true if a 128-bit move needs to be split.  Only moves seen after
+   reload that involve a GPR on at least one side are candidates.  */
+bool
+rs6000_split_128bit_ok_p (rtx operands[])
+{
+  if (!reload_completed)
+    return false;
+
+  if (!gpr_or_gpr_p (operands[0], operands[1]))
+    return false;
+
+  /* Match the old define_split condition, which excluded direct moves.  */
+  return (!direct_move_p (operands[0], operands[1])
+          && !quad_load_store_p (operands[0], operands[1]));
+}
+
+
+/* Expand a 128-bit integer builtin whose operands are TImode values by
+   rewriting it to operate in MODE instead.  OPERANDS[0] is the destination
+   and OPERANDS[1] .. OPERANDS[N_ARGS-1] are the inputs, so N_ARGS counts the
+   destination as well.  Each input is copied into a new MODE register, and
+   the result is set from an UNSPEC numbered BUILTIN_UNSPEC.  */
+void
+rs6000_int128_builtin_fixup (rtx operands[],
+                             int n_args,
+                             enum machine_mode mode,
+                             enum unspec builtin_unspec)
+{
+  rtx target = gen_lowpart (mode, operands[0]);
+  rtx inputs[3];
+  int n_inputs = n_args - 1;
+  int idx;
+
+  gcc_assert (IN_RANGE (n_args, 2, 4));
+
+  /* Copy every source operand, viewed in MODE, into its own new register.  */
+  for (idx = 0; idx < n_inputs; idx++)
+    {
+      rtx copy = gen_reg_rtx (mode);
+
+      inputs[idx] = copy;
+      emit_move_insn (copy, gen_lowpart (mode, operands[idx + 1]));
+    }
+
+  /* Wrap the copies in the UNSPEC and store it through the MODE view of the
+     destination.  */
+  emit_insn (gen_rtx_SET (VOIDmode, target,
+                          gen_rtx_UNSPEC (mode,
+                                          gen_rtvec_v (n_inputs, inputs),
+                                          (int) builtin_unspec)));
+}
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
===================================================================
@@ -529,6 +529,7 @@ extern int rs6000_vector_align[];
#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64)
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
@@ -2611,6 +2612,8 @@ enum rs6000_builtin_type_index
RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */
RS6000_BTI_INTDI, /* intDI_type_node */
RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */
+ RS6000_BTI_INTTI, /* intTI_type_node */
+ RS6000_BTI_UINTTI, /* unsigned_intTI_type_node */
RS6000_BTI_float, /* float_type_node */
RS6000_BTI_double, /* double_type_node */
RS6000_BTI_void, /* void_type_node */
@@ -2658,6 +2661,8 @@ enum rs6000_builtin_type_index
#define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI])
#define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI])
#define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI])
+#define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI])
+#define uintTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTTI])
#define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float])
#define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double])
#define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void])
===================================================================
@@ -136,6 +136,14 @@ (define_c_enum "unspec"
UNSPEC_VMRGL_DIRECT
UNSPEC_VSPLT_DIRECT
UNSPEC_VSUMSWS_DIRECT
+ UNSPEC_VADDUQM
+ UNSPEC_VADDCUQ
+ UNSPEC_VADDEUQM
+ UNSPEC_VADDECUQ
+ UNSPEC_VSUBUQM
+ UNSPEC_VSUBCUQ
+ UNSPEC_VSUBEUQM
+ UNSPEC_VSUBECUQ
])
(define_c_enum "unspecv"
@@ -176,6 +184,11 @@ (define_mode_attr VP_small [(V2DI "V4SI"
(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
+;; Iterator for 128-bit integer types. TImode might not be allowed in Altivec
+;; registers, so we also define the operators for V2DImode and use that as a
+;; proxy.
+(define_mode_iterator VINT128 [TI V2DI])
+
;; Vector move instructions.
(define_insn "*altivec_mov<mode>"
[(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v")
@@ -3226,3 +3239,209 @@ (define_insn "p8v_vgbbd"
"vgbbd %0,%1"
[(set_attr "length" "4")
(set_attr "type" "vecsimple")])
+
+;; 128-bit binary integer arithmetic
+;; Use unspec to force things to use the Altivec/VMX registers
+;; If TImode can't go in Altivec registers, we need to convert
+;; the type to V2DImode to use the instruction.
+
+(define_expand "altivec_vadduqm"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")]
+ UNSPEC_VADDUQM))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 3, V2DImode, UNSPEC_VADDUQM);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vadduqm_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")]
+ UNSPEC_VADDUQM))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vadduqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vaddcuq"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")]
+ UNSPEC_VADDCUQ))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 3, V2DImode, UNSPEC_VADDCUQ);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vaddcuq_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")]
+ UNSPEC_VADDCUQ))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vaddcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vsubuqm"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")]
+ UNSPEC_VSUBUQM))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 3, V2DImode, UNSPEC_VSUBUQM);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vsubuqm_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")]
+ UNSPEC_VSUBUQM))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsubuqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vsubcuq"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")]
+ UNSPEC_VSUBCUQ))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 3, V2DImode, UNSPEC_VSUBCUQ);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vsubcuq_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")]
+ UNSPEC_VSUBCUQ))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsubcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vaddeuqm"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")
+ (match_operand:TI 3 "register_operand" "v")]
+ UNSPEC_VADDEUQM))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 4, V2DImode, UNSPEC_VADDEUQM);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vaddeuqm_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")
+ (match_operand:VINT128 3 "register_operand" "v")]
+ UNSPEC_VADDEUQM))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vaddeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vaddecuq"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")
+ (match_operand:TI 3 "register_operand" "v")]
+ UNSPEC_VADDECUQ))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 4, V2DImode, UNSPEC_VADDECUQ);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vaddecuq_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")
+ (match_operand:VINT128 3 "register_operand" "v")]
+ UNSPEC_VADDECUQ))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vaddecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vsubeuqm"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")
+ (match_operand:TI 3 "register_operand" "v")]
+ UNSPEC_VSUBEUQM))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 4, V2DImode, UNSPEC_VSUBEUQM);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vsubeuqm_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")
+ (match_operand:VINT128 3 "register_operand" "v")]
+ UNSPEC_VSUBEUQM))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsubeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vsubecuq"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "v")
+ (match_operand:TI 2 "register_operand" "v")
+ (match_operand:TI 3 "register_operand" "v")]
+ UNSPEC_VSUBECUQ))]
+ "TARGET_VADDUQM"
+{
+ if (!TARGET_VSX_TIMODE)
+ {
+ rs6000_int128_builtin_fixup (operands, 4, V2DImode, UNSPEC_VSUBECUQ);
+ DONE;
+ }
+})
+
+(define_insn "altivec_vsubecuq_<mode>"
+ [(set (match_operand:VINT128 0 "register_operand" "=v")
+ (unspec:VINT128 [(match_operand:VINT128 1 "register_operand" "v")
+ (match_operand:VINT128 2 "register_operand" "v")
+ (match_operand:VINT128 3 "register_operand" "v")]
+ UNSPEC_VSUBECUQ))]
+ "TARGET_VADDUQM && VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsubecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
===================================================================
@@ -10330,8 +10330,7 @@ (define_insn "*mov<mode>_ppc64"
[(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
(match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))]
"(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
- && (gpc_reg_operand (operands[0], <MODE>mode)
- || gpc_reg_operand (operands[1], <MODE>mode)))"
+ && rs6000_move_128bit_ok_p (operands))"
{
return rs6000_output_move_128bit (operands);
}
@@ -10369,10 +10368,7 @@ (define_split
(define_split
[(set (match_operand:TI2 0 "nonimmediate_operand" "")
(match_operand:TI2 1 "input_operand" ""))]
- "reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])
- && !direct_move_p (operands[0], operands[1])
- && !quad_load_store_p (operands[0], operands[1])"
+ "rs6000_split_128bit_ok_p (operands)"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
===================================================================
@@ -326,12 +326,19 @@
#define vec_eqv __builtin_vec_eqv
#define vec_nand __builtin_vec_nand
#define vec_orc __builtin_vec_orc
+#define vec_vaddcuq __builtin_vec_vaddcuq
#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vadduqm __builtin_vec_vadduqm
+#define vec_vbpermq __builtin_vec_vbpermq
#define vec_vclz __builtin_vec_vclz
#define vec_vclzb __builtin_vec_vclzb
#define vec_vclzd __builtin_vec_vclzd
#define vec_vclzh __builtin_vec_vclzh
#define vec_vclzw __builtin_vec_vclzw
+#define vec_vaddecuq __builtin_vec_vaddecuq
+#define vec_vaddeuqm __builtin_vec_vaddeuqm
+#define vec_vsubecuq __builtin_vec_vsubecuq
+#define vec_vsubeuqm __builtin_vec_vsubeuqm
#define vec_vgbbd __builtin_vec_vgbbd
#define vec_vmaxsd __builtin_vec_vmaxsd
#define vec_vmaxud __builtin_vec_vmaxud
@@ -352,7 +359,9 @@
#define vec_vsld __builtin_vec_vsld
#define vec_vsrad __builtin_vec_vsrad
#define vec_vsrd __builtin_vec_vsrd
+#define vec_vsubcuq __builtin_vec_vsubcuq
#define vec_vsubudm __builtin_vec_vsubudm
+#define vec_vsubuqm __builtin_vec_vsubuqm
#define vec_vupkhsw __builtin_vec_vupkhsw
#define vec_vupklsw __builtin_vec_vupklsw
#endif