diff mbox

, Add DFP, extended divide, BCD builtins to PowerPC

Message ID 20140422202119.GA6785@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner April 22, 2014, 8:21 p.m. UTC
This patch adds the last set of instructions in the ISA 2.05 (power6), ISA 2.06
(power7), and ISA 2.07 (power8) specifications as builtins.  As far as I know,
this completes the set of user space instructions that users might want to use
from the GCC compiler.  In addition, I added some 128-bit pack and unpack
functions for the use of library maintainers that need to deal with the 128-bit
types as two 64-bit words.

The builtins added are:

1. Extended divide instructions added in ISA 2.06:
==================================================

int __builtin_divwe (int, int);
int __builtin_divweo (int, int);
unsigned int __builtin_divweu (unsigned int, unsigned int);
unsigned int __builtin_divweuo (unsigned int, unsigned int);

long __builtin_divde (long, long);
long __builtin_divdeo (long, long);
unsigned long __builtin_divdeu (unsigned long, unsigned long);
unsigned long __builtin_divdeuo (unsigned long, unsigned long);

2. BCD instructions added in ISA 2.06:
======================================

unsigned int __builtin_cdtbcd (unsigned int);
unsigned int __builtin_cbcdtd (unsigned int);
unsigned int __builtin_addg6s (unsigned int, unsigned int);

3. Decimal floating point instructions added in ISA 2.05:
=========================================================

_Decimal64 __builtin_dxex (_Decimal64);
_Decimal128 __builtin_dxexq (_Decimal128);
_Decimal64 __builtin_ddedpd (int, _Decimal64);
_Decimal128 __builtin_ddedpdq (int, _Decimal128);
_Decimal64 __builtin_denbcd (int, _Decimal64);
_Decimal128 __builtin_denbcdq (int, _Decimal128);
_Decimal64 __builtin_diex (_Decimal64, _Decimal64);
_Decimal128 __builtin_diexq (_Decimal128, _Decimal128);
_Decimal64 __builtin_dscli (_Decimal64, int);
_Decimal128 __builtin_dscliq (_Decimal128, int);
_Decimal64 __builtin_dscri (_Decimal64, int);
_Decimal128 __builtin_dscriq (_Decimal128, int);

4. 128-bit pack/unpack functions:
=================================

double __builtin_unpack_longdouble (long double, int);
double __builtin_longdouble_dw0 (long double);
double __builtin_longdouble_dw1 (long double);
long double __builtin_pack_longdouble (double, double);

unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
_Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);

unsigned long long __builtin_unpack_vector_int128 (vector __int128_t, int);
vector __int128_t __builtin_pack_vector_int128 (unsigned long long,
                                                unsigned long long);

5. BCD add/subtract instructions added in ISA 2.07:
===================================================

vector __int128_t __builtin_bcdadd (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdadd_lt (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdadd_eq (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdadd_gt (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdadd_ov (vector __int128_t, vector __int128_t, const int);
vector __int128_t __builtin_bcdsub (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdsub_lt (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdsub_eq (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdsub_gt (vector __int128_t, vector __int128_t, const int);
int __builtin_bcdsub_ov (vector __int128_t, vector __int128_t, const int);

Note, there are two minor bugs that I found in writing the test cases, that I
will submit as bugzillas to be fixed eventually.

The first bug is the PowerPC extension: 'vector __int128_t' does not seem to
work with LTO, which shows up when I wrote the test pack01.c.  I suspect this
is some problem with the way 'vector' is implemented as a conditional keyword,
along with the C family private type __int128_t that doesn't work together.  I
used the vector_size attribute instead of the 'vector' keyword to work around
this.

The second bug is that pack01.c won't generate the correct code on power7
little endian (the words get swapped in going between GPRs and VSX registers).
On power8 it works, since we have the direct move instructions.  Since little
endian powerpc 64-bit is officially only supported on power8 and above, I just
made the test require power8.

I've done bootstrap and make check with no regressions on power7 big endian,
power8 big endian, and power8 little endian systems.  Can I install the
patches?  I would prefer to upgrade both 4.8 and 4.9 as well, since we have
moved the power8 enablement to those branches.

If these patches are too big, I can reformulate them as a series of smaller
patches.

Comments

David Edelsohn April 24, 2014, 6:42 p.m. UTC | #1
On Tue, Apr 22, 2014 at 4:21 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:
> This patch adds the last set of instructions in the ISA 2.05 (power6), ISA 2.06
> (power7), and ISA 2.07 (power8) specifications as builtins.  As far as I know,
> this completes the set of user space instructions that users might want to use
> to the GCC compiler.  In addition, I added some 128-bit pack and unpack
> functions for the use of library maintainers that need to deal with the 128-bit
> types as two 64-bit words.
>
> The builtins added are:
>
> 1. Extended divide instructions added in ISA 2.06:
> ==================================================
>
> int __builtin_divwe (int, int);
> int __builtin_divweo (int, int);
> unsigned int __builtin_divweu (unsigned int, unsigned int);
> unsigned int __builtin_divweuo (unsigned int, unsigned int);
>
> long __builtin_divde (long, long);
> long __builtin_divdeo (long, long);
> unsigned long __builtin_divdeu (unsigned long, unsigned long);
> unsigned long __builtin_divdeuo (unsigned long, unsigned long);
>
> 2. BCD instructions added in ISA 2.06:
> ======================================
>
> unsigned int cdtbcd (unsigned int);
> unsigned int cbcdtd (unsigned int);
> unsigned int addg6s (unsigned int, unsigned int);
>
> 3. Decimal floating point instructions added in ISA 2.05:
> =========================================================
>
> _Decimal64 __builtin_dxex (_Decimal64);
> _Decimal128 __builtin_dxexq (_Decimal128);
> _Decimal64 __builtin_ddedpd (int, _Decimal64);
> _Decimal128 __builtin_ddedpdq (int, _Decimal128);
> _Decimal64 __builtin_denbcd (int, _Decimal64);
> _Decimal128 __builtin_denbcdq (int, _Decimal128);
> _Decimal64 __builtin_diex (_Decimal64, _Decimal64);
> _Decimal128 _builtin_diexq (_Decimal128, _Decimal128);
> _Decimal64 __builtin_dscli (_Decimal64, int);
> _Decimal128 __builitn_dscliq (_Decimal128, int);
> _Decimal64 __builtin_dscri (_Decimal64, int);
> _Decimal128 __builitn_dscriq (_Decimal128, int);
>
> 4. 128-bit pack/unpack functions:
> =================================
>
> double __builtin_unpack_longdouble (long double, int);
> double __builtin_longdouble_dw0 (long double);
> double __builtin_longdouble_dw1 (long double);
> long double __builtin_pack_longdouble (double, double);
>
> unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
> _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
>
> unsigned long long __builtin_unpack_vector_int128 (vector __int128_t, int);
> vector __int128_t __builtin_pack_vector_int128 (unsigned long long,
>                                                 unsigned long long);
>
> 5. BCD add/subtract instructions added in ISA 2.07:
> ===================================================
>
> vector __int128_t __builtin_bcdadd (vector __int128_t, vector__int128_t);
> int __builtin_bcdadd_lt (vector __int128_t, vector__int128_t);
> int __builtin_bcdadd_eq (vector __int128_t, vector__int128_t);
> int __builtin_bcdadd_gt (vector __int128_t, vector__int128_t);
> int __builtin_bcdadd_ov (vector __int128_t, vector__int128_t);
> vector __int128_t bcdsub (vector __int128_t, vector__int128_t);
> int __builtin_bcdsub_lt (vector __int128_t, vector__int128_t);
> int __builtin_bcdsub_eq (vector __int128_t, vector__int128_t);
> int __builtin_bcdsub_gt (vector __int128_t, vector__int128_t);
> int __builtin_bcdsub_ov (vector __int128_t, vector__int128_t);
>
> Note, there are two minor bugs that I found in writing the test cases, that I
> will submit as bugzillas to be fixed eventually.
>
> The first bug is the PowerPC extension: 'vector __int128_t' does not seem to
> work with LTO, which shows up when I wrote the test pack01.c.  I suspect this
> is some problem with the way 'vector' is implemented as a conditional keyword,
> along with the C family private type __int128_t that doesn't work together.  I
> used the vector_size attribute instead of the 'vector' keyword to work around
> this.
>
> The second bug is that pack01.c won't generate the correct code on power7
> little endian (the words get swapped in going between GPRs and VSX registers).
> On power8 it works, since we have the direct move instructions.  Since little
> endian powerpc 64-bit is officially only supported on power8 and above, I just
> made the test require power8.
>
> I've done bootstrap and make check with no regressions on power7 big endian,
> power8 big endian, and power8 little endian systems.  Can I install the
> patches?  I would prefer to upgrade both 4.8 and 4.9 as well, since we have
> moved the power8 enablement to those branches.

This patch is okay.

Please allow a few days on trunk before backporting to ensure that it
does not expose problems on other configurations.

The div[dw]e instructions could be utilized by divtidi3 and divdisi3,
but GCC does not have the infrastructure for that.

Thanks, David
diff mbox

Patch

Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 209549)
+++ gcc/doc/extend.texi	(working copy)
@@ -12787,9 +12787,12 @@  float __builtin_recipdivf (float, float)
 float __builtin_rsqrtf (float);
 double __builtin_recipdiv (double, double);
 double __builtin_rsqrt (double);
-long __builtin_bpermd (long, long);
 uint64_t __builtin_ppc_get_timebase ();
 unsigned long __builtin_ppc_mftb ();
+double __builtin_unpack_longdouble (long double, int);
+double __builtin_longdouble_dw0 (long double);
+double __builtin_longdouble_dw1 (long double);
+long double __builtin_pack_longdouble (double, double);
 @end smallexample
 
 The @code{vec_rsqrt}, @code{__builtin_rsqrt}, and
@@ -12809,6 +12812,57 @@  The @code{__builtin_ppc_mftb} function a
 returns the Time Base Register value as an unsigned long, throwing away
 the most significant word on 32-bit environments.
 
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 2.06 or later (@option{-mcpu=power7}
+or @option{-mpopcntd}):
+@smallexample
+long __builtin_bpermd (long, long);
+int __builtin_divwe (int, int);
+int __builtin_divweo (int, int);
+unsigned int __builtin_divweu (unsigned int, unsigned int);
+unsigned int __builtin_divweuo (unsigned int, unsigned int);
+long __builtin_divde (long, long);
+long __builtin_divdeo (long, long);
+unsigned long __builtin_divdeu (unsigned long, unsigned long);
+unsigned long __builtin_divdeuo (unsigned long, unsigned long);
+unsigned int __builtin_cdtbcd (unsigned int);
+unsigned int __builtin_cbcdtd (unsigned int);
+unsigned int __builtin_addg6s (unsigned int, unsigned int);
+@end smallexample
+
+The @code{__builtin_divde}, @code{__builtin_divdeo},
+@code{__builtin_divdeu}, @code{__builtin_divdeuo} functions require a
+64-bit environment that supports ISA 2.06 or later.
+
+The following built-in functions are available for the PowerPC family
+of processors when hardware decimal floating point
+(@option{-mhard-dfp}) is available:
+@smallexample
+_Decimal64 __builtin_dxex (_Decimal64);
+_Decimal128 __builtin_dxexq (_Decimal128);
+_Decimal64 __builtin_ddedpd (int, _Decimal64);
+_Decimal128 __builtin_ddedpdq (int, _Decimal128);
+_Decimal64 __builtin_denbcd (int, _Decimal64);
+_Decimal128 __builtin_denbcdq (int, _Decimal128);
+_Decimal64 __builtin_diex (_Decimal64, _Decimal64);
+_Decimal128 __builtin_diexq (_Decimal128, _Decimal128);
+_Decimal64 __builtin_dscli (_Decimal64, int);
+_Decimal128 __builtin_dscliq (_Decimal128, int);
+_Decimal64 __builtin_dscri (_Decimal64, int);
+_Decimal128 __builtin_dscriq (_Decimal128, int);
+unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
+_Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
+@end smallexample
+
+The following built-in functions are available for the PowerPC family
+of processors when the Vector Scalar (vsx) instruction set is
+available:
+@smallexample
+unsigned long long __builtin_unpack_vector_int128 (vector __int128_t, int);
+vector __int128_t __builtin_pack_vector_int128 (unsigned long long,
+                                                unsigned long long);
+@end smallexample
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec Built-in Functions
 
@@ -15220,6 +15274,17 @@  vector __uint128_t vec_vsubcuq (vector _
 
 __int128_t vec_vsubuqm (__int128_t, __int128_t);
 __uint128_t vec_vsubuqm (__uint128_t, __uint128_t);
+
+vector __int128_t __builtin_bcdadd (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdadd_lt (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdadd_eq (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdadd_gt (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdadd_ov (vector __int128_t, vector __int128_t, const int);
+vector __int128_t __builtin_bcdsub (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdsub_lt (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdsub_eq (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdsub_gt (vector __int128_t, vector __int128_t, const int);
+int __builtin_bcdsub_ov (vector __int128_t, vector __int128_t, const int);
 @end smallexample
 
 If the cryptographic instructions are enabled (@option{-mcrypto} or
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md	(revision 209549)
+++ gcc/config/rs6000/predicates.md	(working copy)
@@ -171,6 +171,11 @@  (define_predicate "const_0_to_1_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
 
+;; Match op = 0..3.
+(define_predicate "const_0_to_3_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+
 ;; Match op = 2 or op = 3.
 (define_predicate "const_2_to_3_operand"
   (and (match_code "const_int")
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def	(revision 209549)
+++ gcc/config/rs6000/rs6000-builtin.def	(working copy)
@@ -570,6 +570,75 @@ 
 		    MASK,				/* MASK */	\
 		    (ATTR | RS6000_BTC_SPECIAL),	/* ATTR */	\
 		    CODE_FOR_nothing)			/* ICODE */
+
+
+/* Decimal floating point builtins for instructions.  */
+#define BU_DFP_MISC_1(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_DFP,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_UNARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+#define BU_DFP_MISC_2(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_DFP,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+
+/* Miscellaneous builtins for instructions added in ISA 2.06.  These
+   instructions don't require either the DFP or VSX options, just the basic ISA
+   2.06 (popcntd) enablement since they operate on general purpose
+   registers.  */
+#define BU_P7_MISC_1(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_POPCNTD,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_UNARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+#define BU_P7_MISC_2(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_POPCNTD,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+
+/* Miscellaneous builtins for instructions added in ISA 2.07.  These
+   instructions do require the ISA 2.07 vector support, but they aren't vector
+   instructions.  */
+#define BU_P8V_MISC_3(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_P8_VECTOR,		/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_TERNARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+/* Miscellaneous builtins that are always enabled (RS6000_BTM_ALWAYS).  */
+#define BU_MISC_1(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_ALWAYS,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_UNARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
+#define BU_MISC_2(ENUM, NAME, ATTR, ICODE)				\
+  RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_" NAME,			/* NAME */	\
+		    RS6000_BTM_ALWAYS,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
+
 #endif
 
 /* Insure 0 is not a legitimate index.  */
@@ -1412,10 +1481,10 @@  BU_P8V_AV_2 (ORC_V4SF,		"orc_v4sf",	CONS
 BU_P8V_AV_2 (ORC_V2DF,		"orc_v2df",	CONST,	orcv2df3)
 
 /* 3 argument altivec instructions added in ISA 2.07.  */
-BU_P8V_AV_3 (VADDEUQM,		"vaddeuqm",	 CONST,	altivec_vaddeuqm)
-BU_P8V_AV_3 (VADDECUQ,		"vaddecuq",	 CONST,	altivec_vaddecuq)
-BU_P8V_AV_3 (VSUBEUQM,		"vsubeuqm",	 CONST,	altivec_vsubeuqm)
-BU_P8V_AV_3 (VSUBECUQ,		"vsubecuq",	 CONST,	altivec_vsubecuq)
+BU_P8V_AV_3 (VADDEUQM,		"vaddeuqm",	CONST,	altivec_vaddeuqm)
+BU_P8V_AV_3 (VADDECUQ,		"vaddecuq",	CONST,	altivec_vaddecuq)
+BU_P8V_AV_3 (VSUBEUQM,		"vsubeuqm",	CONST,	altivec_vsubeuqm)
+BU_P8V_AV_3 (VSUBECUQ,		"vsubecuq",	CONST,	altivec_vsubecuq)
 
 /* Vector comparison instructions added in ISA 2.07.  */
 BU_P8V_AV_2 (VCMPEQUD,		"vcmpequd",	CONST,	vector_eqv2di)
@@ -1475,6 +1544,64 @@  BU_P8V_OVERLOAD_3 (VSUBECUQ,	"vsubecuq")
 BU_P8V_OVERLOAD_3 (VSUBEUQM,	"vsubeuqm")
 
 
+/* 2 argument extended divide functions added in ISA 2.06.  */
+BU_P7_MISC_2 (DIVWE,		"divwe",	CONST,	dive_si)
+BU_P7_MISC_2 (DIVWEO,		"divweo",	CONST,	diveo_si)
+BU_P7_MISC_2 (DIVWEU,		"divweu",	CONST,	diveu_si)
+BU_P7_MISC_2 (DIVWEUO,		"divweuo",	CONST,	diveuo_si)
+BU_P7_MISC_2 (DIVDE,		"divde",	CONST,	dive_di)
+BU_P7_MISC_2 (DIVDEO,		"divdeo",	CONST,	diveo_di)
+BU_P7_MISC_2 (DIVDEU,		"divdeu",	CONST,	diveu_di)
+BU_P7_MISC_2 (DIVDEUO,		"divdeuo",	CONST,	diveuo_di)
+
+/* 1 argument DFP (decimal floating point) functions added in ISA 2.05.  */
+BU_DFP_MISC_1 (DXEX,		"dxex",		CONST,	dfp_dxex_dd)
+BU_DFP_MISC_1 (DXEXQ,		"dxexq",	CONST,	dfp_dxex_td)
+
+/* 2 argument DFP (decimal floating point) functions added in ISA 2.05.  */
+BU_DFP_MISC_2 (DDEDPD,		"ddedpd",	CONST,	dfp_ddedpd_dd)
+BU_DFP_MISC_2 (DDEDPDQ,		"ddedpdq",	CONST,	dfp_ddedpd_td)
+BU_DFP_MISC_2 (DENBCD,		"denbcd",	CONST,	dfp_denbcd_dd)
+BU_DFP_MISC_2 (DENBCDQ,		"denbcdq",	CONST,	dfp_denbcd_td)
+BU_DFP_MISC_2 (DIEX,		"diex",		CONST,	dfp_diex_dd)
+BU_DFP_MISC_2 (DIEXQ,		"diexq",	CONST,	dfp_diex_td)
+BU_DFP_MISC_2 (DSCLI,		"dscli",	CONST,	dfp_dscli_dd)
+BU_DFP_MISC_2 (DSCLIQ,		"dscliq",	CONST,	dfp_dscli_td)
+BU_DFP_MISC_2 (DSCRI,		"dscri",	CONST,	dfp_dscri_dd)
+BU_DFP_MISC_2 (DSCRIQ,		"dscriq",	CONST,	dfp_dscri_td)
+
+/* 1 argument BCD functions added in ISA 2.06.  */
+BU_P7_MISC_1 (CDTBCD,		"cdtbcd",	CONST,	cdtbcd)
+BU_P7_MISC_1 (CBCDTD,		"cbcdtd",	CONST,	cbcdtd)
+
+/* 2 argument BCD functions added in ISA 2.06.  */
+BU_P7_MISC_2 (ADDG6S,		"addg6s",	CONST,	addg6s)
+
+/* 3 argument BCD functions added in ISA 2.07.  */
+BU_P8V_MISC_3 (BCDADD,		"bcdadd",	CONST,	bcdadd)
+BU_P8V_MISC_3 (BCDADD_LT,	"bcdadd_lt",	CONST,	bcdadd_lt)
+BU_P8V_MISC_3 (BCDADD_EQ,	"bcdadd_eq",	CONST,	bcdadd_eq)
+BU_P8V_MISC_3 (BCDADD_GT,	"bcdadd_gt",	CONST,	bcdadd_gt)
+BU_P8V_MISC_3 (BCDADD_OV,	"bcdadd_ov",	CONST,	bcdadd_unordered)
+BU_P8V_MISC_3 (BCDSUB,		"bcdsub",	CONST,	bcdsub)
+BU_P8V_MISC_3 (BCDSUB_LT,	"bcdsub_lt",	CONST,	bcdsub_lt)
+BU_P8V_MISC_3 (BCDSUB_EQ,	"bcdsub_eq",	CONST,	bcdsub_eq)
+BU_P8V_MISC_3 (BCDSUB_GT,	"bcdsub_gt",	CONST,	bcdsub_gt)
+BU_P8V_MISC_3 (BCDSUB_OV,	"bcdsub_ov",	CONST,	bcdsub_unordered)
+
+/* 2 argument pack/unpack 128-bit floating point types.  */
+BU_DFP_MISC_2 (PACK_TD,		"pack_dec128",		CONST,	packtd)
+BU_DFP_MISC_2 (UNPACK_TD,	"unpack_dec128",	CONST,	unpacktd)
+
+BU_MISC_2 (PACK_TF,		"pack_longdouble",	CONST,	packtf)
+BU_MISC_2 (UNPACK_TF,		"unpack_longdouble",	CONST,	unpacktf)
+BU_MISC_1 (UNPACK_TF_0,		"longdouble_dw0",	CONST,	unpacktf_0)
+BU_MISC_1 (UNPACK_TF_1,		"longdouble_dw1",	CONST,	unpacktf_1)
+
+BU_P7_MISC_2 (PACK_V1TI,	"pack_vector_int128",	CONST,	packv1ti)
+BU_P7_MISC_2 (UNPACK_V1TI,	"unpack_vector_int128",	CONST,	unpackv1ti)
+
+
 /* 1 argument crypto functions.  */
 BU_CRYPTO_1 (VSBOX,		"vsbox",	  CONST, crypto_vsbox)
 
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 209549)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -3038,7 +3038,8 @@  rs6000_builtin_mask_calculate (void)
 	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
 	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR : 0)
 	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	   : 0)
-	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0));
+	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0)
+	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	   : 0));
 }
 
 /* Override command line options.  Mostly we process the processor type and
@@ -12402,7 +12403,15 @@  rs6000_expand_ternop_builtin (enum insn_
 	}
     }
   else if (icode == CODE_FOR_vsx_set_v2df
-           || icode == CODE_FOR_vsx_set_v2di)
+           || icode == CODE_FOR_vsx_set_v2di
+	   || icode == CODE_FOR_bcdadd
+	   || icode == CODE_FOR_bcdadd_lt
+	   || icode == CODE_FOR_bcdadd_eq
+	   || icode == CODE_FOR_bcdadd_gt
+	   || icode == CODE_FOR_bcdsub
+	   || icode == CODE_FOR_bcdsub_lt
+	   || icode == CODE_FOR_bcdsub_eq
+	   || icode == CODE_FOR_bcdsub_gt)
     {
       /* Only allow 1-bit unsigned literals.  */
       STRIP_NOPS (arg2);
@@ -12413,6 +12422,44 @@  rs6000_expand_ternop_builtin (enum insn_
 	  return const0_rtx;
 	}
     }
+  else if (icode == CODE_FOR_dfp_ddedpd_dd
+           || icode == CODE_FOR_dfp_ddedpd_td)
+    {
+      /* Argument 1 (arg0) must be a literal that fits in 2 bits (~0x3).  */
+      STRIP_NOPS (arg0);
+      if (TREE_CODE (arg0) != INTEGER_CST
+	  || TREE_INT_CST_LOW (arg0) & ~0x3)
+	{
+	  error ("argument 1 must be 0 or 2");
+	  return const0_rtx;
+	}
+    }
+  else if (icode == CODE_FOR_dfp_denbcd_dd
+	   || icode == CODE_FOR_dfp_denbcd_td)
+    {
+      /* Only allow 1-bit unsigned literals.  */
+      STRIP_NOPS (arg0);
+      if (TREE_CODE (arg0) != INTEGER_CST
+	  || TREE_INT_CST_LOW (arg0) & ~0x1)
+	{
+	  error ("argument 1 must be a 1-bit unsigned literal");
+	  return const0_rtx;
+	}
+    }
+  else if (icode == CODE_FOR_dfp_dscli_dd
+           || icode == CODE_FOR_dfp_dscli_td
+	   || icode == CODE_FOR_dfp_dscri_dd
+	   || icode == CODE_FOR_dfp_dscri_td)
+    {
+      /* Only allow 6-bit unsigned literals.  */
+      STRIP_NOPS (arg1);
+      if (TREE_CODE (arg1) != INTEGER_CST
+	  || TREE_INT_CST_LOW (arg1) & ~0x3f)
+	{
+	  error ("argument 2 must be a 6-bit unsigned literal");
+	  return const0_rtx;
+	}
+    }
   else if (icode == CODE_FOR_crypto_vshasigmaw
 	   || icode == CODE_FOR_crypto_vshasigmad)
     {
@@ -13504,6 +13551,14 @@  rs6000_invalid_builtin (enum rs6000_buil
     error ("Builtin function %s requires the -mpaired option", name);
   else if ((fnmask & RS6000_BTM_SPE) != 0)
     error ("Builtin function %s requires the -mspe option", name);
+  else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+	   == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))	/* Needs both.  */
+    error ("Builtin function %s requires the -mhard-dfp and"
+	   " -mpower8-vector options", name);
+  else if ((fnmask & RS6000_BTM_DFP) != 0)
+    error ("Builtin function %s requires the -mhard-dfp option", name);
+  else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
+    error ("Builtin function %s requires the -mpower8-vector option", name);
   else
     error ("Builtin function %s is not supported with the current options",
 	   name);
@@ -13783,6 +13838,9 @@  rs6000_init_builtins (void)
   uintTI_type_internal_node = unsigned_intTI_type_node;
   float_type_internal_node = float_type_node;
   double_type_internal_node = double_type_node;
+  long_double_type_internal_node = long_double_type_node;
+  dfloat64_type_internal_node = dfloat64_type_node;
+  dfloat128_type_internal_node = dfloat128_type_node;
   void_type_internal_node = void_type_node;
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13797,6 +13855,9 @@  rs6000_init_builtins (void)
   builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
   builtin_mode_to_type[SFmode][0] = float_type_node;
   builtin_mode_to_type[DFmode][0] = double_type_node;
+  builtin_mode_to_type[TFmode][0] = long_double_type_node;
+  builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
+  builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
   builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
   builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
   builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
@@ -14889,6 +14950,8 @@  builtin_function_type (enum machine_mode
       /* unsigned 1 argument functions.  */
     case CRYPTO_BUILTIN_VSBOX:
     case P8V_BUILTIN_VGBBD:
+    case MISC_BUILTIN_CDTBCD:
+    case MISC_BUILTIN_CBCDTD:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       break;
@@ -14907,6 +14970,11 @@  builtin_function_type (enum machine_mode
     case CRYPTO_BUILTIN_VPMSUMW:
     case CRYPTO_BUILTIN_VPMSUMD:
     case CRYPTO_BUILTIN_VPMSUM:
+    case MISC_BUILTIN_ADDG6S:
+    case MISC_BUILTIN_DIVWEU:
+    case MISC_BUILTIN_DIVWEUO:
+    case MISC_BUILTIN_DIVDEU:
+    case MISC_BUILTIN_DIVDEUO:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
@@ -14968,9 +15036,18 @@  builtin_function_type (enum machine_mode
       /* signed args, unsigned return.  */
     case VSX_BUILTIN_XVCVDPUXDS_UNS:
     case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
+    case MISC_BUILTIN_UNPACK_TD:
+    case MISC_BUILTIN_UNPACK_V1TI:
       h.uns_p[0] = 1;
       break;
 
+      /* unsigned arguments for 128-bit pack instructions.  */
+    case MISC_BUILTIN_PACK_TD:
+    case MISC_BUILTIN_PACK_V1TI:
+      h.uns_p[1] = 1;
+      h.uns_p[2] = 1;
+      break;
+
     default:
       break;
     }
@@ -31224,6 +31301,7 @@  static struct rs6000_opt_mask const rs60
   { "power8-vector",	 RS6000_BTM_P8_VECTOR,	false, false },
   { "crypto",		 RS6000_BTM_CRYPTO,	false, false },
   { "htm",		 RS6000_BTM_HTM,	false, false },
+  { "hard-dfp",		 RS6000_BTM_DFP,	false, false },
 };
 
 /* Option variables that we want to support inside attribute((target)) and
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 209549)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -2516,6 +2516,7 @@  extern int frame_pointer_needed;
 #define RS6000_BTM_FRSQRTES	MASK_POPCNTB	/* FRSQRTES instruction.  */
 #define RS6000_BTM_POPCNTD	MASK_POPCNTD	/* Target supports ISA 2.06.  */
 #define RS6000_BTM_CELL		MASK_FPRND	/* Target is cell powerpc.  */
+#define RS6000_BTM_DFP		MASK_DFP	/* Decimal floating point.  */
 
 #define RS6000_BTM_COMMON	(RS6000_BTM_ALTIVEC			\
 				 | RS6000_BTM_VSX			\
@@ -2527,7 +2528,8 @@  extern int frame_pointer_needed;
 				 | RS6000_BTM_FRSQRTES			\
 				 | RS6000_BTM_HTM			\
 				 | RS6000_BTM_POPCNTD			\
-				 | RS6000_BTM_CELL)
+				 | RS6000_BTM_CELL			\
+				 | RS6000_BTM_DFP)
 
 /* Define builtin enum index.  */
 
@@ -2622,6 +2624,9 @@  enum rs6000_builtin_type_index
   RS6000_BTI_UINTTI,		 /* unsigned_intTI_type_node */
   RS6000_BTI_float,	         /* float_type_node */
   RS6000_BTI_double,	         /* double_type_node */
+  RS6000_BTI_long_double,        /* long_double_type_node */
+  RS6000_BTI_dfloat64,		 /* dfloat64_type_node */
+  RS6000_BTI_dfloat128,		 /* dfloat128_type_node */
   RS6000_BTI_void,	         /* void_type_node */
   RS6000_BTI_MAX
 };
@@ -2673,6 +2678,9 @@  enum rs6000_builtin_type_index
 #define uintTI_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_UINTTI])
 #define float_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_float])
 #define double_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_double])
+#define long_double_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_long_double])
+#define dfloat64_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_dfloat64])
+#define dfloat128_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_dfloat128])
 #define void_type_internal_node		 (rs6000_builtin_types[RS6000_BTI_void])
 
 extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX];
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 209549)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -143,6 +143,9 @@  (define_c_enum "unspec"
    UNSPEC_VSUBEUQM
    UNSPEC_VSUBECUQ
    UNSPEC_VBPERMQ
+   UNSPEC_BCDADD
+   UNSPEC_BCDSUB
+   UNSPEC_BCD_OVERFLOW
 ])
 
 (define_c_enum "unspecv"
@@ -3334,3 +3337,112 @@  (define_insn "altivec_vbpermq"
   "vbpermq %0,%1,%2"
   [(set_attr "length" "4")
    (set_attr "type" "vecsimple")])
+
+;; Decimal Integer operations
+(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
+
+(define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
+			      (UNSPEC_BCDSUB "sub")])
+
+(define_code_iterator BCD_TEST [eq lt gt unordered])
+
+(define_insn "bcd<bcd_add_sub>"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+	(unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+		      (match_operand:V1TI 2 "register_operand" "v")
+		      (match_operand:QI 3 "const_0_to_1_operand" "i")]
+		     UNSPEC_BCD_ADD_SUB))
+   (clobber (reg:CCFP 74))]		; bcdadd./bcdsub. also write CR6
+  "TARGET_P8_VECTOR"
+  "bcd<bcd_add_sub>. %0,%1,%2,%3"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
+
+;; Use a floating point type (V2DFmode) for the compare to set CR6 so that we
+;; can use the unordered test for BCD nans and add/subtracts that overflow.  An
+;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
+;; probably should be one that can go in the VMX (Altivec) registers, so we
+;; can't use DDmode or DFmode.
+(define_insn "*bcd<bcd_add_sub>_test"
+  [(set (reg:CCFP 74)
+	(compare:CCFP
+	 (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "v")
+		       (match_operand:V1TI 2 "register_operand" "v")
+		       (match_operand:QI 3 "const_0_to_1_operand" "i")]
+		      UNSPEC_BCD_ADD_SUB)
+	 (match_operand:V2DF 4 "zero_constant" "j")))
+   (clobber (match_scratch:V1TI 0 "=v"))]
+  "TARGET_P8_VECTOR"
+  "bcd<bcd_add_sub>. %0,%1,%2,%3"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
+
+(define_insn "*bcd<bcd_add_sub>_test2"
+  [(set (match_operand:V1TI 0 "register_operand" "=v")
+	(unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+		      (match_operand:V1TI 2 "register_operand" "v")
+		      (match_operand:QI 3 "const_0_to_1_operand" "i")]
+		     UNSPEC_BCD_ADD_SUB))
+   (set (reg:CCFP 74)
+	(compare:CCFP
+	 (unspec:V2DF [(match_dup 1)
+		       (match_dup 2)
+		       (match_dup 3)]
+		      UNSPEC_BCD_ADD_SUB)
+	 (match_operand:V2DF 4 "zero_constant" "j")))]
+  "TARGET_P8_VECTOR"
+  "bcd<bcd_add_sub>. %0,%1,%2,%3"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
+
+(define_expand "bcd<bcd_add_sub>_<code>"
+  [(parallel [(set (reg:CCFP 74)
+		   (compare:CCFP
+		    (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "")
+				  (match_operand:V1TI 2 "register_operand" "")
+				  (match_operand:QI 3 "const_0_to_1_operand" "")]
+				 UNSPEC_BCD_ADD_SUB)
+		    (match_dup 4)))
+	      (clobber (match_scratch:V1TI 5 ""))])
+   (set (match_operand:SI 0 "register_operand" "")
+	(BCD_TEST:SI (reg:CCFP 74)
+		     (const_int 0)))]
+  "TARGET_P8_VECTOR"
+{
+  operands[4] = CONST0_RTX (V2DFmode);
+})
+
+;; Peephole2 pattern to combine a bcdadd/bcdsub that calculates the value and
+;; the bcdadd/bcdsub that tests the value.  The combiner won't work since
+;; CR6 is a hard coded register.  Unfortunately, all of the Altivec predicate
+;; support is hard coded to use the fixed register CR6 instead of creating
+;; a register class for CR6.
+
+(define_peephole2
+  [(parallel [(set (match_operand:V1TI 0 "register_operand" "")
+		   (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "")
+				 (match_operand:V1TI 2 "register_operand" "")
+				 (match_operand:QI 3 "const_0_to_1_operand" "")]
+				UNSPEC_BCD_ADD_SUB))
+	      (clobber (reg:CCFP 74))])
+   (parallel [(set (reg:CCFP 74)
+		   (compare:CCFP
+		    (unspec:V2DF [(match_dup 1)
+				  (match_dup 2)
+				  (match_dup 3)]
+				 UNSPEC_BCD_ADD_SUB)
+		    (match_operand:V2DF 4 "zero_constant" "")))
+	      (clobber (match_operand:V1TI 5 "register_operand" ""))])]
+  "TARGET_P8_VECTOR"
+  [(parallel [(set (match_dup 0)
+		   (unspec:V1TI [(match_dup 1)
+				 (match_dup 2)
+				 (match_dup 3)]
+				UNSPEC_BCD_ADD_SUB))
+	      (set (reg:CCFP 74)
+		   (compare:CCFP
+		    (unspec:V2DF [(match_dup 1)
+				  (match_dup 2)
+				  (match_dup 3)]
+				 UNSPEC_BCD_ADD_SUB)
+		    (match_dup 4)))])])
Index: gcc/config/rs6000/dfp.md
===================================================================
--- gcc/config/rs6000/dfp.md	(revision 209549)
+++ gcc/config/rs6000/dfp.md	(working copy)
@@ -322,3 +322,72 @@  (define_insn "fixtddi2"
   "TARGET_DFP"
   "dctfixq %0,%1"
   [(set_attr "type" "fp")])
+
+
+;; Decimal builtin support
+
+(define_c_enum "unspec"
+  [UNSPEC_DDEDPD
+   UNSPEC_DENBCD
+   UNSPEC_DXEX
+   UNSPEC_DIEX
+   UNSPEC_DSCLI
+   UNSPEC_DSCRI])
+
+(define_mode_iterator D64_D128 [DD TD])
+
+(define_mode_attr dfp_suffix [(DD "")
+			      (TD "q")])
+
+(define_insn "dfp_ddedpd_<mode>"
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:QI 1 "const_0_to_3_operand" "i")
+			  (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+			 UNSPEC_DDEDPD))]
+  "TARGET_DFP"
+  "ddedpd<dfp_suffix> %1,%0,%2"
+  [(set_attr "type" "fp")])
+
+(define_insn "dfp_denbcd_<mode>"
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:QI 1 "const_0_to_1_operand" "i")
+			  (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+			 UNSPEC_DENBCD))]
+  "TARGET_DFP"
+  "denbcd<dfp_suffix> %1,%0,%2"
+  [(set_attr "type" "fp")])
+
+(define_insn "dfp_dxex_<mode>"
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")]
+			 UNSPEC_DXEX))]
+  "TARGET_DFP"
+  "dxex<dfp_suffix> %0,%1"
+  [(set_attr "type" "fp")])
+
+(define_insn "dfp_diex_<mode>"	; two-input diex/diexq, tagged with its own unspec
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+			  (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+			 UNSPEC_DIEX))]
+  "TARGET_DFP"
+  "diex<dfp_suffix> %0,%1,%2"
+  [(set_attr "type" "fp")])
+
+(define_insn "dfp_dscli_<mode>"
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+			  (match_operand:QI 2 "immediate_operand" "i")]
+			 UNSPEC_DSCLI))]
+  "TARGET_DFP"
+  "dscli<dfp_suffix> %0,%1,%2"
+  [(set_attr "type" "fp")])
+
+(define_insn "dfp_dscri_<mode>"
+  [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+	(unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+			  (match_operand:QI 2 "immediate_operand" "i")]
+			 UNSPEC_DSCRI))]
+  "TARGET_DFP"
+  "dscri<dfp_suffix> %0,%1,%2"
+  [(set_attr "type" "fp")])
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 209549)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -125,6 +125,15 @@  (define_c_enum "unspec"
    UNSPEC_P8V_MTVSRD
    UNSPEC_P8V_XXPERMDI
    UNSPEC_P8V_RELOAD_FROM_VSX
+   UNSPEC_ADDG6S
+   UNSPEC_CDTBCD
+   UNSPEC_CBCDTD
+   UNSPEC_DIVE
+   UNSPEC_DIVEO
+   UNSPEC_DIVEU
+   UNSPEC_DIVEUO
+   UNSPEC_UNPACK_128BIT
+   UNSPEC_PACK_128BIT
   ])
 
 ;;
@@ -481,6 +490,10 @@  (define_mode_attr BOOL_REGS_AND_CR0	[(TI
 					 (V2DF	"X,X,X,X,X")
 					 (V1TI	"X,X,X,X,X")])
 
+;; Mode attribute to give the correct type for integer divides
+(define_mode_attr idiv_ldiv [(SI "idiv")
+			     (DI "ldiv")])
+
 
 ;; Start with fixed-point load and store insns.  Here we put only the more
 ;; complex forms.  Basic data transfer is done later.
@@ -2755,10 +2768,7 @@  (define_insn "udiv<mode>3"
 		  (match_operand:GPR 2 "gpc_reg_operand" "r")))]
   ""
   "div<wd>u %0,%1,%2"
-   [(set (attr "type")
-      (cond [(match_operand:SI 0 "" "")
-		(const_string "idiv")]
-	(const_string "ldiv")))])
+   [(set_attr "type" "<idiv_ldiv>")])
 
 
 ;; For powers of two we can do srai/aze for divide and then adjust for
@@ -2782,10 +2792,7 @@  (define_insn "*div<mode>3"
 		 (match_operand:GPR 2 "gpc_reg_operand" "r")))]
   ""
   "div<wd> %0,%1,%2"
-  [(set (attr "type")
-     (cond [(match_operand:SI 0 "" "")
-		(const_string "idiv")]
-	(const_string "ldiv")))])
+  [(set_attr "type" "<idiv_ldiv>")])
 
 (define_expand "mod<mode>3"
   [(use (match_operand:GPR 0 "gpc_reg_operand" ""))
@@ -15735,6 +15742,191 @@  (define_peephole2
 })
 
 
+;; Miscellaneous ISA 2.06 (power7) instructions
+(define_insn "addg6s"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")]
+		   UNSPEC_ADDG6S))]
+  "TARGET_POPCNTD"
+  "addg6s %0,%1,%2"
+  [(set_attr "type" "integer")
+   (set_attr "length" "4")])
+
+(define_insn "cdtbcd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+		   UNSPEC_CDTBCD))]
+  "TARGET_POPCNTD"
+  "cdtbcd %0,%1"
+  [(set_attr "type" "integer")
+   (set_attr "length" "4")])
+
+(define_insn "cbcdtd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+		   UNSPEC_CBCDTD))]
+  "TARGET_POPCNTD"
+  "cbcdtd %0,%1"
+  [(set_attr "type" "integer")
+   (set_attr "length" "4")])
+
+(define_int_iterator UNSPEC_DIV_EXTEND [UNSPEC_DIVE
+					UNSPEC_DIVEO
+					UNSPEC_DIVEU
+					UNSPEC_DIVEUO])
+
+(define_int_attr div_extend [(UNSPEC_DIVE	"e")
+			     (UNSPEC_DIVEO	"eo")
+			     (UNSPEC_DIVEU	"eu")
+			     (UNSPEC_DIVEUO	"euo")])
+
+(define_insn "div<div_extend>_<mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(unspec:GPR [(match_operand:GPR 1 "register_operand" "r")
+		     (match_operand:GPR 2 "register_operand" "r")]
+		    UNSPEC_DIV_EXTEND))]
+  "TARGET_POPCNTD"
+  "div<wd><div_extend> %0,%1,%2"
+  [(set_attr "type" "<idiv_ldiv>")])
+
+
+;; Pack/unpack 128-bit floating point types that take 2 scalar registers
+
+; Type of the 64-bit part when packing/unpacking 128-bit floating point types
+(define_mode_attr FP128_64 [(TF "DF") (TD "DI")])
+
+(define_expand "unpack<mode>"
+  [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "")
+	(unspec:<FP128_64>
+	 [(match_operand:FMOVE128 1 "register_operand" "")
+	  (match_operand:QI 2 "const_0_to_1_operand" "")]
+	 UNSPEC_UNPACK_128BIT))]
+  ""
+  "")
+
+;; The Advance Toolchain 7.0-3 added private builtins: __builtin_longdouble_dw0
+;; and __builtin_longdouble_dw1 to optimize glibc.  Add support for these
+;; builtins here.
+
+(define_expand "unpacktf_0"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(unspec:DF [(match_operand:TF 1 "register_operand" "")
+		    (const_int 0)]
+	 UNSPEC_UNPACK_128BIT))]
+  ""
+  "")
+
+(define_expand "unpacktf_1"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(unspec:DF [(match_operand:TF 1 "register_operand" "")
+		    (const_int 1)]
+	 UNSPEC_UNPACK_128BIT))]
+  ""
+  "")
+
+(define_insn_and_split "unpack<mode>_dm"
+  [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m,d,r,m")
+	(unspec:<FP128_64>
+	 [(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r")
+	  (match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")]
+	 UNSPEC_UNPACK_128BIT))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 3))]
+{
+  unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]);
+
+  if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno)
+    {
+      emit_note (NOTE_INSN_DELETED);
+      DONE;
+    }
+
+  operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno);
+}
+  [(set_attr "type" "fp,fpstore,mffgpr,mftgpr,store")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "unpack<mode>_nodm"
+  [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m")
+	(unspec:<FP128_64>
+	 [(match_operand:FMOVE128 1 "register_operand" "d,d")
+	  (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
+	 UNSPEC_UNPACK_128BIT))]
+  "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 3))]
+{
+  unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]);
+
+  if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno)
+    {
+      emit_note (NOTE_INSN_DELETED);
+      DONE;
+    }
+
+  operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno);
+}
+  [(set_attr "type" "fp,fpstore")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "pack<mode>"
+  [(set (match_operand:FMOVE128 0 "register_operand" "=d,&d")
+	(unspec:FMOVE128
+	 [(match_operand:<FP128_64> 1 "register_operand" "0,d")
+	  (match_operand:<FP128_64> 2 "register_operand" "d,d")]
+	 UNSPEC_PACK_128BIT))]
+  ""
+  "@
+   fmr %L0,%2
+   #"
+  "&& reload_completed && REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 3) (match_dup 1))
+   (set (match_dup 4) (match_dup 2))]
+{
+  unsigned dest_hi = REGNO (operands[0]);
+  unsigned dest_lo = dest_hi + 1;
+
+  gcc_assert (!IN_RANGE (REGNO (operands[1]), dest_hi, dest_lo));
+  gcc_assert (!IN_RANGE (REGNO (operands[2]), dest_hi, dest_lo));
+
+  operands[3] = gen_rtx_REG (<FP128_64>mode, dest_hi);
+  operands[4] = gen_rtx_REG (<FP128_64>mode, dest_lo);
+}
+  [(set_attr "type" "fp,fp")
+   (set_attr "length" "4,8")])
+
+(define_insn "unpackv1ti"
+  [(set (match_operand:DI 0 "register_operand" "=d,d")
+	(unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa")
+		    (match_operand:QI 2 "const_0_to_1_operand" "O,i")]
+	 UNSPEC_UNPACK_128BIT))]
+  "TARGET_VSX"
+{
+  if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0)
+    return ASM_COMMENT_START " xxpermdi to same register";
+
+  operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4")])
+
+(define_insn "packv1ti"
+  [(set (match_operand:V1TI 0 "register_operand" "=wa")
+	(unspec:V1TI
+	 [(match_operand:DI 1 "register_operand" "d")
+	  (match_operand:DI 2 "register_operand" "d")]
+	 UNSPEC_PACK_128BIT))]
+  "TARGET_VSX"
+  "xxpermdi %x0,%x1,%x2,0"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4")])
+
+
 
 (include "sync.md")
 (include "vector.md")
Index: gcc/testsuite/gcc.target/powerpc/dfp-builtin-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/dfp-builtin-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/dfp-builtin-1.c	(revision 0)
@@ -0,0 +1,88 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times "ddedpd " 4    } } */
+/* { dg-final { scan-assembler-times "denbcd " 2    } } */
+/* { dg-final { scan-assembler-times "dxex "   1    } } */
+/* { dg-final { scan-assembler-times "diex "   1    } } */
+/* { dg-final { scan-assembler-times "dscli "  2    } } */
+/* { dg-final { scan-assembler-times "dscri "  2    } } */
+/* { dg-final { scan-assembler-not   "bl __builtin" } } */
+/* { dg-final { scan-assembler-not   "dctqpq"       } } */
+/* { dg-final { scan-assembler-not   "drdpq"        } } */
+/* { dg-final { scan-assembler-not   "stfd"         } } */
+/* { dg-final { scan-assembler-not   "lfd"          } } */
+
+_Decimal64
+do_dedpd_0 (_Decimal64 a)
+{
+  return __builtin_ddedpd (0, a);
+}
+
+_Decimal64
+do_dedpd_1 (_Decimal64 a)
+{
+  return __builtin_ddedpd (1, a);
+}
+
+_Decimal64
+do_dedpd_2 (_Decimal64 a)
+{
+  return __builtin_ddedpd (2, a);
+}
+
+_Decimal64
+do_dedpd_3 (_Decimal64 a)
+{
+  return __builtin_ddedpd (3, a);
+}
+
+_Decimal64
+do_enbcd_0 (_Decimal64 a)
+{
+  return __builtin_denbcd (0, a);
+}
+
+_Decimal64
+do_enbcd_1 (_Decimal64 a)
+{
+  return __builtin_denbcd (1, a);
+}
+
+_Decimal64
+do_xex (_Decimal64 a)
+{
+  return __builtin_dxex (a);
+}
+
+_Decimal64
+do_iex (_Decimal64 a, _Decimal64 b)
+{
+  return __builtin_diex (a, b);
+}
+
+_Decimal64
+do_scli_1 (_Decimal64 a)
+{
+  return __builtin_dscli (a, 1);
+}
+
+_Decimal64
+do_scli_10 (_Decimal64 a)
+{
+  return __builtin_dscli (a, 10);
+}
+
+_Decimal64
+do_scri_1 (_Decimal64 a)
+{
+  return __builtin_dscri (a, 1);
+}
+
+_Decimal64
+do_scri_10 (_Decimal64 a)
+{
+  return __builtin_dscri (a, 10);
+}
Index: gcc/testsuite/gcc.target/powerpc/dfp-builtin-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/dfp-builtin-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/dfp-builtin-2.c	(revision 0)
@@ -0,0 +1,88 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times "ddedpdq " 4    } } */
+/* { dg-final { scan-assembler-times "denbcdq " 2    } } */
+/* { dg-final { scan-assembler-times "dxexq "   1    } } */
+/* { dg-final { scan-assembler-times "diexq "   1    } } */
+/* { dg-final { scan-assembler-times "dscliq "  2    } } */
+/* { dg-final { scan-assembler-times "dscriq "  2    } } */
+/* { dg-final { scan-assembler-not    "bl __builtin" } } */
+/* { dg-final { scan-assembler-not   "dctqpq"        } } */
+/* { dg-final { scan-assembler-not   "drdpq"         } } */
+/* { dg-final { scan-assembler-not   "stfd"          } } */
+/* { dg-final { scan-assembler-not   "lfd"           } } */
+
+_Decimal128
+do_dedpdq_0 (_Decimal128 a)
+{
+  return __builtin_ddedpdq (0, a);
+}
+
+_Decimal128
+do_dedpdq_1 (_Decimal128 a)
+{
+  return __builtin_ddedpdq (1, a);
+}
+
+_Decimal128
+do_dedpdq_2 (_Decimal128 a)
+{
+  return __builtin_ddedpdq (2, a);
+}
+
+_Decimal128
+do_dedpdq_3 (_Decimal128 a)
+{
+  return __builtin_ddedpdq (3, a);
+}
+
+_Decimal128
+do_enbcdq_0 (_Decimal128 a)
+{
+  return __builtin_denbcdq (0, a);
+}
+
+_Decimal128
+do_enbcdq_1 (_Decimal128 a)
+{
+  return __builtin_denbcdq (1, a);
+}
+
+_Decimal128
+do_xexq (_Decimal128 a)
+{
+  return __builtin_dxexq (a);
+}
+
+_Decimal128
+do_iexq (_Decimal128 a, _Decimal128 b)
+{
+  return __builtin_diexq (a, b);
+}
+
+_Decimal128
+do_scliq_1 (_Decimal128 a)
+{
+  return __builtin_dscliq (a, 1);
+}
+
+_Decimal128
+do_scliq_10 (_Decimal128 a)
+{
+  return __builtin_dscliq (a, 10);
+}
+
+_Decimal128
+do_scriq_1 (_Decimal128 a)
+{
+  return __builtin_dscriq (a, 1);
+}
+
+_Decimal128
+do_scriq_10 (_Decimal128 a)
+{
+  return __builtin_dscriq (a, 10);
+}
Index: gcc/testsuite/gcc.target/powerpc/bcd-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/bcd-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/bcd-1.c	(revision 0)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times "cdtbcd " 1 } } */
+/* { dg-final { scan-assembler-times "cbcdtd " 1 } } */
+/* { dg-final { scan-assembler-times "addg6s " 1 } } */
+/* { dg-final { scan-assembler-not    "bl __builtin" } } */
+
+unsigned int
+to_bcd (unsigned int a)
+{
+  return __builtin_cdtbcd (a);
+}
+
+unsigned int
+from_bcd (unsigned int a)
+{
+  return __builtin_cbcdtd (a);
+}
+
+unsigned int
+bcd_arith (unsigned int a, unsigned int b)
+{
+  return __builtin_addg6s (a, b);
+}
Index: gcc/testsuite/gcc.target/powerpc/extend-divide-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/extend-divide-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/extend-divide-1.c	(revision 0)
@@ -0,0 +1,34 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times "divwe "   1 } } */
+/* { dg-final { scan-assembler-times "divweo "  1 } } */
+/* { dg-final { scan-assembler-times "divweu "  1 } } */
+/* { dg-final { scan-assembler-times "divweuo " 1 } } */
+/* { dg-final { scan-assembler-not    "bl __builtin" } } */
+
+int
+div_we (int a, int b)
+{
+  return __builtin_divwe (a, b);
+}
+
+int
+div_weo (int a, int b)
+{
+  return __builtin_divweo (a, b);
+}
+
+unsigned int
+div_weu (unsigned int a, unsigned int b)
+{
+  return __builtin_divweu (a, b);
+}
+
+unsigned int
+div_weuo (unsigned int a, unsigned int b)
+{
+  return __builtin_divweuo (a, b);
+}
Index: gcc/testsuite/gcc.target/powerpc/bcd-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/bcd-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/bcd-2.c	(revision 0)
@@ -0,0 +1,44 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "bcdadd\[.\] " 2 } } */
+/* { dg-final { scan-assembler-times "bcdsub\[.\] " 2 } } */
+/* { dg-final { scan-assembler-not   "bl __builtin"   } } */
+/* { dg-final { scan-assembler-not   "mtvsr"   	      } } */
+/* { dg-final { scan-assembler-not   "mfvsr"   	      } } */
+/* { dg-final { scan-assembler-not   "lvx"     	      } } */
+/* { dg-final { scan-assembler-not   "lxvw4x"  	      } } */
+/* { dg-final { scan-assembler-not   "lxvd2x"  	      } } */
+/* { dg-final { scan-assembler-not   "stvx"    	      } } */
+/* { dg-final { scan-assembler-not   "stxvw4x" 	      } } */
+/* { dg-final { scan-assembler-not   "stxvd2x" 	      } } */
+
+typedef __int128_t __attribute__((__vector_size__(16)))	vector_128_t;
+typedef __int128_t					scalar_128_t;
+typedef	unsigned long long				scalar_64_t;
+
+vector_128_t
+do_add_0 (vector_128_t a, vector_128_t b)
+{
+  return __builtin_bcdadd (a, b, 0);
+}
+
+vector_128_t
+do_add_1 (vector_128_t a, vector_128_t b)
+{
+  return __builtin_bcdadd (a, b, 1);
+}
+
+vector_128_t
+do_sub_0 (vector_128_t a, vector_128_t b)
+{
+  return __builtin_bcdsub (a, b, 0);
+}
+
+vector_128_t
+do_sub_1 (vector_128_t a, vector_128_t b)
+{
+  return __builtin_bcdsub (a, b, 1);
+}
Index: gcc/testsuite/gcc.target/powerpc/extend-divide-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/extend-divide-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/extend-divide-2.c	(revision 0)
@@ -0,0 +1,34 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times "divde "   1 } } */
+/* { dg-final { scan-assembler-times "divdeo "  1 } } */
+/* { dg-final { scan-assembler-times "divdeu "  1 } } */
+/* { dg-final { scan-assembler-times "divdeuo " 1 } } */
+/* { dg-final { scan-assembler-not    "bl __builtin" } } */
+
+long
+div_de (long a, long b)
+{
+  return __builtin_divde (a, b);
+}
+
+long
+div_deo (long a, long b)
+{
+  return __builtin_divdeo (a, b);
+}
+
+unsigned long
+div_deu (unsigned long a, unsigned long b)
+{
+  return __builtin_divdeu (a, b);
+}
+
+unsigned long
+div_deuo (unsigned long a, unsigned long b)
+{
+  return __builtin_divdeuo (a, b);
+}
Index: gcc/testsuite/gcc.target/powerpc/pack01.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pack01.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pack01.c	(revision 0)
@@ -0,0 +1,92 @@ 
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+typedef __int128_t __attribute__((__vector_size__(16)))	vector_128_t;
+typedef __int128_t					scalar_128_t;
+typedef	unsigned long long				scalar_64_t;
+
+volatile scalar_64_t one = 1;
+volatile scalar_64_t two = 2;
+
+int
+main (void)
+{
+  scalar_128_t a = (((scalar_128_t)one) << 64) | ((scalar_128_t)two);
+  vector_128_t v1 = (vector_128_t) { a };
+  vector_128_t v2 = __builtin_pack_vector_int128 (one, two);
+  scalar_64_t x0 = __builtin_unpack_vector_int128 (v1, 0);
+  scalar_64_t x1 = __builtin_unpack_vector_int128 (v1, 1);
+  vector_128_t v3 = __builtin_pack_vector_int128 (x0, x1);
+
+  size_t i;
+  union {
+    scalar_128_t i128;
+    vector_128_t v128;
+    scalar_64_t u64;
+    unsigned char uc[sizeof (scalar_128_t)];
+    char c[sizeof (scalar_128_t)];
+  } u, u2;
+
+#ifdef DEBUG
+  {
+    printf ("a  = 0x");
+    u.i128 = a;
+    for (i = 0; i < sizeof (scalar_128_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nv1 = 0x");
+    u.v128 = v1;
+    for (i = 0; i < sizeof (scalar_128_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nv2 = 0x");
+    u.v128 = v2;
+    for (i = 0; i < sizeof (scalar_128_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nv3 = 0x");
+    u.v128 = v3;
+    for (i = 0; i < sizeof (scalar_128_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nx0 = 0x");
+    u.u64 = x0;
+    for (i = 0; i < sizeof (scalar_64_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nx1 = 0x");
+    u.u64 = x1;
+    for (i = 0; i < sizeof (scalar_64_t); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\n");
+  }
+#endif
+
+  u2.i128 = a;
+  u.v128 = v1;
+  if (memcmp (u.c, u2.c, sizeof (scalar_128_t)) != 0)
+    abort ();
+
+  u.v128 = v2;
+  if (memcmp (u.c, u2.c, sizeof (scalar_128_t)) != 0)
+    abort ();
+
+  u.v128 = v3;
+  if (memcmp (u.c, u2.c, sizeof (scalar_128_t)) != 0)
+    abort ();
+
+  return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/bcd-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/bcd-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/bcd-3.c	(revision 0)
@@ -0,0 +1,103 @@ 
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "bcdadd\[.\] " 4 } } */
+/* { dg-final { scan-assembler-times "bcdsub\[.\] " 4 } } */
+/* { dg-final { scan-assembler-not   "bl __builtin"   } } */
+/* { dg-final { scan-assembler-not   "mtvsr"   	      } } */
+/* { dg-final { scan-assembler-not   "mfvsr"   	      } } */
+/* { dg-final { scan-assembler-not   "lvx"     	      } } */
+/* { dg-final { scan-assembler-not   "lxvw4x"  	      } } */
+/* { dg-final { scan-assembler-not   "lxvd2x"  	      } } */
+/* { dg-final { scan-assembler-not   "stvx"    	      } } */
+/* { dg-final { scan-assembler-not   "stxvw4x" 	      } } */
+/* { dg-final { scan-assembler-not   "stxvd2x" 	      } } */
+
+typedef __int128_t __attribute__((__vector_size__(16)))	vector_128_t;
+typedef __int128_t					scalar_128_t;
+typedef	unsigned long long				scalar_64_t;
+
+/* Test whether the peephole2 folds a bcdadd/bcdsub followed by the matching
+   bcdadd_<test>/bcdsub_<test> into a single instruction.  */
+
+vector_128_t
+do_add_lt (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdadd (a, b, 0);
+  if (__builtin_bcdadd_lt (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_add_eq (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdadd (a, b, 0);
+  if (__builtin_bcdadd_eq (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_add_gt (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdadd (a, b, 0);
+  if (__builtin_bcdadd_gt (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_add_ov (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdadd (a, b, 0);
+  if (__builtin_bcdadd_ov (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_sub_lt (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdsub (a, b, 0);
+  if (__builtin_bcdsub_lt (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_sub_eq (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdsub (a, b, 0);
+  if (__builtin_bcdsub_eq (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_sub_gt (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdsub (a, b, 0);
+  if (__builtin_bcdsub_gt (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
+
+vector_128_t
+do_sub_ov (vector_128_t a, vector_128_t b, int *p)
+{
+  vector_128_t ret = __builtin_bcdsub (a, b, 0);
+  if (__builtin_bcdsub_ov (a, b, 0))
+    *p = 1;
+
+  return ret;
+}
Index: gcc/testsuite/gcc.target/powerpc/pack02.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pack02.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pack02.c	(revision 0)
@@ -0,0 +1,95 @@ 
+/* { dg-do run { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+int
+main (void)
+{
+  double high = pow (2.0, 60);
+  double low  = 2.0;
+  long double a = ((long double)high) + ((long double)low);
+  double x0 = __builtin_unpack_longdouble (a, 0);
+  double x1 = __builtin_unpack_longdouble (a, 1);
+  long double b = __builtin_pack_longdouble (x0, x1);
+
+#ifdef DEBUG
+  {
+    size_t i;
+    union {
+      long double ld;
+      double d;
+      unsigned char uc[sizeof (long double)];
+      char c[sizeof (long double)];
+    } u;
+
+    printf ("a  = 0x");
+    u.ld = a;
+    for (i = 0; i < sizeof (long double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (", %Lg\n", a);
+
+    printf ("b  = 0x");
+    u.ld = b;
+    for (i = 0; i < sizeof (long double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (", %Lg\n", b);
+
+    printf ("hi = 0x");
+    u.d = high;
+    for (i = 0; i < sizeof (double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (",%*s %g\n", (int)(2 * (sizeof (long double) - sizeof (double))), "", high);
+
+    printf ("lo = 0x");
+    u.d = low;
+    for (i = 0; i < sizeof (double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (",%*s %g\n", (int)(2 * (sizeof (long double) - sizeof (double))), "", low);
+
+    printf ("x0 = 0x");
+    u.d = x0;
+    for (i = 0; i < sizeof (double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (",%*s %g\n", (int)(2 * (sizeof (long double) - sizeof (double))), "", x0);
+
+    printf ("x1 = 0x");
+    u.d = x1;
+    for (i = 0; i < sizeof (double); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (",%*s %g\n", (int)(2 * (sizeof (long double) - sizeof (double))), "", x1);
+  }
+#endif
+
+  if (high != x0)
+    abort ();
+
+  if (low != x1)
+    abort ();
+
+  if (a != b)
+    abort ();
+
+  if (x0 != high)
+    abort ();
+
+  if (x1 != low)
+    abort ();
+
+  return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/pack03.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pack03.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pack03.c	(revision 0)
@@ -0,0 +1,88 @@ 
+/* { dg-do run { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-mcpu=power7 -O2" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+int
+main (void)
+{
+  _Decimal128 one	= (_Decimal128)1.0;
+  _Decimal128 two	= (_Decimal128)2.0;
+  _Decimal128 ten	= (_Decimal128)10.0;
+  _Decimal128 a		= one;
+  _Decimal128 b;
+  _Decimal128 c;
+  unsigned long long x0;
+  unsigned long long x1;
+  size_t i;
+
+  for (i = 0; i < 25; i++)
+    a *= ten;
+
+  a += two;
+
+  x0 = __builtin_unpack_dec128 (a, 0);
+  x1 = __builtin_unpack_dec128 (a, 1);
+  b = __builtin_pack_dec128 (x0, x1);
+  c = __builtin_dscliq (one, 25) + two;
+
+#ifdef DEBUG
+  {
+    union {
+      _Decimal128 d;
+      unsigned long long ull;
+      unsigned char uc[sizeof (_Decimal128)];
+    } u;
+
+    printf ("a  = 0x");
+    u.d = a;
+    for (i = 0; i < sizeof (_Decimal128); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (", %Lg\n", (long double)a);
+
+    printf ("b  = 0x");
+    u.d = b;
+    for (i = 0; i < sizeof (_Decimal128); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (", %Lg\n", (long double)b);
+
+    printf ("c  = 0x");
+    u.d = c;
+    for (i = 0; i < sizeof (_Decimal128); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf (", %Lg\n", (long double)c);
+
+    printf ("x0 = 0x");
+    u.ull = x0;
+    for (i = 0; i < sizeof (unsigned long long); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\nx1 = 0x");
+    u.ull = x1;
+    for (i = 0; i < sizeof (unsigned long long); i++)
+      printf ("%.2x", u.uc[i]);
+
+    printf ("\n");
+  }
+#endif
+
+  if (a != b)
+    abort ();
+
+  if (a != c)
+    abort ();
+
+  return 0;
+}