diff mbox series

[2/6] rs6000 Add vector insert builtin support

Message ID feca6cc11323e394aa9501bbdffc2025e70447ec.camel@us.ibm.com
State New
Headers show
Series Permute Class Operations | expand

Commit Message

Carl Love June 1, 2020, 4:14 p.m. UTC
GCC maintainers:

This patch adds support for vec_insertl and vec_inserth builtins.

The patch has been compiled and tested on

  powerpc64le-unknown-linux-gnu (Power 9 LE)

and mambo with no regression errors.

Please let me know if this patch is acceptable for the mainline branch.

Thanks.

                         Carl Love

--------------------------------------------------------------
gcc/ChangeLog

2020-05-30  Carl Love  <cel@us.ibm.com>

        * config/rs6000/altivec.h: Add define vec_insertl, vec_inserth.
	* config/rs6000/rs6000-builtin.def (BU_FUTURE_V_3): Add definition for
	VINSERTGPRBL, VINSERTGPRHL, VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL,
	VINSERTVPRHL, VINSERTVPRWL, VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR,
	VINSERTGPRDR, VINSERTVPRBR, VINSERTVPRHR, VINSERTVPRWR.
	(BU_FUTURE_OVERLOAD_3): Add definition for INSERTL, INSERTH.
	* config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_INSERTL): Add overloaded
	argument declarations.
	(FUTURE_BUILTIN_VEC_INSERTH):  Add overloaded	argument declarations.
	(builtin_function_type): Add case entries for FUTURE_BUILTIN_VINSERTGPRBL,
	FUTURE_BUILTIN_VINSERTGPRHL, FUTURE_BUILTIN_VINSERTGPRWL,
	FUTURE_BUILTIN_VINSERTGPRDL, FUTURE_BUILTIN_VINSERTVPRBL,
	FUTURE_BUILTIN_VINSERTVPRHL, FUTURE_BUILTIN_VINSERTVPRWL.
	* config/rs6000/vsx.md (define_c_enum): Add UNSPEC_INSERTL, UNSPEC_INSERTR.
	(define_expand): Add vinsertvl_<mode>, vinsertvr_<mode>, vinsertgl_<mode>,
	vinsertgr_<mode>, mode is VI2.
	(define_insn): Add vinsertvl_internal_<mode>, vinsertvr_internal_<mode>,
	vinsertgl_internal_<mode>, vinsertgr_internal_<mode>, mode VEC_I.
	* doc/extend.texi: Add documentation for vec_insertl, vec_inserth.
	* gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c: New
	test case.
---
 gcc/config/rs6000/altivec.h                   |   2 +
 gcc/config/rs6000/rs6000-builtin.def          |  18 +
 gcc/config/rs6000/rs6000-call.c               |  51 +++
 gcc/config/rs6000/vsx.md                      | 110 ++++++
 gcc/doc/extend.texi                           |  68 ++++
 .../powerpc/vec-insert-word-runnable.c        | 345 ++++++++++++++++++
 6 files changed, 594 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c

Comments

Segher Boessenkool June 3, 2020, 11:15 p.m. UTC | #1
Hi!

On Mon, Jun 01, 2020 at 09:14:40AM -0700, Carl Love wrote:
>         * config/rs6000/altivec.h: Add define vec_insertl, vec_inserth.

	* config/rs6000/altivec.h (vec_insertl, vec_inserth): New defines.

> 	* config/rs6000/rs6000-builtin.def (BU_FUTURE_V_3): Add definition for
> 	VINSERTGPRBL, VINSERTGPRHL, VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL,
> 	VINSERTVPRHL, VINSERTVPRWL, VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR,
> 	VINSERTGPRDR, VINSERTVPRBR, VINSERTVPRHR, VINSERTVPRWR.

	* config/rs6000/rs6000-builtin.def (VINSERTGPRBL, VINSERTGPRHL,
	VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL, VINSERTVPRHL, VINSERTVPRWL,
	VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR, VINSERTGPRDR, VINSERTVPRBR,
	VINSERTVPRHR, VINSERTVPRWR): New builtins.

> 	(BU_FUTURE_OVERLOAD_3): Add definition for INSERTL, INSERTH.

	(INSERTL, INSERTH): New builtins.

> 	* config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_INSERTL): Add overloaded
> 	argument declarations.
> 	(FUTURE_BUILTIN_VEC_INSERTH):  Add overloaded	argument declarations.

Maybe just "New overloads." for both?  (That latter one has whitespace
problems).

> 	(builtin_function_type): Add case entries for FUTURE_BUILTIN_VINSERTGPRBL,
> 	FUTURE_BUILTIN_VINSERTGPRHL, FUTURE_BUILTIN_VINSERTGPRWL,
> 	FUTURE_BUILTIN_VINSERTGPRDL, FUTURE_BUILTIN_VINSERTVPRBL,
> 	FUTURE_BUILTIN_VINSERTVPRHL, FUTURE_BUILTIN_VINSERTVPRWL.

builtin_function_type is not the name of what you changed (FUTURE* is).

> +(define_expand "vinsertvr_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +       (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
> +                    (match_operand:VI2 2 "altivec_register_operand")
> +                    (match_operand:SI 3 "register_operand" "r")]
> +		   UNSPEC_INSERTR))]
> +  "TARGET_FUTURE"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   else
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   DONE;
> +})

The two cases (BE and LE) are now exactly identical?  Same for all
similar cases.

Okay for trunk with those nits taken care of.  Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 0a7e8ab3647..936aeb1ee09 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -699,6 +699,8 @@  __altivec_scalar_pred(vec_any_nle,
 /* Overloaded built-in functions for future architecture.  */
 #define vec_extractl(a, b, c)	__builtin_vec_extractl (a, b, c)
 #define vec_extracth(a, b, c)	__builtin_vec_extracth (a, b, c)
+#define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
+#define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
 
 #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
 #define vec_clrl(a, b)	__builtin_vec_clrl (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 8b1ddb00045..c5bd4f86555 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2627,6 +2627,22 @@  BU_FUTURE_V_3 (VEXTRACTHR, "vextduhvhx", CONST, vextractrv8hi)
 BU_FUTURE_V_3 (VEXTRACTWR, "vextduwvhx", CONST, vextractrv4si)
 BU_FUTURE_V_3 (VEXTRACTDR, "vextddvhx", CONST, vextractrv2di)
 
+BU_FUTURE_V_3 (VINSERTGPRBL, "vinsgubvlx", CONST, vinsertgl_v16qi)
+BU_FUTURE_V_3 (VINSERTGPRHL, "vinsguhvlx", CONST, vinsertgl_v8hi)
+BU_FUTURE_V_3 (VINSERTGPRWL, "vinsguwvlx", CONST, vinsertgl_v4si)
+BU_FUTURE_V_3 (VINSERTGPRDL, "vinsgudvlx", CONST, vinsertgl_v2di)
+BU_FUTURE_V_3 (VINSERTVPRBL, "vinsvubvlx", CONST, vinsertvl_v16qi)
+BU_FUTURE_V_3 (VINSERTVPRHL, "vinsvuhvlx", CONST, vinsertvl_v8hi)
+BU_FUTURE_V_3 (VINSERTVPRWL, "vinsvuwvlx", CONST, vinsertvl_v4si)
+
+BU_FUTURE_V_3 (VINSERTGPRBR, "vinsgubvrx", CONST, vinsertgr_v16qi)
+BU_FUTURE_V_3 (VINSERTGPRHR, "vinsguhvrx", CONST, vinsertgr_v8hi)
+BU_FUTURE_V_3 (VINSERTGPRWR, "vinsguwvrx", CONST, vinsertgr_v4si)
+BU_FUTURE_V_3 (VINSERTGPRDR, "vinsgudvrx", CONST, vinsertgr_v2di)
+BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
+BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
+BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
+
 BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
 BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
 BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
@@ -2646,6 +2662,8 @@  BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
 
 BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl")
 BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
+BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl")
+BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth")
 
 BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
 BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 0ac8054d030..a265e30d1d9 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5565,6 +5565,28 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
 
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRBL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
+ { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRHL,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRWL,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRDL,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
+   RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRBL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRHL,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRWL,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
+
   { FUTURE_BUILTIN_VEC_EXTRACTH, FUTURE_BUILTIN_VEXTRACTBR,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
@@ -5578,6 +5600,28 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
 
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRBR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRHR,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRWR,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRDR,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRBR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRHR,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRWR,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
+
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
@@ -13289,6 +13333,13 @@  builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case FUTURE_BUILTIN_VEXTRACTHR:
     case FUTURE_BUILTIN_VEXTRACTWR:
     case FUTURE_BUILTIN_VEXTRACTDR:
+    case FUTURE_BUILTIN_VINSERTGPRBL:
+    case FUTURE_BUILTIN_VINSERTGPRHL:
+    case FUTURE_BUILTIN_VINSERTGPRWL:
+    case FUTURE_BUILTIN_VINSERTGPRDL:
+    case FUTURE_BUILTIN_VINSERTVPRBL:
+    case FUTURE_BUILTIN_VINSERTVPRHL:
+    case FUTURE_BUILTIN_VINSERTVPRWL:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 51ffe2d2000..6a4d4e4d093 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -346,6 +346,8 @@ 
    UNSPEC_XXGENPCV
    UNSPEC_EXTRACTL
    UNSPEC_EXTRACTR
+   UNSPEC_INSERTL
+   UNSPEC_INSERTR
   ])
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
@@ -3847,6 +3849,114 @@ 
   "vext<du_or_d><wd>vrx %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+(define_expand "vinsertvl_<mode>"
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand" "r")]
+                   UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)   /* vec_insertl, vector source: vins*vlx on BE...  */
+    emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  else                    /* ...and the right-indexed vins*vrx on LE.  */
+    emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "vinsertvl_internal_<mode>"	; emits vins<wd>vlx
+  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	; GPR operand, printed as %1
+                      (match_operand:VEC_I 2 "altivec_register_operand" "v")	; vector operand, printed as %2
+                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]	; tied to operand 0
+		     UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+  "vins<wd>vlx %0,%1,%2"	; operand 3 is not printed: it shares %0's register
+  [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertvr_<mode>"
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand" "r")]
+                   UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)   /* vec_inserth, vector source: vins*vrx on BE...  */
+    emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  else                    /* ...and the left-indexed vins*vlx on LE.  */
+    emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "vinsertvr_internal_<mode>"	; emits vins<wd>vrx
+  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	; GPR operand, printed as %1
+                      (match_operand:VEC_I 2 "altivec_register_operand" "v")	; vector operand, printed as %2
+                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]	; tied to operand 0
+                     UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+  "vins<wd>vrx %0,%1,%2"	; operand 3 is not printed: it shares %0's register
+  [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertgl_<mode>"
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:SI 1 "register_operand")
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand")]
+                   UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)   /* vec_insertl, GPR source: vins*lx on BE...  */
+    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  else                    /* ...and the right-indexed vins*rx on LE.  */
+    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "vinsertgl_internal_<mode>"	; emits vins<wd>lx
+ [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+ (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	; first GPR operand, printed as %1
+               (match_operand:SI 2 "register_operand" "r")	; second GPR operand, printed as %2
+               (match_operand:VEC_I 3 "altivec_register_operand" "0")]	; tied to operand 0
+              UNSPEC_INSERTL))]
+ "TARGET_FUTURE"
+ "vins<wd>lx %0,%1,%2"	; operand 3 is not printed: it shares %0's register
+ [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertgr_<mode>"
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:SI 1 "register_operand")
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand")]
+                   UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)   /* vec_inserth, GPR source: vins*rx on BE...  */
+    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  else                    /* ...and the left-indexed vins*lx on LE.  */
+    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
+                                              operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "vinsertgr_internal_<mode>"	; emits vins<wd>rx
+ [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+ (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	; first GPR operand, printed as %1
+               (match_operand:SI 2 "register_operand" "r")	; second GPR operand, printed as %2
+               (match_operand:VEC_I 3 "altivec_register_operand" "0")]	; tied to operand 0
+              UNSPEC_INSERTR))]
+ "TARGET_FUTURE"
+ "vins<wd>rx %0,%1,%2"	; operand 3 is not printed: it shares %0's register
+ [(set_attr "type" "vecsimple")])
+
 ;; VSX_EXTRACT optimizations
 ;; Optimize double d = (double) vec_extract (vi, <n>)
 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8e116775219..6b6a52cae33 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20958,6 +20958,74 @@  limitation of the bi-endian vector programming model consistent with the
 limitation on vec_perm, for example.
 @findex vec_extracth
 
+Vector Insert
+
+@smallexample
+@exdent vector unsigned char
+@exdent vec_insertl (unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_insertl (unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_insertl (unsigned int, vector unsigned int, unsigned int);
+@exdent vector unsigned long long
+@exdent vec_insertl (unsigned long long, vector unsigned long long, unsigned int);
+@exdent vector unsigned char
+@exdent vec_insertl (vector unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_insertl (vector unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_insertl (vector unsigned int, vector unsigned int, unsigned int);
+@end smallexample
+
+Let src be the first argument, when the first argument is a scalar, or the
+rightmost element of the left doubleword of the first argument, when the first
+argument is a vector.  Insert src into the second argument at the position
+identified by the third argument, using natural element order in the second
+argument, and leaving the rest of the second argument unchanged.  If the byte
+index is greater than 14 for halfwords,  12 for words, or 8 for doublewords,
+the intrinsic will be rejected.  Note that the underlying hardware instruction
+uses the same register for the second argument and the result, but this is
+hidden by the built-in.  For little-endian, the generated code will be
+semantically equivalent to vins*rx, while for big-endian it will be semantically
+equivalent to vins*lx.  Note that some fairly anomalous results can be generated
+if the byte index is not aligned on an element boundary for the sort of element
+being inserted. This is a limitation of the bi-endian vector programming model
+consistent with the limitation on vec_perm, for example.
+@findex vec_insertl
+
+@smallexample
+@exdent vector unsigned char
+@exdent vec_inserth (unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_inserth (unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_inserth (unsigned int, vector unsigned int, unsigned int);
+@exdent vector unsigned long long
+@exdent vec_inserth (unsigned long long, vector unsigned long long, unsigned int);
+@exdent vector unsigned char
+@exdent vec_inserth (vector unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_inserth (vector unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_inserth (vector unsigned int, vector unsigned int, unsigned int);
+@end smallexample
+
+Let src be the first argument, when the first argument is a scalar, or the
+rightmost element of the first argument, when the first argument is a vector.
+Insert src into the second argument at the position identified by the third
+argument, using opposite element order in the second argument, and leaving the
+rest of the second argument unchanged.  If the byte index is greater than 14 for
+halfwords, 12 for words, or 8 for doublewords, the intrinsic will be rejected.
+Note that the underlying hardware instruction uses the same register for the
+second argument and the result, but this is hidden by the built-in.  For
+little-endian, the code generation will be semantically equivalent to vins*lx,
+while for big-endian it will be semantically equivalent to vins*rx.  Note that
+some fairly anomalous results can be generated if the byte index is not
+aligned on an element boundary for the sort of element being inserted.  This is a
+limitation of the bi-endian vector programming model consistent with the
+limitation on vec_perm, for example.
+@findex vec_inserth
+
 @smallexample
 @exdent vector unsigned long long int
 @exdent vec_pdep (vector unsigned long long int, vector unsigned long long int)
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
new file mode 100644
index 00000000000..3ffcc0da67a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
@@ -0,0 +1,345 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_future_hw } */
+/* { dg-options "-mdejagnu-cpu=future -save-temps" } */
+#include <altivec.h>
+
+#define DEBUG 0  /* Set to 1 to print mismatches instead of aborting.  */
+
+#if DEBUG        /* Was #ifdef, which is true even when DEBUG is 0.  */
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  unsigned int index;
+  vector unsigned char vresult_ch;
+  vector unsigned char expected_vresult_ch;
+  vector unsigned char src_va_ch;
+  vector unsigned char src_vb_ch;
+  unsigned char src_a_ch;
+
+  vector unsigned short vresult_sh;
+  vector unsigned short expected_vresult_sh;
+  vector unsigned short src_va_sh;
+  vector unsigned short src_vb_sh;
+  unsigned short int src_a_sh;
+
+  vector unsigned int vresult_int;
+  vector unsigned int expected_vresult_int;
+  vector unsigned int src_va_int;
+  vector unsigned int src_vb_int;
+  unsigned int src_a_int;
+
+  vector unsigned long long vresult_ll;
+  vector unsigned long long expected_vresult_ll;
+  vector unsigned long long src_va_ll;
+  unsigned long long int src_a_ll;
+  /* On a mismatch: abort, or just print the vectors when DEBUG is nonzero.  */
+  /* Vector insert, low index, from GPR.  */
+  src_a_ch = 79;
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 79, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+
+  vresult_ch = vec_insertl (src_a_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_sh = 79;
+  index = 10;
+  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short int) { 0, 1, 2, 3,
+						      4, 79, 6, 7 };
+
+  vresult_sh = vec_insertl (src_a_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_int = 79;
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 1, 79, 3 };
+
+  vresult_int = vec_insertl (src_a_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_ll = 79;
+  index = 8;
+  src_va_ll = (vector unsigned long long) { 0, 1 };
+  vresult_ll = (vector unsigned long long) { 0, 0 };
+  expected_vresult_ll = (vector unsigned long long) { 0, 79 };
+
+  vresult_ll = vec_insertl (src_a_ll, src_va_ll, index);
+
+  if (!vec_all_eq (vresult_ll,  expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_ll, src_va_ll, index)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %llu, expected_vresult_ll[%d] = %llu\n",
+	     i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, low index, from vector.  */
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
+				       18, 19, 20, 21, 22, 23, 24, 25 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 18, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+
+  vresult_ch = vec_insertl (src_vb_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 4;
+  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
+  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short) { 0, 1, 14, 3, 4, 5, 6, 7 };
+
+  vresult_sh = vec_insertl (src_vb_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 1, 12, 3 };
+
+  vresult_int = vec_insertl (src_vb_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, high index, from GPR.  */
+  src_a_ch = 79;
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 79, 14, 15 };
+
+  vresult_ch = vec_inserth (src_a_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_sh = 79;
+  index = 10;
+  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short int) { 0, 1, 79, 3,
+						      4, 5, 6, 7 };
+
+  vresult_sh = vec_inserth (src_a_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_int = 79;
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 79, 2, 3 };
+
+  vresult_int = vec_inserth (src_a_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_ll = 79;
+  index = 8;
+  src_va_ll = (vector unsigned long long) { 0, 1 };
+  vresult_ll = (vector unsigned long long) { 0, 0 };
+  expected_vresult_ll = (vector unsigned long long) { 79, 1 };
+
+  vresult_ll = vec_inserth (src_a_ll, src_va_ll, index);
+
+  if (!vec_all_eq (vresult_ll,  expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_ll, src_va_ll, index)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %llu, expected_vresult_ll[%d] = %llu\n",
+	     i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, high index, from vector.  */
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
+				       18, 19, 20, 21, 22, 23, 24, 25 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 18, 14, 15 };
+
+  vresult_ch = vec_inserth (src_vb_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 4;
+  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
+  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 14, 6, 7 };
+
+  vresult_sh = vec_inserth (src_vb_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 12, 2, 3 };
+
+  vresult_int = vec_inserth (src_vb_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\mvinsblx\M} } } */
+/* { dg-final { scan-assembler {\mvinshlx\M} } } */
+/* { dg-final { scan-assembler {\mvinswlx\M} } } */
+/* { dg-final { scan-assembler {\mvinsdlx\M} } } */
+/* { dg-final { scan-assembler {\mvinsbvlx\M} } } */
+/* { dg-final { scan-assembler {\mvinshvlx\M} } } */
+/* { dg-final { scan-assembler {\mvinswvlx\M} } } */
+
+/* { dg-final { scan-assembler {\mvinsbrx\M} } } */
+/* { dg-final { scan-assembler {\mvinshrx\M} } } */
+/* { dg-final { scan-assembler {\mvinswrx\M} } } */
+/* { dg-final { scan-assembler {\mvinsdrx\M} } } */
+/* { dg-final { scan-assembler {\mvinsbvrx\M} } } */
+/* { dg-final { scan-assembler {\mvinshvrx\M} } } */
+/* { dg-final { scan-assembler {\mvinswvrx\M} } } */
+