diff mbox series

[2/6,ver,2] rs6000 Add vector insert builtin support

Message ID d16730cca224903e973f2e8496fa9ed21122df5c.camel@us.ibm.com
State New
Headers show
Series ] Permute Class Operations | expand

Commit Message

Carl Love June 15, 2020, 11:37 p.m. UTC
v2 changes

Fix change log entry for config/rs6000/altivec.h

Fix change log entry for config/rs6000/rs6000-builtin.def

Fix change log entry for config/rs6000/rs6000-call.c

vsx.md: Fixed if (BYTES_BIG_ENDIAN) else statements.
Porting error from pu branch.

---------------------------------------------------------------
GCC maintainers:

This patch adds support for vec_insertl and vec_inserth builtins.

The patch has been compiled and tested on

  powerpc64le-unknown-linux-gnu (Power 9 LE)

and mambo with no regression errors.

Please let me know if this patch is acceptable for the mainline branch.

Thanks.

                         Carl Love

--------------------------------------------------------------
gcc/ChangeLog

2020-06-15  Carl Love  <cel@us.ibm.com>

        * config/rs6000/altivec.h (vec_insertl, vec_inserth): New defines.
	* config/rs6000/rs6000-builtin.def (VINSERTGPRBL, VINSERTGPRHL,
	VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL, VINSERTVPRHL, VINSERTVPRWL,
	VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR, VINSERTGPRDR, VINSERTVPRBR,
	VINSERTVPRHR, VINSERTVPRWR): New builtins.
	(INSERTL, INSERTH): New builtins.
	* config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_INSERTL,
	FUTURE_BUILTIN_VEC_INSERTH):  New Overloaded definitions.
	(FUTURE_BUILTIN_VINSERTGPRBL, FUTURE_BUILTIN_VINSERTGPRHL,
	FUTURE_BUILTIN_VINSERTGPRWL, FUTURE_BUILTIN_VINSERTGPRDL,
	FUTURE_BUILTIN_VINSERTVPRBL, FUTURE_BUILTIN_VINSERTVPRHL,
	FUTURE_BUILTIN_VINSERTVPRWL): Add case entries.
	* config/rs6000/vsx.md (define_c_enum): Add UNSPEC_INSERTL,
	UNSPEC_INSERTR.
	(define_expand): Add vinsertvl_<mode>, vinsertvr_<mode>,
	vinsertgl_<mode>, vinsertgr_<mode>, mode is VI2.
	(define_insn): vinsertvl_internal_<mode>, vinsertvr_internal_<mode>,
	vinsertgl_internal_<mode>, vinsertgr_internal_<mode>, mode VEC_I.
	* doc/extend.texi: Add documentation for vec_insertl, vec_inserth.

gcc/testsuite/ChangeLog

2020-06-15  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/vec-insert-word-runnable.c: New
	test case.
---
 gcc/config/rs6000/altivec.h                   |   2 +
 gcc/config/rs6000/rs6000-builtin.def          |  18 +
 gcc/config/rs6000/rs6000-call.c               |  51 +++
 gcc/config/rs6000/vsx.md                      | 110 ++++++
 gcc/doc/extend.texi                           |  73 ++++
 .../powerpc/vec-insert-word-runnable.c        | 345 ++++++++++++++++++
 6 files changed, 599 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c

Comments

will schmidt June 16, 2020, 7:45 p.m. UTC | #1
On Mon, 2020-06-15 at 16:37 -0700, Carl Love via Gcc-patches wrote:
> v2 changes
> 
> Fix change log entry for config/rs6000/altivec.h
> 
> Fix change log entry for config/rs6000/rs6000-builtin.def
> 
> Fix change log entry for config/rs6000/rs6000-call.c
> 
> vsx.md: Fixed if (BYTES_BIG_ENDIAN) else statements.
> Porting error from pu branch.
> 
> ---------------------------------------------------------------
> GCC maintainers:
> 
> This patch adds support for vec_insertl and vec_inserth builtins.
> 
> The patch has been compiled and tested on
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
> 
> and mambo with no regression errors.
> 
> Please let me know if this patch is acceptable for the mainline branch.
> 
> Thanks.
> 
>                          Carl Love
> 
> --------------------------------------------------------------
> gcc/ChangeLog
> 
> 2020-06-15  Carl Love  <cel@us.ibm.com>
> 
>         * config/rs6000/altivec.h (vec_insertl, vec_inserth): New defines.

tabs/spaces: this entry is indented with spaces, while the other entries use a leading tab.

> 	* config/rs6000/rs6000-builtin.def (VINSERTGPRBL, VINSERTGPRHL,
> 	VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL, VINSERTVPRHL, VINSERTVPRWL,
> 	VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR, VINSERTGPRDR, VINSERTVPRBR,
> 	VINSERTVPRHR, VINSERTVPRWR): New builtins.
> 	(INSERTL, INSERTH): New builtins.
> 	* config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_INSERTL,
> 	FUTURE_BUILTIN_VEC_INSERTH):  New Overloaded definitions.
> 	(FUTURE_BUILTIN_VINSERTGPRBL, FUTURE_BUILTIN_VINSERTGPRHL,
> 	FUTURE_BUILTIN_VINSERTGPRWL, FUTURE_BUILTIN_VINSERTGPRDL,
> 	FUTURE_BUILTIN_VINSERTVPRBL, FUTURE_BUILTIN_VINSERTVPRHL,
> 	FUTURE_BUILTIN_VINSERTVPRWL): Add case entries.
> 	* config/rs6000/vsx.md (define_c_enum): Add UNSPEC_INSERTL,
> 	UNSPEC_INSERTR.
> 	(define_expand): Add vinsertvl_<mode>, vinsertvr_<mode>,
> 	vinsertgl_<mode>, vinsertgr_<mode>, mode is VI2.
> 	(define_ins): vinsertvl_internal_<mode>, vinsertvr_internal_<mode>,
> 	vinsertgl_internal_<mode>, vinsertgr_internal_<mode>, mode VEC_I.
> 	* doc/extend.texi: Add documentation for vec_insertl, vec_inserth.
> 
> gcc/testsuite/ChangeLog
> 
> 2020-06-15  Carl Love  <cel@us.ibm.com>
> 
> 	* gcc.target/powerpc/vec-insert-word-runnable.c: New
> 	test case.
> ---
>  gcc/config/rs6000/altivec.h                   |   2 +
>  gcc/config/rs6000/rs6000-builtin.def          |  18 +
>  gcc/config/rs6000/rs6000-call.c               |  51 +++
>  gcc/config/rs6000/vsx.md                      | 110 ++++++
>  gcc/doc/extend.texi                           |  73 ++++
>  .../powerpc/vec-insert-word-runnable.c        | 345 ++++++++++++++++++
>  6 files changed, 599 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 0a7e8ab3647..936aeb1ee09 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -699,6 +699,8 @@ __altivec_scalar_pred(vec_any_nle,
>  /* Overloaded built-in functions for future architecture.  */
>  #define vec_extractl(a, b, c)	__builtin_vec_extractl (a, b, c)
>  #define vec_extracth(a, b, c)	__builtin_vec_extracth (a, b, c)
> +#define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
> +#define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
> 
>  #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
>  #define vec_clrl(a, b)	__builtin_vec_clrl (a, b)
> diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
> index 8b1ddb00045..c5bd4f86555 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2627,6 +2627,22 @@ BU_FUTURE_V_3 (VEXTRACTHR, "vextduhvhx", CONST, vextractrv8hi)
>  BU_FUTURE_V_3 (VEXTRACTWR, "vextduwvhx", CONST, vextractrv4si)
>  BU_FUTURE_V_3 (VEXTRACTDR, "vextddvhx", CONST, vextractrv2di)
> 
> +BU_FUTURE_V_3 (VINSERTGPRBL, "vinsgubvlx", CONST, vinsertgl_v16qi)
> +BU_FUTURE_V_3 (VINSERTGPRHL, "vinsguhvlx", CONST, vinsertgl_v8hi)
> +BU_FUTURE_V_3 (VINSERTGPRWL, "vinsguwvlx", CONST, vinsertgl_v4si)
> +BU_FUTURE_V_3 (VINSERTGPRDL, "vinsgudvlx", CONST, vinsertgl_v2di)
> +BU_FUTURE_V_3 (VINSERTVPRBL, "vinsvubvlx", CONST, vinsertvl_v16qi)
> +BU_FUTURE_V_3 (VINSERTVPRHL, "vinsvuhvlx", CONST, vinsertvl_v8hi)
> +BU_FUTURE_V_3 (VINSERTVPRWL, "vinsvuwvlx", CONST, vinsertvl_v4si)
> +
> +BU_FUTURE_V_3 (VINSERTGPRBR, "vinsgubvrx", CONST, vinsertgr_v16qi)
> +BU_FUTURE_V_3 (VINSERTGPRHR, "vinsguhvrx", CONST, vinsertgr_v8hi)
> +BU_FUTURE_V_3 (VINSERTGPRWR, "vinsguwvrx", CONST, vinsertgr_v4si)
> +BU_FUTURE_V_3 (VINSERTGPRDR, "vinsgudvrx", CONST, vinsertgr_v2di)
> +BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
> +BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
> +BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
> +
>  BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
>  BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
>  BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
> @@ -2646,6 +2662,8 @@ BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
> 
>  BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl")
>  BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
> +BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl")
> +BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth")
> 
>  BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
>  BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 817a14c9c0d..abbe00030ea 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5567,6 +5567,28 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
> 
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRBL,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
> + { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRHL,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRWL,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRDL,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
> +   RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRBL,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRHL,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRWL,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
> +
>    { FUTURE_BUILTIN_VEC_EXTRACTH, FUTURE_BUILTIN_VEXTRACTBR,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
>      RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> @@ -5580,6 +5602,28 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
> 
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRBR,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRHR,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRWR,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRDR,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRBR,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRHR,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRWR,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
> +
>    { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
>      RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
>    { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
> @@ -13291,6 +13335,13 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
>      case FUTURE_BUILTIN_VEXTRACTHR:
>      case FUTURE_BUILTIN_VEXTRACTWR:
>      case FUTURE_BUILTIN_VEXTRACTDR:
> +    case FUTURE_BUILTIN_VINSERTGPRBL:
> +    case FUTURE_BUILTIN_VINSERTGPRHL:
> +    case FUTURE_BUILTIN_VINSERTGPRWL:
> +    case FUTURE_BUILTIN_VINSERTGPRDL:
> +    case FUTURE_BUILTIN_VINSERTVPRBL:
> +    case FUTURE_BUILTIN_VINSERTVPRHL:
> +    case FUTURE_BUILTIN_VINSERTVPRWL:
>        h.uns_p[0] = 1;
>        h.uns_p[1] = 1;
>        h.uns_p[2] = 1;
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 51ffe2d2000..6ce93f14dec 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -346,6 +346,8 @@
>     UNSPEC_XXGENPCV
>     UNSPEC_EXTRACTL
>     UNSPEC_EXTRACTR
> +   UNSPEC_INSERTL
> +   UNSPEC_INSERTR
>    ])
> 
>  ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
> @@ -3847,6 +3849,114 @@
>    "vext<du_or_d><wd>vrx %0,%1,%2,%3"
>    [(set_attr "type" "vecsimple")])
> 
> +(define_expand "vinsertvl_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +       (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
> +                    (match_operand:VI2 2 "altivec_register_operand")
> +                    (match_operand:SI 3 "register_operand" "r")]
> +		   UNSPEC_INSERTL))]

spaces/tabs

> +  "TARGET_FUTURE"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   else
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   DONE;
> +})
> +
> +(define_insn "vinsertvl_internal_<mode>"
> +  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +                      (match_operand:VEC_I 2 "altivec_register_operand" "v")
> +                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +		     UNSPEC_INSERTL))]

spaces/tabs

> +  "TARGET_FUTURE"
> +  "vins<wd>vlx %0,%1,%2"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertvr_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +       (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
> +                    (match_operand:VI2 2 "altivec_register_operand")
> +                    (match_operand:SI 3 "register_operand" "r")]
> +		   UNSPEC_INSERTR))]
> +  "TARGET_FUTURE"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   else
> +     emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   DONE;

spaces/tabs

more below.


> +})
> +
> +(define_insn "vinsertvr_internal_<mode>"
> +  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +                      (match_operand:VEC_I 2 "altivec_register_operand" "v")
> +                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +                     UNSPEC_INSERTR))]
> +  "TARGET_FUTURE"
> +  "vins<wd>vrx %0,%1,%2"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertgl_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +       (unspec:VI2 [(match_operand:SI 1 "register_operand")
> +                    (match_operand:VI2 2 "altivec_register_operand")
> +                    (match_operand:SI 3 "register_operand")]
> +	            UNSPEC_INSERTL))]
> +  "TARGET_FUTURE"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  else
> +    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  DONE;
> + })
> +
> +(define_insn "vinsertgl_internal_<mode>"
> + [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> + (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +               (match_operand:SI 2 "register_operand" "r")
> +               (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +              UNSPEC_INSERTL))]
> + "TARGET_FUTURE"
> + "vins<wd>lx %0,%1,%2"
> + [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertgr_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +       (unspec:VI2 [(match_operand:SI 1 "register_operand")
> +                    (match_operand:VI2 2 "altivec_register_operand")
> +                    (match_operand:SI 3 "register_operand")]
> +                   UNSPEC_INSERTR))]
> +  "TARGET_FUTURE"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  else
> +    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  DONE;
> + })
> +
> +(define_insn "vinsertgr_internal_<mode>"
> + [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> + (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +               (match_operand:SI 2 "register_operand" "r")
> +               (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +              UNSPEC_INSERTR))]
> + "TARGET_FUTURE"
> + "vins<wd>rx %0,%1,%2"
> + [(set_attr "type" "vecsimple")])
> +
>  ;; VSX_EXTRACT optimizations
>  ;; Optimize double d = (double) vec_extract (vi, <n>)
>  ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 5549a695b42..8931c7950f6 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -20972,6 +20972,79 @@ limitation of the bi-endian vector programming model consistent with the
>  limitation on vec_perm, for example.
>  @findex vec_extracth
> 
> +Vector Insert
> +
> +@smallexample
> +@exdent vector unsigned char
> +@exdent vec_insertl (unsigned char, vector unsigned char, unsigned int);
> +@exdent vector unsigned short
> +@exdent vec_insertl (unsigned short, vector unsigned short, unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_insertl (unsigned int, vector unsigned int, unsigned int);
> +@exdent vector unsigned long long
> +@exdent vec_insertl (unsigned long long, vector unsigned long long,
> +unsigned int);
> +@exdent vector unsigned char
> +@exdent vec_insertl (vector unsigned char, vector unsigned char, unsigned int;
> +@exdent vector unsigned short
> +@exdent vec_insertl (vector unsigned short, vector unsigned short,
> +unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_insertl (vector unsigned int, vector unsigned int, unsigned int);
> +@end smallexample
> +

> +Let src be the first argument, when the first argument is a scalar, or the
> +rightmost element of the left doubleword of the first argument, when the first
> +argument is a vector. 

> 			 Insert src into the second argument at the position
> +identified by the third argument, using natural element order in the second
> +argument, and leaving the rest of the second argument unchanged.

 								  
> + If the byte
> +index is greater than 14 for halfwords,  12 for words, or 8 for 
> + doublewords,
> +the intrinsic will be rejected.

That's good implementation detail, but it is hard to read.  But what does
the builtin do?

Something like:
  +@exdent vec_insertl (vector unsigned int src, vector unsigned int dest, unsigned int offset);

Insert src into dest at the byte offset as specified.  The specified
offset must be within a valid byte offset of the destination.


>  				    Note that the underlying hardware instruction
> +uses the same register for the second argument and the result, but this is
> +hidden by the built-in.

If it's hidden by the builtin, it may be better if this is documented
on the actual insn.  Is it?


>  			     For little-endian, the generated code will be
> +semantically equivalent to vins*rx, while for big-endian it will be
> +semantically equivalent to vins*lx.  

Surround the mnemonics with @code{}, e.g. @code{vins*rx}.  It would probably
be good to expand the '*' out to the respective b/h/w characters, etc.

> 					Note that some fairly anomalous results
> +can be generated if the byte index is not aligned on an element boundary for
> +the sort of element being inserted.

So should the incoming offset be checked against alignment of the type?


>  					This is a limitation of the bi-endian
> +vector programming model consistent with the limitation on veextracthc_perm,

Typo: "veextracthc_perm"?  Presumably this should be vec_perm, as in the matching vec_inserth paragraph below.


> +for example.

Move 'for example' to the beginning of the sentence.

No further comments, 
Thanks
-Will


> +@findex vec_insertl
> +
> +@smallexample
> +@exdent vector unsigned char
> +@exdent vec_inserth (unsigned char, vector unsigned char, unsigned int);
> +@exdent vector unsigned short
> +@exdent vec_inserth (unsigned short, vector unsigned short, unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_inserth (unsigned int, vector unsigned int, unsigned int);
> +@exdent vector unsigned long long
> +@exdent vec_inserth (unsigned long long, vector unsigned long long,
> +unsigned int);
> +@exdent vector unsigned char
> +@exdent vec_inserth (vector unsigned char, vector unsigned char, unsigned int);
> +@exdent vector unsigned short
> +@exdent vec_inserth (vector unsigned short, vector unsigned short,
> +unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_inserth (vector unsigned int, vector unsigned int, unsigned int);
> +@end smallexample
> +
> +Let src be the first argument, when the first argument is a scalar, or the
> +rightmost element of the first argument, when the first argument is a vector.
> +Insert src into the second argument at the position identified by the third
> +argument, using opposite element order in the second argument, and leaving the
> +rest of the second argument unchanged.  If the byte index is greater than 14
> +for halfwords, 12 for words, or 8 for doublewords, the intrinsic will be
> +rejected. Note that the underlying hardware instruction uses the same register
> +for the second argument and the result, but this is hidden by the built-in.
> +For little-endian, the code generation will be semantically equivalent to
> +vins*lx, while for big-endian it will be semantically equivalent to vins*rx.
> +Note that some fairly anomalous results can be generated if the byte index is
> +not aligned on an element boundary for the sort of element being inserted.
> +This is a limitation of the bi-endian vector programming model consistent with
> +the limitation on vec_perm, for example.
> +@findex vec_inserth
> +
>  @smallexample
>  @exdent vector unsigned long long int
>  @exdent vec_pdep (vector unsigned long long int, vector unsigned long long int)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
> new file mode 100644
> index 00000000000..3fc68e9d7c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
> @@ -0,0 +1,345 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_future_hw } */
> +/* { dg-options "-mdejagnu-cpu=future" } */
> +#include <altivec.h>
> +
> +#define DEBUG 1
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +extern void abort (void);
> +
> +int
> +main (int argc, char *argv [])
> +{
> +  int i;
> +  unsigned int index;
> +  vector unsigned char vresult_ch;
> +  vector unsigned char expected_vresult_ch;
> +  vector unsigned char src_va_ch;
> +  vector unsigned char src_vb_ch;
> +  unsigned char src_a_ch;
> +
> +  vector unsigned short vresult_sh;
> +  vector unsigned short expected_vresult_sh;
> +  vector unsigned short src_va_sh;
> +  vector unsigned short src_vb_sh;
> +  unsigned short int src_a_sh;
> +
> +  vector unsigned int vresult_int;
> +  vector unsigned int expected_vresult_int;
> +  vector unsigned int src_va_int;
> +  vector unsigned int src_vb_int;
> +  unsigned int src_a_int;
> +  
> +  vector unsigned long long vresult_ll;
> +  vector unsigned long long expected_vresult_ll;
> +  vector unsigned long long src_va_ll;
> +  unsigned long long int src_a_ll;
> +
> +  /* Vector insert, low index, from GPR */
> +  src_a_ch = 79;
> +  index = 2;
> +  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
> +					0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_ch = (vector unsigned char) { 0, 1, 79, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +						 
> +    vresult_ch = vec_insertl (src_a_ch, src_va_ch, index);
> +
> +  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_a_ch, src_va_ch, index)\n");
> +    for(i = 0; i < 16; i++)
> +      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
> +	     i, vresult_ch[i], i, expected_vresult_ch[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_sh = 79;
> +  index = 10;
> +  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
> +  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_sh = (vector unsigned short int) { 0, 1, 2, 3,
> +						      4, 79, 6, 7 };
> +
> +  vresult_sh = vec_insertl (src_a_sh, src_va_sh, index);
> +
> +  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_a_sh, src_va_sh, index)\n");
> +    for(i = 0; i < 8; i++)
> +      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
> +	     i, vresult_sh[i], i, expected_vresult_sh[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_int = 79;
> +  index = 8;
> +  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
> +  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
> +  expected_vresult_int = (vector unsigned int) { 0, 1, 79, 3 };
> +
> +  vresult_int = vec_insertl (src_a_int, src_va_int, index);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_a_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_ll = 79;
> +  index = 8;
> +  src_va_ll = (vector unsigned long long) { 0, 1 };
> +  vresult_ll = (vector unsigned long long) { 0, 0 };
> +  expected_vresult_ll = (vector unsigned long long) { 0, 79 };
> +
> +  vresult_ll = vec_insertl (src_a_ll, src_va_ll, index);
> +
> +  if (!vec_all_eq (vresult_ll,  expected_vresult_ll)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_a_ll, src_va_ll, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_ll[%d] = %d, expected_vresult_ll[%d] = %d\n",
> +	     i, vresult_ll[i], i, expected_vresult_ll[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector insert, low index, from vector */
> +  index = 2;
> +  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
> +				       18, 19, 20, 21, 22, 23, 24, 25 };
> +  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
> +					0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_ch = (vector unsigned char) { 0, 1, 18, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +						 
> +  vresult_ch = vec_insertl (src_vb_ch, src_va_ch, index);
> +
> +  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_vb_ch, src_va_ch, index)\n");
> +    for(i = 0; i < 16; i++)
> +      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
> +	     i, vresult_ch[i], i, expected_vresult_ch[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  index = 4;
> +  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
> +  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
> +  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_sh = (vector unsigned short) { 0, 1, 14, 3, 4, 5, 6, 7 };
> +						 
> +  vresult_sh = vec_insertl (src_vb_sh, src_va_sh, index);
> +
> +  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_vb_sh, src_va_sh, index)\n");
> +    for(i = 0; i < 8; i++)
> +      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
> +	     i, vresult_sh[i], i, expected_vresult_sh[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  index = 8;
> +  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
> +  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
> +  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
> +  expected_vresult_int = (vector unsigned int) { 0, 1, 12, 3 };
> +						 
> +  vresult_int = vec_insertl (src_vb_int, src_va_int, index);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_insertl (src_vb_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector insert, high index, from GPR */
> +  src_a_ch = 79;
> +  index = 2;
> +  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
> +					0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 79, 14, 15 };
> +						 
> +    vresult_ch = vec_inserth (src_a_ch, src_va_ch, index);
> +
> +  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
> +#if DEBUG
> +   printf("ERROR, vec_inserth (src_a_ch, src_va_ch, index)\n");
> +    for(i = 0; i < 16; i++)
> +      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
> +	     i, vresult_ch[i], i, expected_vresult_ch[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_sh = 79;
> +  index = 10;
> +  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
> +  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_sh = (vector unsigned short int) { 0, 1, 79, 3,
> +						      4, 5, 6, 7 };
> +
> +  vresult_sh = vec_inserth (src_a_sh, src_va_sh, index);
> +
> +  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_a_sh, src_va_sh, index)\n");
> +    for(i = 0; i < 8; i++)
> +      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
> +	     i, vresult_sh[i], i, expected_vresult_sh[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_int = 79;
> +  index = 8;
> +  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
> +  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
> +  expected_vresult_int = (vector unsigned int) { 0, 79, 2, 3 };
> +
> +  vresult_int = vec_inserth (src_a_int, src_va_int, index);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_a_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_ll = 79;
> +  index = 8;
> +  src_va_ll = (vector unsigned long long) { 0, 1 };
> +  vresult_ll = (vector unsigned long long) { 0, 0 };
> +  expected_vresult_ll = (vector unsigned long long) { 79, 1 };
> +
> +  vresult_ll = vec_inserth (src_a_ll, src_va_ll, index);
> +
> +  if (!vec_all_eq (vresult_ll,  expected_vresult_ll)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_a_ll, src_va_ll, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_ll[%d] = %d, expected_vresult_ll[%d] = %d\n",
> +	     i, vresult_ll[i], i, expected_vresult_ll[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector insert, left index, from vector */
> +  index = 2;
> +  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 13, 14, 15 };
> +  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
> +				       18, 19, 20, 21, 22, 23, 24, 25 };
> +  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
> +					0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
> +				       8, 9, 10, 11, 12, 18, 14, 15 };
> +						 
> +  vresult_ch = vec_inserth (src_vb_ch, src_va_ch, index);
> +
> +  if (!vec_all_eq (vresult_ch,  expected_vresult_ch)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_vb_ch, src_va_ch, index)\n");
> +    for(i = 0; i < 16; i++)
> +      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
> +	     i, vresult_ch[i], i, expected_vresult_ch[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  index = 4;
> +  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
> +  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
> +  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
> +  expected_vresult_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 14, 6, 7 };
> +						 
> +  vresult_sh = vec_inserth (src_vb_sh, src_va_sh, index);
> +
> +  if (!vec_all_eq (vresult_sh,  expected_vresult_sh)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_vb_sh, src_va_sh, index)\n");
> +    for(i = 0; i < 8; i++)
> +      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
> +	     i, vresult_sh[i], i, expected_vresult_sh[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  index = 8;
> +  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
> +  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
> +  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
> +  expected_vresult_int = (vector unsigned int) { 0, 12, 2, 3 };
> +						 
> +  vresult_int = vec_inserth (src_vb_int, src_va_int, index);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_inserth (src_vb_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\mvinsblx\M} } } */
> +/* { dg-final { scan-assembler {\mvinshlx\M} } } */
> +/* { dg-final { scan-assembler {\mvinswlx\M} } } */
> +/* { dg-final { scan-assembler {\mvinsdlx\M} } } */
> +/* { dg-final { scan-assembler {\mvinsbvlx\M} } } */
> +/* { dg-final { scan-assembler {\mvinshvlx\M} } } */
> +/* { dg-final { scan-assembler {\mvinswvlx\M} } } */
> +
> +/* { dg-final { scan-assembler {\mvinsbrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinshrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinswrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinsdrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinsbvrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinshvrx\M} } } */
> +/* { dg-final { scan-assembler {\mvinswvrx\M} } } */
> +
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 0a7e8ab3647..936aeb1ee09 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -699,6 +699,8 @@  __altivec_scalar_pred(vec_any_nle,
 /* Overloaded built-in functions for future architecture.  */
 #define vec_extractl(a, b, c)	__builtin_vec_extractl (a, b, c)
 #define vec_extracth(a, b, c)	__builtin_vec_extracth (a, b, c)
+#define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
+#define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
 
 #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
 #define vec_clrl(a, b)	__builtin_vec_clrl (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 8b1ddb00045..c5bd4f86555 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2627,6 +2627,22 @@  BU_FUTURE_V_3 (VEXTRACTHR, "vextduhvhx", CONST, vextractrv8hi)
 BU_FUTURE_V_3 (VEXTRACTWR, "vextduwvhx", CONST, vextractrv4si)
 BU_FUTURE_V_3 (VEXTRACTDR, "vextddvhx", CONST, vextractrv2di)
 
+BU_FUTURE_V_3 (VINSERTGPRBL, "vinsgubvlx", CONST, vinsertgl_v16qi)
+BU_FUTURE_V_3 (VINSERTGPRHL, "vinsguhvlx", CONST, vinsertgl_v8hi)
+BU_FUTURE_V_3 (VINSERTGPRWL, "vinsguwvlx", CONST, vinsertgl_v4si)
+BU_FUTURE_V_3 (VINSERTGPRDL, "vinsgudvlx", CONST, vinsertgl_v2di)
+BU_FUTURE_V_3 (VINSERTVPRBL, "vinsvubvlx", CONST, vinsertvl_v16qi)
+BU_FUTURE_V_3 (VINSERTVPRHL, "vinsvuhvlx", CONST, vinsertvl_v8hi)
+BU_FUTURE_V_3 (VINSERTVPRWL, "vinsvuwvlx", CONST, vinsertvl_v4si)
+
+BU_FUTURE_V_3 (VINSERTGPRBR, "vinsgubvrx", CONST, vinsertgr_v16qi)
+BU_FUTURE_V_3 (VINSERTGPRHR, "vinsguhvrx", CONST, vinsertgr_v8hi)
+BU_FUTURE_V_3 (VINSERTGPRWR, "vinsguwvrx", CONST, vinsertgr_v4si)
+BU_FUTURE_V_3 (VINSERTGPRDR, "vinsgudvrx", CONST, vinsertgr_v2di)
+BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
+BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
+BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
+
 BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
 BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
 BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
@@ -2646,6 +2662,8 @@  BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
 
 BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl")
 BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
+BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl")
+BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth")
 
 BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
 BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 817a14c9c0d..abbe00030ea 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5567,6 +5567,28 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
 
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRBL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
+ { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRHL,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRWL,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTGPRDL,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
+   RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRBL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRHL,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTL, FUTURE_BUILTIN_VINSERTVPRWL,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
+
   { FUTURE_BUILTIN_VEC_EXTRACTH, FUTURE_BUILTIN_VEXTRACTBR,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
@@ -5580,6 +5602,28 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
 
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRBR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRHR,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRWR,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTGPRDR,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRBR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRHR,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_INSERTH, FUTURE_BUILTIN_VINSERTVPRWR,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
+
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
@@ -13291,6 +13335,13 @@  builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case FUTURE_BUILTIN_VEXTRACTHR:
     case FUTURE_BUILTIN_VEXTRACTWR:
     case FUTURE_BUILTIN_VEXTRACTDR:
+    case FUTURE_BUILTIN_VINSERTGPRBL:
+    case FUTURE_BUILTIN_VINSERTGPRHL:
+    case FUTURE_BUILTIN_VINSERTGPRWL:
+    case FUTURE_BUILTIN_VINSERTGPRDL:
+    case FUTURE_BUILTIN_VINSERTVPRBL:
+    case FUTURE_BUILTIN_VINSERTVPRHL:
+    case FUTURE_BUILTIN_VINSERTVPRWL:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 51ffe2d2000..6ce93f14dec 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -346,6 +346,8 @@ 
    UNSPEC_XXGENPCV
    UNSPEC_EXTRACTL
    UNSPEC_EXTRACTR
+   UNSPEC_INSERTL
+   UNSPEC_INSERTR
   ])
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
@@ -3847,6 +3849,114 @@ 
   "vext<du_or_d><wd>vrx %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+(define_expand "vinsertvl_<mode>"	;; vector-source insert, UNSPEC_INSERTL
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+	(unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
+		     (match_operand:VI2 2 "altivec_register_operand")
+		     (match_operand:SI 3 "register_operand")]
+		    UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+{ /* Big-endian emits the vl internal insn, little-endian the vr one.  */
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
+					      operands[1], operands[2]));
+  else
+    emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
+					      operands[1], operands[2]));
+  DONE;	/* Operand 3 is passed first: the internal insn takes it as %1.  */
+})
+
+(define_insn "vinsertvl_internal_<mode>"	;; emits vins<wd>vlx
+  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	;; SImode value in a GPR
+                      (match_operand:VEC_I 2 "altivec_register_operand" "v")
+                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]	;; tied to operand 0
+		     UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+  "vins<wd>vlx %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertvr_<mode>"	;; vector-source insert, UNSPEC_INSERTR
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+	(unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
+		     (match_operand:VI2 2 "altivec_register_operand")
+		     (match_operand:SI 3 "register_operand")]
+		    UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+{ /* Big-endian emits the vr internal insn, little-endian the vl one.  */
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
+					      operands[1], operands[2]));
+  else
+    emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
+					      operands[1], operands[2]));
+  DONE;	/* Operand 3 is passed first: the internal insn takes it as %1.  */
+})
+
+(define_insn "vinsertvr_internal_<mode>"	;; emits vins<wd>vrx
+  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	;; SImode value in a GPR
+                      (match_operand:VEC_I 2 "altivec_register_operand" "v")
+                      (match_operand:VEC_I 3 "altivec_register_operand" "0")]	;; tied to operand 0
+                     UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+  "vins<wd>vrx %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertgl_<mode>"	;; scalar-source insert, UNSPEC_INSERTL
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:SI 1 "register_operand")	;; NOTE(review): SImode for every <mode>, V2DI included -- confirm
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand")]
+	            UNSPEC_INSERTL))]
+  "TARGET_FUTURE"
+{ /* Big-endian emits the gl internal insn, little-endian the gr one.  */
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
+                                            operands[1], operands[2]));
+  else
+    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
+                                            operands[1], operands[2]));
+  DONE; /* Operand 3 is passed first: the internal insn takes it as %1.  */
+ })
+
+(define_insn "vinsertgl_internal_<mode>"	;; emits vins<wd>lx
+ [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+ (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	;; SImode value in a GPR
+               (match_operand:SI 2 "register_operand" "r")	;; SImode value in a GPR
+               (match_operand:VEC_I 3 "altivec_register_operand" "0")]	;; tied to operand 0
+              UNSPEC_INSERTL))]
+ "TARGET_FUTURE"
+ "vins<wd>lx %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_expand "vinsertgr_<mode>"	;; scalar-source insert, UNSPEC_INSERTR
+  [(set (match_operand:VI2 0 "altivec_register_operand")
+       (unspec:VI2 [(match_operand:SI 1 "register_operand")	;; NOTE(review): SImode for every <mode>, V2DI included -- confirm
+                    (match_operand:VI2 2 "altivec_register_operand")
+                    (match_operand:SI 3 "register_operand")]
+                   UNSPEC_INSERTR))]
+  "TARGET_FUTURE"
+{ /* Big-endian emits the gr internal insn, little-endian the gl one.  */
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
+                                            operands[1], operands[2]));
+  else
+    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
+                                            operands[1], operands[2]));
+  DONE; /* Operand 3 is passed first: the internal insn takes it as %1.  */
+ })
+
+(define_insn "vinsertgr_internal_<mode>"	;; emits vins<wd>rx
+ [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
+ (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")	;; SImode value in a GPR
+               (match_operand:SI 2 "register_operand" "r")	;; SImode value in a GPR
+               (match_operand:VEC_I 3 "altivec_register_operand" "0")]	;; tied to operand 0
+              UNSPEC_INSERTR))]
+ "TARGET_FUTURE"
+ "vins<wd>rx %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
 ;; VSX_EXTRACT optimizations
 ;; Optimize double d = (double) vec_extract (vi, <n>)
 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 5549a695b42..8931c7950f6 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20972,6 +20972,79 @@  limitation of the bi-endian vector programming model consistent with the
 limitation on vec_perm, for example.
 @findex vec_extracth
 
+Vector Insert
+
+@smallexample
+@exdent vector unsigned char
+@exdent vec_insertl (unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_insertl (unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_insertl (unsigned int, vector unsigned int, unsigned int);
+@exdent vector unsigned long long
+@exdent vec_insertl (unsigned long long, vector unsigned long long,
+unsigned int);
+@exdent vector unsigned char
+@exdent vec_insertl (vector unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_insertl (vector unsigned short, vector unsigned short,
+unsigned int);
+@exdent vector unsigned int
+@exdent vec_insertl (vector unsigned int, vector unsigned int, unsigned int);
+@end smallexample
+
+Let src be the first argument, when the first argument is a scalar, or the
+rightmost element of the left doubleword of the first argument, when the first
+argument is a vector.  Insert src into the second argument at the position
+identified by the third argument, using natural element order in the second
+argument, and leaving the rest of the second argument unchanged.  If the byte
+index is greater than 14 for halfwords, 12 for words, or 8 for doublewords,
+the intrinsic will be rejected.  Note that the underlying hardware instruction
+uses the same register for the second argument and the result, but this is
+hidden by the built-in.  For little-endian, the generated code will be
+semantically equivalent to vins*rx, while for big-endian it will be
+semantically equivalent to vins*lx.  Note that some fairly anomalous results
+can be generated if the byte index is not aligned on an element boundary for
+the sort of element being inserted.  This is a limitation of the bi-endian
+vector programming model consistent with the limitation on vec_perm,
+for example.
+@findex vec_insertl
+
+@smallexample
+@exdent vector unsigned char
+@exdent vec_inserth (unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_inserth (unsigned short, vector unsigned short, unsigned int);
+@exdent vector unsigned int
+@exdent vec_inserth (unsigned int, vector unsigned int, unsigned int);
+@exdent vector unsigned long long
+@exdent vec_inserth (unsigned long long, vector unsigned long long,
+unsigned int);
+@exdent vector unsigned char
+@exdent vec_inserth (vector unsigned char, vector unsigned char, unsigned int);
+@exdent vector unsigned short
+@exdent vec_inserth (vector unsigned short, vector unsigned short,
+unsigned int);
+@exdent vector unsigned int
+@exdent vec_inserth (vector unsigned int, vector unsigned int, unsigned int);
+@end smallexample
+
+Let src be the first argument, when the first argument is a scalar, or the
+rightmost element of the first argument, when the first argument is a vector.
+Insert src into the second argument at the position identified by the third
+argument, using opposite element order in the second argument, and leaving the
+rest of the second argument unchanged.  If the byte index is greater than 14
+for halfwords, 12 for words, or 8 for doublewords, the intrinsic will be
+rejected. Note that the underlying hardware instruction uses the same register
+for the second argument and the result, but this is hidden by the built-in.
+For little-endian, the code generation will be semantically equivalent to
+vins*lx, while for big-endian it will be semantically equivalent to vins*rx.
+Note that some fairly anomalous results can be generated if the byte index is
+not aligned on an element boundary for the sort of element being inserted.
+This is a limitation of the bi-endian vector programming model consistent with
+the limitation on vec_perm, for example.
+@findex vec_inserth
+
 @smallexample
 @exdent vector unsigned long long int
 @exdent vec_pdep (vector unsigned long long int, vector unsigned long long int)
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
new file mode 100644
index 00000000000..3fc68e9d7c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
@@ -0,0 +1,345 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_future_hw } */
+/* { dg-options "-mdejagnu-cpu=future -save-temps" } */
+#include <altivec.h>
+
+#define DEBUG 0	/* Set to 1 to print mismatches instead of aborting.  */
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])	/* Returns 0; a mismatch prints (DEBUG) or aborts.  */
+{
+  int i;
+  unsigned int index;
+  vector unsigned char vresult_ch;
+  vector unsigned char expected_vresult_ch;
+  vector unsigned char src_va_ch;
+  vector unsigned char src_vb_ch;
+  unsigned char src_a_ch;
+
+  vector unsigned short vresult_sh;
+  vector unsigned short expected_vresult_sh;
+  vector unsigned short src_va_sh;
+  vector unsigned short src_vb_sh;
+  unsigned short int src_a_sh;
+
+  vector unsigned int vresult_int;
+  vector unsigned int expected_vresult_int;
+  vector unsigned int src_va_int;
+  vector unsigned int src_vb_int;
+  unsigned int src_a_int;
+
+  vector unsigned long long vresult_ll;
+  vector unsigned long long expected_vresult_ll;
+  vector unsigned long long src_va_ll;
+  unsigned long long int src_a_ll;
+
+  /* Vector insert, low index (vec_insertl), scalar source from a GPR.  */
+  src_a_ch = 79;
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 79, 3, 4, 5, 6, 7,
+						 8, 9, 10, 11, 12, 13, 14, 15 };
+
+  vresult_ch = vec_insertl (src_a_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch, expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_sh = 79;
+  index = 10;
+  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short int) { 0, 1, 2, 3,
+						      4, 79, 6, 7 };
+
+  vresult_sh = vec_insertl (src_a_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh, expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_int = 79;
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 1, 79, 3 };
+
+  vresult_int = vec_insertl (src_a_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_ll = 79;
+  index = 8;
+  src_va_ll = (vector unsigned long long) { 0, 1 };
+  vresult_ll = (vector unsigned long long) { 0, 0 };
+  expected_vresult_ll = (vector unsigned long long) { 0, 79 };
+
+  vresult_ll = vec_insertl (src_a_ll, src_va_ll, index);
+
+  if (!vec_all_eq (vresult_ll, expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_a_ll, src_va_ll, index)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %llu, expected_vresult_ll[%d] = %llu\n",
+	     i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, low index (vec_insertl), source taken from a vector.  */
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
+				       18, 19, 20, 21, 22, 23, 24, 25 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 18, 3, 4, 5, 6, 7,
+						 8, 9, 10, 11, 12, 13, 14, 15 };
+
+  vresult_ch = vec_insertl (src_vb_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch, expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 4;
+  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
+  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short) { 0, 1, 14, 3, 4, 5, 6, 7 };
+
+  vresult_sh = vec_insertl (src_vb_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh, expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 1, 12, 3 };
+
+  vresult_int = vec_insertl (src_vb_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_insertl (src_vb_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, high index (vec_inserth), scalar source from a GPR.  */
+  src_a_ch = 79;
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+						 8, 9, 10, 11, 12, 79, 14, 15 };
+
+  vresult_ch = vec_inserth (src_a_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch, expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_sh = 79;
+  index = 10;
+  src_va_sh = (vector unsigned short int) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  vresult_sh = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short int) { 0, 1, 79, 3,
+						      4, 5, 6, 7 };
+
+  vresult_sh = vec_inserth (src_a_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh, expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_int = 79;
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 79, 2, 3 };
+
+  vresult_int = vec_inserth (src_a_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  src_a_ll = 79;
+  index = 8;
+  src_va_ll = (vector unsigned long long) { 0, 1 };
+  vresult_ll = (vector unsigned long long) { 0, 0 };
+  expected_vresult_ll = (vector unsigned long long) { 79, 1 };
+
+  vresult_ll = vec_inserth (src_a_ll, src_va_ll, index);
+
+  if (!vec_all_eq (vresult_ll, expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_a_ll, src_va_ll, index)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %llu, expected_vresult_ll[%d] = %llu\n",
+	     i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector insert, high index (vec_inserth), source taken from a vector.  */
+  index = 2;
+  src_va_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+				       8, 9, 10, 11, 12, 13, 14, 15 };
+  src_vb_ch = (vector unsigned char) { 10, 11, 12, 13, 14, 15, 16, 17,
+				       18, 19, 20, 21, 22, 23, 24, 25 };
+  vresult_ch = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ch = (vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7,
+						 8, 9, 10, 11, 12, 18, 14, 15 };
+
+  vresult_ch = vec_inserth (src_vb_ch, src_va_ch, index);
+
+  if (!vec_all_eq (vresult_ch, expected_vresult_ch)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_ch, src_va_ch, index)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_ch[%d] = %d, expected_vresult_ch[%d] = %d\n",
+	     i, vresult_ch[i], i, expected_vresult_ch[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 4;
+  src_va_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 5, 6, 7 };
+  src_vb_sh = (vector unsigned short) { 10, 11, 12, 13, 14, 15, 16, 17 };
+  vresult_sh = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_sh = (vector unsigned short) { 0, 1, 2, 3, 4, 14, 6, 7 };
+
+  vresult_sh = vec_inserth (src_vb_sh, src_va_sh, index);
+
+  if (!vec_all_eq (vresult_sh, expected_vresult_sh)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_sh, src_va_sh, index)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_sh[%d] = %d, expected_vresult_sh[%d] = %d\n",
+	     i, vresult_sh[i], i, expected_vresult_sh[i]);
+#else
+    abort();
+#endif
+  }
+
+  index = 8;
+  src_va_int = (vector unsigned int) { 0, 1, 2, 3 };
+  src_vb_int = (vector unsigned int) { 10, 11, 12, 13 };
+  vresult_int = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector unsigned int) { 0, 12, 2, 3 };
+
+  vresult_int = vec_inserth (src_vb_int, src_va_int, index);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_inserth (src_vb_int, src_va_int, index)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %u, expected_vresult_int[%d] = %u\n",
+	     i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\mvinsblx\M} } } */
+/* { dg-final { scan-assembler {\mvinshlx\M} } } */
+/* { dg-final { scan-assembler {\mvinswlx\M} } } */
+/* { dg-final { scan-assembler {\mvinsdlx\M} } } */
+/* { dg-final { scan-assembler {\mvinsbvlx\M} } } */
+/* { dg-final { scan-assembler {\mvinshvlx\M} } } */
+/* { dg-final { scan-assembler {\mvinswvlx\M} } } */
+
+/* { dg-final { scan-assembler {\mvinsbrx\M} } } */
+/* { dg-final { scan-assembler {\mvinshrx\M} } } */
+/* { dg-final { scan-assembler {\mvinswrx\M} } } */
+/* { dg-final { scan-assembler {\mvinsdrx\M} } } */
+/* { dg-final { scan-assembler {\mvinsbvrx\M} } } */
+/* { dg-final { scan-assembler {\mvinshvrx\M} } } */
+/* { dg-final { scan-assembler {\mvinswvrx\M} } } */
+