diff mbox series

[3/4] rs6000: Enable vec_insert for P8 with rs6000_expand_vector_set_var_p8

Message ID 20201010080825.3599892-4-luoxhu@linux.ibm.com
State New
Headers show
Series rs6000: Enable variable vec_insert with IFN VEC_SET | expand

Commit Message

Xionghu Luo Oct. 10, 2020, 8:08 a.m. UTC
gcc/ChangeLog:

2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>

	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
	platforms.
	* config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
	to call different path for P8 and P9.
	(rs6000_expand_vector_set_var_p9): New function.
	(rs6000_expand_vector_set_var_p8): New function.

gcc/testsuite/ChangeLog:

2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>

	* gcc.target/powerpc/pr79251.p8.c: New test.
---
 gcc/config/rs6000/rs6000-c.c                  |  27 +++-
 gcc/config/rs6000/rs6000.c                    | 117 +++++++++++++++++-
 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
 3 files changed, 155 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c

Comments

Xionghu Luo Nov. 27, 2020, 1:04 a.m. UTC | #1
Hi Segher,
Thanks for approving [PATCH 1/4] and [PATCH 2/4].  What is your
opinion of this [PATCH 3/4] for P8, please?  xxinsertw only exists since
Power ISA v3.0, so we had to implement it another way.


Xionghu


On 2020/10/10 16:08, Xionghu Luo wrote:
> gcc/ChangeLog:
> 
> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
> 	Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
> 	platforms.
> 	* config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
> 	to call different path for P8 and P9.
> 	(rs6000_expand_vector_set_var_p9): New function.
> 	(rs6000_expand_vector_set_var_p8): New function.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	* gcc.target/powerpc/pr79251.p8.c: New test.
> ---
>   gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>   gcc/config/rs6000/rs6000.c                    | 117 +++++++++++++++++-
>   gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>   3 files changed, 155 insertions(+), 6 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 5551a21d738..4bea8001ec6 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>   	  SET_EXPR_LOCATION (stmt, loc);
>   	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>   	}
> -      stmt = build_array_ref (loc, stmt, arg2);
> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
> -			  convert (TREE_TYPE (stmt), arg0));
> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> +
> +      if (TARGET_P8_VECTOR)
> +	{
> +	  stmt = build_array_ref (loc, stmt, arg2);
> +	  stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
> +			      convert (TREE_TYPE (stmt), arg0));
> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> +	}
> +      else
> +	{
> +	  tree arg1_inner_type;
> +	  tree innerptrtype;
> +	  arg1_inner_type = TREE_TYPE (arg1_type);
> +	  innerptrtype = build_pointer_type (arg1_inner_type);
> +
> +	  stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
> +	  stmt = convert (innerptrtype, stmt);
> +	  stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
> +	  stmt = build_indirect_ref (loc, stmt, RO_NULL);
> +	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
> +			 convert (TREE_TYPE (stmt), arg0));
> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> +	}
>         return stmt;
>       }
>   
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 96f76c7a74c..33ca839cb28 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
>   }
>   
>   /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> -   is variable and also counts by vector element size.  */
> +   is variable and also counts by vector element size for p9 and above.  */
>   
>   void
> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>   {
>     machine_mode mode = GET_MODE (target);
>   
> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>     emit_insn (perml);
>   }
>   
> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> +   is variable and also counts by vector element size for p8.  */
> +
> +void
> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
> +{
> +  machine_mode mode = GET_MODE (target);
> +
> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
> +
> +  gcc_assert (GET_MODE (idx) == E_SImode);
> +
> +  machine_mode inner_mode = GET_MODE (val);
> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
> +
> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
> +  int width = GET_MODE_SIZE (inner_mode);
> +
> +  gcc_assert (width >= 1 && width <= 4);
> +
> +  if (!BYTES_BIG_ENDIAN)
> +    {
> +      /*  idx = idx * width.  */
> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
> +      /*  idx = idx + 8.  */
> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
> +    }
> +  else
> +    {
> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
> +    }
> +
> +  /*  lxv vs33, mask.
> +      DImode: 0xffffffffffffffff0000000000000000
> +      SImode: 0x00000000ffffffff0000000000000000
> +      HImode: 0x000000000000ffff0000000000000000.
> +      QImode: 0x00000000000000ff0000000000000000.  */
> +  rtx mask = gen_reg_rtx (V16QImode);
> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
> +  rtvec v = rtvec_alloc (2);
> +  if (!BYTES_BIG_ENDIAN)
> +    {
> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
> +    }
> +  else
> +    {
> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
> +    }
> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
> +  emit_insn (gen_rtx_SET (mask, sub_mask));
> +
> +  /*  mtvsrd[wz] f0,tmp_val.  */
> +  rtx tmp_val = gen_reg_rtx (SImode);
> +  if (inner_mode == E_SFmode)
> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
> +  else
> +    tmp_val = force_reg (SImode, val);
> +
> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
> +  rtx val_v2di = gen_reg_rtx (V2DImode);
> +  rtvec vec_val = rtvec_alloc (2);
> +  if (!BYTES_BIG_ENDIAN)
> +  {
> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
> +    RTVEC_ELT (vec_val, 1) = tmp_val;
> +  }
> +  else
> +  {
> +    RTVEC_ELT (vec_val, 0) = tmp_val;
> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
> +  }
> +  emit_insn (
> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
> +
> +  /*  lvsl    13,0,idx.  */
> +  tmp = convert_modes (DImode, SImode, tmp, 1);
> +  rtx pcv = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
> +
> +  /*  vperm 1,1,1,13.  */
> +  /*  vperm 0,0,0,13.  */
> +  rtx val_perm = gen_reg_rtx (V16QImode);
> +  rtx mask_perm = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
> +
> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
> +
> +  /*  xxsel 34,34,32,33.  */
> +  emit_insn (
> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
> +}
> +
> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> +   is variable and also counts by vector element size.  */
> +
> +void
> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
> +{
> +  machine_mode mode = GET_MODE (target);
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
> +  else
> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
> +}
> +
>   /* Extract field ELT from VEC into TARGET.  */
>   
>   void
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> new file mode 100644
> index 00000000000..06da47b7758
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
> +
> +#include <stddef.h>
> +#include <altivec.h>
> +#include "pr79251.h"
> +
> +TEST_VEC_INSERT_ALL (test)
> +
> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
> +
>
Xionghu Luo Dec. 3, 2020, 2:16 p.m. UTC | #2
Ping. Thanks.


On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote:
> Hi Segher,
> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your
> opinion of this [PATCH 3/4] for P8, please?  xxinsertw only exists since
> v3.0, so we had to implement by another way.
> 
> 
> Xionghu
> 
> 
> On 2020/10/10 16:08, Xionghu Luo wrote:
>> gcc/ChangeLog:
>>
>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>> 	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>> 	Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
>> 	platforms.
>> 	* config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
>> 	to call different path for P8 and P9.
>> 	(rs6000_expand_vector_set_var_p9): New function.
>> 	(rs6000_expand_vector_set_var_p8): New function.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>> 	* gcc.target/powerpc/pr79251.p8.c: New test.
>> ---
>>    gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>>    gcc/config/rs6000/rs6000.c                    | 117 +++++++++++++++++-
>>    gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>>    3 files changed, 155 insertions(+), 6 deletions(-)
>>    create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>
>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
>> index 5551a21d738..4bea8001ec6 100644
>> --- a/gcc/config/rs6000/rs6000-c.c
>> +++ b/gcc/config/rs6000/rs6000-c.c
>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>>    	  SET_EXPR_LOCATION (stmt, loc);
>>    	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>>    	}
>> -      stmt = build_array_ref (loc, stmt, arg2);
>> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>> -			  convert (TREE_TYPE (stmt), arg0));
>> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>> +
>> +      if (TARGET_P8_VECTOR)
>> +	{
>> +	  stmt = build_array_ref (loc, stmt, arg2);
>> +	  stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>> +			      convert (TREE_TYPE (stmt), arg0));
>> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>> +	}
>> +      else
>> +	{
>> +	  tree arg1_inner_type;
>> +	  tree innerptrtype;
>> +	  arg1_inner_type = TREE_TYPE (arg1_type);
>> +	  innerptrtype = build_pointer_type (arg1_inner_type);
>> +
>> +	  stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
>> +	  stmt = convert (innerptrtype, stmt);
>> +	  stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
>> +	  stmt = build_indirect_ref (loc, stmt, RO_NULL);
>> +	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
>> +			 convert (TREE_TYPE (stmt), arg0));
>> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>> +	}
>>          return stmt;
>>        }
>>    
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index 96f76c7a74c..33ca839cb28 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
>>    }
>>    
>>    /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
>> -   is variable and also counts by vector element size.  */
>> +   is variable and also counts by vector element size for p9 and above.  */
>>    
>>    void
>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>    {
>>      machine_mode mode = GET_MODE (target);
>>    
>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>      emit_insn (perml);
>>    }
>>    
>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
>> +   is variable and also counts by vector element size for p8.  */
>> +
>> +void
>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>> +{
>> +  machine_mode mode = GET_MODE (target);
>> +
>> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
>> +
>> +  gcc_assert (GET_MODE (idx) == E_SImode);
>> +
>> +  machine_mode inner_mode = GET_MODE (val);
>> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
>> +
>> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
>> +  int width = GET_MODE_SIZE (inner_mode);
>> +
>> +  gcc_assert (width >= 1 && width <= 4);
>> +
>> +  if (!BYTES_BIG_ENDIAN)
>> +    {
>> +      /*  idx = idx * width.  */
>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>> +      /*  idx = idx + 8.  */
>> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
>> +    }
>> +  else
>> +    {
>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
>> +    }
>> +
>> +  /*  lxv vs33, mask.
>> +      DImode: 0xffffffffffffffff0000000000000000
>> +      SImode: 0x00000000ffffffff0000000000000000
>> +      HImode: 0x000000000000ffff0000000000000000.
>> +      QImode: 0x00000000000000ff0000000000000000.  */
>> +  rtx mask = gen_reg_rtx (V16QImode);
>> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
>> +  rtvec v = rtvec_alloc (2);
>> +  if (!BYTES_BIG_ENDIAN)
>> +    {
>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
>> +    }
>> +  else
>> +    {
>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
>> +    }
>> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
>> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
>> +  emit_insn (gen_rtx_SET (mask, sub_mask));
>> +
>> +  /*  mtvsrd[wz] f0,tmp_val.  */
>> +  rtx tmp_val = gen_reg_rtx (SImode);
>> +  if (inner_mode == E_SFmode)
>> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
>> +  else
>> +    tmp_val = force_reg (SImode, val);
>> +
>> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
>> +  rtx val_v2di = gen_reg_rtx (V2DImode);
>> +  rtvec vec_val = rtvec_alloc (2);
>> +  if (!BYTES_BIG_ENDIAN)
>> +  {
>> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
>> +    RTVEC_ELT (vec_val, 1) = tmp_val;
>> +  }
>> +  else
>> +  {
>> +    RTVEC_ELT (vec_val, 0) = tmp_val;
>> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
>> +  }
>> +  emit_insn (
>> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
>> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
>> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
>> +
>> +  /*  lvsl    13,0,idx.  */
>> +  tmp = convert_modes (DImode, SImode, tmp, 1);
>> +  rtx pcv = gen_reg_rtx (V16QImode);
>> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
>> +
>> +  /*  vperm 1,1,1,13.  */
>> +  /*  vperm 0,0,0,13.  */
>> +  rtx val_perm = gen_reg_rtx (V16QImode);
>> +  rtx mask_perm = gen_reg_rtx (V16QImode);
>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
>> +
>> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
>> +
>> +  /*  xxsel 34,34,32,33.  */
>> +  emit_insn (
>> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
>> +}
>> +
>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
>> +   is variable and also counts by vector element size.  */
>> +
>> +void
>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>> +{
>> +  machine_mode mode = GET_MODE (target);
>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
>> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
>> +  else
>> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
>> +}
>> +
>>    /* Extract field ELT from VEC into TARGET.  */
>>    
>>    void
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>> new file mode 100644
>> index 00000000000..06da47b7758
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
>> +
>> +#include <stddef.h>
>> +#include <altivec.h>
>> +#include "pr79251.h"
>> +
>> +TEST_VEC_INSERT_ALL (test)
>> +
>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
>> +
>>
>
Xionghu Luo Dec. 10, 2020, 3:32 a.m. UTC | #3
Ping^2. Thanks.

On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote:
> Ping. Thanks.
> 
> 
> On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote:
>> Hi Segher,
>> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your
>> opinion of this [PATCH 3/4] for P8, please?  xxinsertw only exists since
>> v3.0, so we had to implement by another way.
>>
>>
>> Xionghu
>>
>>
>> On 2020/10/10 16:08, Xionghu Luo wrote:
>>> gcc/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>>>     Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
>>>     platforms.
>>>     * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
>>>     to call different path for P8 and P9.
>>>     (rs6000_expand_vector_set_var_p9): New function.
>>>     (rs6000_expand_vector_set_var_p8): New function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * gcc.target/powerpc/pr79251.p8.c: New test.
>>> ---
>>>    gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>>>    gcc/config/rs6000/rs6000.c                    | 117 
>>> +++++++++++++++++-
>>>    gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>>>    3 files changed, 155 insertions(+), 6 deletions(-)
>>>    create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
>>> index 5551a21d738..4bea8001ec6 100644
>>> --- a/gcc/config/rs6000/rs6000-c.c
>>> +++ b/gcc/config/rs6000/rs6000-c.c
>>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin 
>>> (location_t loc, tree fndecl,
>>>          SET_EXPR_LOCATION (stmt, loc);
>>>          stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>>>        }
>>> -      stmt = build_array_ref (loc, stmt, arg2);
>>> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> -              convert (TREE_TYPE (stmt), arg0));
>>> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +
>>> +      if (TARGET_P8_VECTOR)
>>> +    {
>>> +      stmt = build_array_ref (loc, stmt, arg2);
>>> +      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> +                  convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>> +      else
>>> +    {
>>> +      tree arg1_inner_type;
>>> +      tree innerptrtype;
>>> +      arg1_inner_type = TREE_TYPE (arg1_type);
>>> +      innerptrtype = build_pointer_type (arg1_inner_type);
>>> +
>>> +      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
>>> +      stmt = convert (innerptrtype, stmt);
>>> +      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
>>> +      stmt = build_indirect_ref (loc, stmt, RO_NULL);
>>> +      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
>>> +             convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>>          return stmt;
>>>        }
>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>> index 96f76c7a74c..33ca839cb28 100644
>>> --- a/gcc/config/rs6000/rs6000.c
>>> +++ b/gcc/config/rs6000/rs6000.c
>>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx 
>>> val, rtx elt_rtx)
>>>    }
>>>    /* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> -   is variable and also counts by vector element size.  */
>>> +   is variable and also counts by vector element size for p9 and 
>>> above.  */
>>>    void
>>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>>    {
>>>      machine_mode mode = GET_MODE (target);
>>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx 
>>> val, rtx idx)
>>>      emit_insn (perml);
>>>    }
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size for p8.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +
>>> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
>>> +
>>> +  gcc_assert (GET_MODE (idx) == E_SImode);
>>> +
>>> +  machine_mode inner_mode = GET_MODE (val);
>>> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
>>> +
>>> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
>>> +  int width = GET_MODE_SIZE (inner_mode);
>>> +
>>> +  gcc_assert (width >= 1 && width <= 4);
>>> +
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      /*  idx = idx * width.  */
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      /*  idx = idx + 8.  */
>>> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
>>> +    }
>>> +  else
>>> +    {
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
>>> +    }
>>> +
>>> +  /*  lxv vs33, mask.
>>> +      DImode: 0xffffffffffffffff0000000000000000
>>> +      SImode: 0x00000000ffffffff0000000000000000
>>> +      HImode: 0x000000000000ffff0000000000000000.
>>> +      QImode: 0x00000000000000ff0000000000000000.  */
>>> +  rtx mask = gen_reg_rtx (V16QImode);
>>> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec v = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +    }
>>> +  else
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +    }
>>> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL 
>>> (V2DImode, v)));
>>> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, 
>>> V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (mask, sub_mask));
>>> +
>>> +  /*  mtvsrd[wz] f0,tmp_val.  */
>>> +  rtx tmp_val = gen_reg_rtx (SImode);
>>> +  if (inner_mode == E_SFmode)
>>> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
>>> +  else
>>> +    tmp_val = force_reg (SImode, val);
>>> +
>>> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
>>> +  rtx val_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec vec_val = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +    RTVEC_ELT (vec_val, 1) = tmp_val;
>>> +  }
>>> +  else
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = tmp_val;
>>> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +  }
>>> +  emit_insn (
>>> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, 
>>> vec_val)));
>>> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
>>> +
>>> +  /*  lvsl    13,0,idx.  */
>>> +  tmp = convert_modes (DImode, SImode, tmp, 1);
>>> +  rtx pcv = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
>>> +
>>> +  /*  vperm 1,1,1,13.  */
>>> +  /*  vperm 0,0,0,13.  */
>>> +  rtx val_perm = gen_reg_rtx (V16QImode);
>>> +  rtx mask_perm = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, 
>>> val_v16qi, pcv));
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
>>> +
>>> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
>>> +
>>> +  /*  xxsel 34,34,32,33.  */
>>> +  emit_insn (
>>> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, 
>>> mask_perm));
>>> +}
>>> +
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>>> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
>>> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
>>> +  else
>>> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
>>> +}
>>> +
>>>    /* Extract field ELT from VEC into TARGET.  */
>>>    void
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> new file mode 100644
>>> index 00000000000..06da47b7758
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> @@ -0,0 +1,17 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
>>> +
>>> +#include <stddef.h>
>>> +#include <altivec.h>
>>> +#include "pr79251.h"
>>> +
>>> +TEST_VEC_INSERT_ALL (test)
>>> +
>>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
>>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
>>> +
>>>
>>
>
Xionghu Luo Dec. 23, 2020, 2:18 a.m. UTC | #4
Ping^3 for stage 3.

And also for the follow-up patch:
[PATCH 4/4] rs6000: Update testcases' instruction count.

Thanks:)


On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote:
> Ping. Thanks.
> 
> 
> On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote:
>> Hi Segher,
>> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your
>> opinion of this [PATCH 3/4] for P8, please?  xxinsertw only exists since
>> v3.0, so we had to implement by another way.
>>
>>
>> Xionghu
>>
>>
>> On 2020/10/10 16:08, Xionghu Luo wrote:
>>> gcc/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>>>     Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
>>>     platforms.
>>>     * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
>>>     to call different path for P8 and P9.
>>>     (rs6000_expand_vector_set_var_p9): New function.
>>>     (rs6000_expand_vector_set_var_p8): New function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * gcc.target/powerpc/pr79251.p8.c: New test.
>>> ---
>>>    gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>>>    gcc/config/rs6000/rs6000.c                    | 117 
>>> +++++++++++++++++-
>>>    gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>>>    3 files changed, 155 insertions(+), 6 deletions(-)
>>>    create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
>>> index 5551a21d738..4bea8001ec6 100644
>>> --- a/gcc/config/rs6000/rs6000-c.c
>>> +++ b/gcc/config/rs6000/rs6000-c.c
>>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin 
>>> (location_t loc, tree fndecl,
>>>          SET_EXPR_LOCATION (stmt, loc);
>>>          stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>>>        }
>>> -      stmt = build_array_ref (loc, stmt, arg2);
>>> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> -              convert (TREE_TYPE (stmt), arg0));
>>> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +
>>> +      if (TARGET_P8_VECTOR)
>>> +    {
>>> +      stmt = build_array_ref (loc, stmt, arg2);
>>> +      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> +                  convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>> +      else
>>> +    {
>>> +      tree arg1_inner_type;
>>> +      tree innerptrtype;
>>> +      arg1_inner_type = TREE_TYPE (arg1_type);
>>> +      innerptrtype = build_pointer_type (arg1_inner_type);
>>> +
>>> +      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
>>> +      stmt = convert (innerptrtype, stmt);
>>> +      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
>>> +      stmt = build_indirect_ref (loc, stmt, RO_NULL);
>>> +      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
>>> +             convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>>          return stmt;
>>>        }
>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>> index 96f76c7a74c..33ca839cb28 100644
>>> --- a/gcc/config/rs6000/rs6000.c
>>> +++ b/gcc/config/rs6000/rs6000.c
>>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx 
>>> val, rtx elt_rtx)
>>>    }
>>>    /* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> -   is variable and also counts by vector element size.  */
>>> +   is variable and also counts by vector element size for p9 and 
>>> above.  */
>>>    void
>>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>>    {
>>>      machine_mode mode = GET_MODE (target);
>>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx 
>>> val, rtx idx)
>>>      emit_insn (perml);
>>>    }
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size for p8.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +
>>> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
>>> +
>>> +  gcc_assert (GET_MODE (idx) == E_SImode);
>>> +
>>> +  machine_mode inner_mode = GET_MODE (val);
>>> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
>>> +
>>> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
>>> +  int width = GET_MODE_SIZE (inner_mode);
>>> +
>>> +  gcc_assert (width >= 1 && width <= 4);
>>> +
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      /*  idx = idx * width.  */
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      /*  idx = idx + 8.  */
>>> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
>>> +    }
>>> +  else
>>> +    {
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
>>> +    }
>>> +
>>> +  /*  lxv vs33, mask.
>>> +      DImode: 0xffffffffffffffff0000000000000000
>>> +      SImode: 0x00000000ffffffff0000000000000000
>>> +      HImode: 0x000000000000ffff0000000000000000.
>>> +      QImode: 0x00000000000000ff0000000000000000.  */
>>> +  rtx mask = gen_reg_rtx (V16QImode);
>>> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec v = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +    }
>>> +  else
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +    }
>>> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL 
>>> (V2DImode, v)));
>>> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, 
>>> V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (mask, sub_mask));
>>> +
>>> +  /*  mtvsrd[wz] f0,tmp_val.  */
>>> +  rtx tmp_val = gen_reg_rtx (SImode);
>>> +  if (inner_mode == E_SFmode)
>>> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
>>> +  else
>>> +    tmp_val = force_reg (SImode, val);
>>> +
>>> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
>>> +  rtx val_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec vec_val = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +    RTVEC_ELT (vec_val, 1) = tmp_val;
>>> +  }
>>> +  else
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = tmp_val;
>>> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +  }
>>> +  emit_insn (
>>> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, 
>>> vec_val)));
>>> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
>>> +
>>> +  /*  lvsl    13,0,idx.  */
>>> +  tmp = convert_modes (DImode, SImode, tmp, 1);
>>> +  rtx pcv = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
>>> +
>>> +  /*  vperm 1,1,1,13.  */
>>> +  /*  vperm 0,0,0,13.  */
>>> +  rtx val_perm = gen_reg_rtx (V16QImode);
>>> +  rtx mask_perm = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, 
>>> val_v16qi, pcv));
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
>>> +
>>> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
>>> +
>>> +  /*  xxsel 34,34,32,33.  */
>>> +  emit_insn (
>>> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, 
>>> mask_perm));
>>> +}
>>> +
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>>> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
>>> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
>>> +  else
>>> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
>>> +}
>>> +
>>>    /* Extract field ELT from VEC into TARGET.  */
>>>    void
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> new file mode 100644
>>> index 00000000000..06da47b7758
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> @@ -0,0 +1,17 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
>>> +
>>> +#include <stddef.h>
>>> +#include <altivec.h>
>>> +#include "pr79251.h"
>>> +
>>> +TEST_VEC_INSERT_ALL (test)
>>> +
>>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
>>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
>>> +
>>>
>>
>
Xionghu Luo Jan. 15, 2021, 2:48 a.m. UTC | #5
Ping^4, thanks.


On 2020/12/23 10:18, Xionghu Luo via Gcc-patches wrote:
> Ping^3 for stage 3.
> 
> And this followed patch:
> [PATCH 4/4] rs6000: Update testcases' instruction count.
> 
> Thanks:)
> 
> 
> On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote:
>> Ping. Thanks.
>>
>>
>> On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote:
>>> Hi Segher,
>>> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your
>>> opinion of this [PATCH 3/4] for P8, please?  xxinsertw only exists since
>>> v3.0, so we had to implement by another way.
>>>
>>>
>>> Xionghu
>>>
>>>
>>> On 2020/10/10 16:08, Xionghu Luo wrote:
>>>> gcc/ChangeLog:
>>>>
>>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>>
>>>>     * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>>>>     Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
>>>>     platforms.
>>>>     * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
>>>>     to call different path for P8 and P9.
>>>>     (rs6000_expand_vector_set_var_p9): New function.
>>>>     (rs6000_expand_vector_set_var_p8): New function.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>>
>>>>     * gcc.target/powerpc/pr79251.p8.c: New test.
>>>> ---
>>>>    gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>>>>    gcc/config/rs6000/rs6000.c                    | 117 
>>>> +++++++++++++++++-
>>>>    gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>>>>    3 files changed, 155 insertions(+), 6 deletions(-)
>>>>    create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>>
>>>> diff --git a/gcc/config/rs6000/rs6000-c.c 
>>>> b/gcc/config/rs6000/rs6000-c.c
>>>> index 5551a21d738..4bea8001ec6 100644
>>>> --- a/gcc/config/rs6000/rs6000-c.c
>>>> +++ b/gcc/config/rs6000/rs6000-c.c
>>>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin 
>>>> (location_t loc, tree fndecl,
>>>>          SET_EXPR_LOCATION (stmt, loc);
>>>>          stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>>>>        }
>>>> -      stmt = build_array_ref (loc, stmt, arg2);
>>>> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>>> -              convert (TREE_TYPE (stmt), arg0));
>>>> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>>> +
>>>> +      if (TARGET_P8_VECTOR)
>>>> +    {
>>>> +      stmt = build_array_ref (loc, stmt, arg2);
>>>> +      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>>> +                  convert (TREE_TYPE (stmt), arg0));
>>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>>> +    }
>>>> +      else
>>>> +    {
>>>> +      tree arg1_inner_type;
>>>> +      tree innerptrtype;
>>>> +      arg1_inner_type = TREE_TYPE (arg1_type);
>>>> +      innerptrtype = build_pointer_type (arg1_inner_type);
>>>> +
>>>> +      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
>>>> +      stmt = convert (innerptrtype, stmt);
>>>> +      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
>>>> +      stmt = build_indirect_ref (loc, stmt, RO_NULL);
>>>> +      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
>>>> +             convert (TREE_TYPE (stmt), arg0));
>>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>>> +    }
>>>>          return stmt;
>>>>        }
>>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>>> index 96f76c7a74c..33ca839cb28 100644
>>>> --- a/gcc/config/rs6000/rs6000.c
>>>> +++ b/gcc/config/rs6000/rs6000.c
>>>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx 
>>>> val, rtx elt_rtx)
>>>>    }
>>>>    /* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>>> element, IDX
>>>> -   is variable and also counts by vector element size.  */
>>>> +   is variable and also counts by vector element size for p9 and 
>>>> above.  */
>>>>    void
>>>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>>>    {
>>>>      machine_mode mode = GET_MODE (target);
>>>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, 
>>>> rtx val, rtx idx)
>>>>      emit_insn (perml);
>>>>    }
>>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>>> element, IDX
>>>> +   is variable and also counts by vector element size for p8.  */
>>>> +
>>>> +void
>>>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>>>> +{
>>>> +  machine_mode mode = GET_MODE (target);
>>>> +
>>>> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
>>>> +
>>>> +  gcc_assert (GET_MODE (idx) == E_SImode);
>>>> +
>>>> +  machine_mode inner_mode = GET_MODE (val);
>>>> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
>>>> +
>>>> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
>>>> +  int width = GET_MODE_SIZE (inner_mode);
>>>> +
>>>> +  gcc_assert (width >= 1 && width <= 4);
>>>> +
>>>> +  if (!BYTES_BIG_ENDIAN)
>>>> +    {
>>>> +      /*  idx = idx * width.  */
>>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>>> +      /*  idx = idx + 8.  */
>>>> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>>> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
>>>> +    }
>>>> +
>>>> +  /*  lxv vs33, mask.
>>>> +      DImode: 0xffffffffffffffff0000000000000000
>>>> +      SImode: 0x00000000ffffffff0000000000000000
>>>> +      HImode: 0x000000000000ffff0000000000000000.
>>>> +      QImode: 0x00000000000000ff0000000000000000.  */
>>>> +  rtx mask = gen_reg_rtx (V16QImode);
>>>> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
>>>> +  rtvec v = rtvec_alloc (2);
>>>> +  if (!BYTES_BIG_ENDIAN)
>>>> +    {
>>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
>>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
>>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
>>>> +    }
>>>> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL 
>>>> (V2DImode, v)));
>>>> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, 
>>>> V2DImode, 0);
>>>> +  emit_insn (gen_rtx_SET (mask, sub_mask));
>>>> +
>>>> +  /*  mtvsrd[wz] f0,tmp_val.  */
>>>> +  rtx tmp_val = gen_reg_rtx (SImode);
>>>> +  if (inner_mode == E_SFmode)
>>>> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
>>>> +  else
>>>> +    tmp_val = force_reg (SImode, val);
>>>> +
>>>> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
>>>> +  rtx val_v2di = gen_reg_rtx (V2DImode);
>>>> +  rtvec vec_val = rtvec_alloc (2);
>>>> +  if (!BYTES_BIG_ENDIAN)
>>>> +  {
>>>> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
>>>> +    RTVEC_ELT (vec_val, 1) = tmp_val;
>>>> +  }
>>>> +  else
>>>> +  {
>>>> +    RTVEC_ELT (vec_val, 0) = tmp_val;
>>>> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
>>>> +  }
>>>> +  emit_insn (
>>>> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, 
>>>> vec_val)));
>>>> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 
>>>> 0);
>>>> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
>>>> +
>>>> +  /*  lvsl    13,0,idx.  */
>>>> +  tmp = convert_modes (DImode, SImode, tmp, 1);
>>>> +  rtx pcv = gen_reg_rtx (V16QImode);
>>>> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
>>>> +
>>>> +  /*  vperm 1,1,1,13.  */
>>>> +  /*  vperm 0,0,0,13.  */
>>>> +  rtx val_perm = gen_reg_rtx (V16QImode);
>>>> +  rtx mask_perm = gen_reg_rtx (V16QImode);
>>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, 
>>>> val_v16qi, pcv));
>>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, 
>>>> pcv));
>>>> +
>>>> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
>>>> +
>>>> +  /*  xxsel 34,34,32,33.  */
>>>> +  emit_insn (
>>>> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, 
>>>> mask_perm));
>>>> +}
>>>> +
>>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>>> element, IDX
>>>> +   is variable and also counts by vector element size.  */
>>>> +
>>>> +void
>>>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>>> +{
>>>> +  machine_mode mode = GET_MODE (target);
>>>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>>>> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
>>>> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
>>>> +  else
>>>> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
>>>> +}
>>>> +
>>>>    /* Extract field ELT from VEC into TARGET.  */
>>>>    void
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c 
>>>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>> new file mode 100644
>>>> index 00000000000..06da47b7758
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>> @@ -0,0 +1,17 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
>>>> +
>>>> +#include <stddef.h>
>>>> +#include <altivec.h>
>>>> +#include "pr79251.h"
>>>> +
>>>> +TEST_VEC_INSERT_ALL (test)
>>>> +
>>>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
>>>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
>>>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
>>>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
>>>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
>>>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
>>>> +
>>>>
>>>
>>
>
Segher Boessenkool Jan. 21, 2021, 11:48 p.m. UTC | #6
Hi!

You never committed 2/4?  That makes it harder to review this one :-)

On Sat, Oct 10, 2020 at 03:08:24AM -0500, Xionghu Luo wrote:
> gcc/ChangeLog:
> 
> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
> 	Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
> 	platforms.
> 	* config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
> 	to call different path for P8 and P9.
> 	(rs6000_expand_vector_set_var_p9): New function.
> 	(rs6000_expand_vector_set_var_p8): New function.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	* gcc.target/powerpc/pr79251.p8.c: New test.

If testing on P9 LE and P7 BE (32-bit and 64-bit) worked, this is okay
for trunk.  Thanks!

(Let me know if you need help testing.)


Segher
David Edelsohn Jan. 22, 2021, 8:08 p.m. UTC | #7
On Thu, Jan 21, 2021 at 6:51 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> Hi!
>
> You never committed 2/4?  That makes it harder to review this one :-)
>
> On Sat, Oct 10, 2020 at 03:08:24AM -0500, Xionghu Luo wrote:
> > gcc/ChangeLog:
> >
> > 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> >
> >       * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
> >       Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
> >       platforms.
> >       * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
> >       to call different path for P8 and P9.
> >       (rs6000_expand_vector_set_var_p9): New function.
> >       (rs6000_expand_vector_set_var_p8): New function.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
> >
> >       * gcc.target/powerpc/pr79251.p8.c: New test.
>
> If testing on P9 LE and P7 BE (32-bit and 64-bit) worked, this is okay
> for trunk.  Thanks!

This testcase ICEs on AIX.  Please fix.  This was not tested properly.

The new pattern does not have matching target conditions for patterns
on which it relies.

Thanks, David
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 5551a21d738..4bea8001ec6 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1599,10 +1599,29 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	  SET_EXPR_LOCATION (stmt, loc);
 	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
 	}
-      stmt = build_array_ref (loc, stmt, arg2);
-      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
-			  convert (TREE_TYPE (stmt), arg0));
-      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+
+      if (TARGET_P8_VECTOR)
+	{
+	  stmt = build_array_ref (loc, stmt, arg2);
+	  stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
+			      convert (TREE_TYPE (stmt), arg0));
+	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+	}
+      else
+	{
+	  tree arg1_inner_type;
+	  tree innerptrtype;
+	  arg1_inner_type = TREE_TYPE (arg1_type);
+	  innerptrtype = build_pointer_type (arg1_inner_type);
+
+	  stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+	  stmt = convert (innerptrtype, stmt);
+	  stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+	  stmt = build_indirect_ref (loc, stmt, RO_NULL);
+	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+			 convert (TREE_TYPE (stmt), arg0));
+	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+	}
       return stmt;
     }
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 96f76c7a74c..33ca839cb28 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6806,10 +6806,10 @@  rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
 }
 
 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
-   is variable and also counts by vector element size.  */
+   is variable and also counts by vector element size for p9 and above.  */
 
 void
-rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
+rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
 {
   machine_mode mode = GET_MODE (target);
 
@@ -6852,6 +6852,119 @@  rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
   emit_insn (perml);
 }
 
+/* Insert VAL into element IDX of TARGET.  VAL has the same size as a vector
+   element; IDX is variable and counts vector elements.  P8 version.  */
+
+void
+rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
+{
+  machine_mode mode = GET_MODE (target);
+
+  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
+
+  gcc_assert (GET_MODE (idx) == E_SImode);
+
+  machine_mode inner_mode = GET_MODE (val);
+  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
+
+  rtx tmp = gen_reg_rtx (GET_MODE (idx));
+  int width = GET_MODE_SIZE (inner_mode);
+
+  gcc_assert (width >= 1 && width <= 4);
+
+  if (!BYTES_BIG_ENDIAN)
+    {
+      /*  tmp = idx * width.  */
+      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
+      /*  tmp = tmp + 8.  */
+      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
+    }
+  else
+    {
+      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
+      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
+    }
+
+  /*  lxv vs33, mask.
+      DImode: 0xffffffffffffffff0000000000000000
+      SImode: 0x00000000ffffffff0000000000000000
+      HImode: 0x000000000000ffff0000000000000000
+      QImode: 0x00000000000000ff0000000000000000.  */
+  rtx mask = gen_reg_rtx (V16QImode);
+  rtx mask_v2di = gen_reg_rtx (V2DImode);
+  rtvec v = rtvec_alloc (2);
+  if (!BYTES_BIG_ENDIAN)
+    {
+      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
+      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
+    }
+  else
+    {
+      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
+      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
+    }
+  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
+  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
+  emit_insn (gen_rtx_SET (mask, sub_mask));
+
+  /*  mtvsrd[wz] f0,tmp_val.  */
+  rtx tmp_val = gen_reg_rtx (SImode);
+  if (inner_mode == E_SFmode)
+    emit_insn (gen_movsi_from_sf (tmp_val, val));
+  else
+    tmp_val = force_reg (SImode, val);
+
+  rtx val_v16qi = gen_reg_rtx (V16QImode);
+  rtx val_v2di = gen_reg_rtx (V2DImode);
+  rtvec vec_val = rtvec_alloc (2);
+  if (!BYTES_BIG_ENDIAN)
+  {
+    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
+    RTVEC_ELT (vec_val, 1) = tmp_val;
+  }
+  else
+  {
+    RTVEC_ELT (vec_val, 0) = tmp_val;
+    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
+  }
+  emit_insn (
+    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
+  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
+  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
+
+  /*  lvsl    13,0,idx.  */
+  tmp = convert_modes (DImode, SImode, tmp, 1);
+  rtx pcv = gen_reg_rtx (V16QImode);
+  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
+
+  /*  vperm 1,1,1,13.  */
+  /*  vperm 0,0,0,13.  */
+  rtx val_perm = gen_reg_rtx (V16QImode);
+  rtx mask_perm = gen_reg_rtx (V16QImode);
+  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
+  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
+
+  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
+
+  /*  xxsel 34,34,32,33.  */
+  emit_insn (
+    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
+}
+
+/* Insert VAL into element IDX of TARGET.  VAL has the same size as a vector
+   element; IDX is variable and counts vector elements.  */
+
+void
+rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
+{
+  machine_mode mode = GET_MODE (target);
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
+    rs6000_expand_vector_set_var_p9 (target, val, idx);
+  else
+    rs6000_expand_vector_set_var_p8 (target, val, idx);
+}
+
 /* Extract field ELT from VEC into TARGET.  */
 
 void
diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
new file mode 100644
index 00000000000..06da47b7758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
+
+#include <stddef.h>
+#include <altivec.h>
+#include "pr79251.h"
+
+TEST_VEC_INSERT_ALL (test)
+
+/* { dg-final { scan-assembler-not {\mstxw\M} } } */
+/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
+/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
+/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
+