[rs6000,v3] enable gimple folding for vec_xl, vec_xst

Message ID 1531163298.17604.38.camel@brimstone.rchland.ibm.com
State New
Headers show
Series
  • [rs6000,v3] enable gimple folding for vec_xl, vec_xst
Related show

Commit Message

Will Schmidt July 9, 2018, 7:08 p.m.
Hi,
  Re-posting.  Richard provided feedback on a previous version of this
patch, I wanted to make sure he was/is OK with the latest. :-) 

Add support for Gimple folding for unaligned vector loads and stores.
    
Regtest completed across variety of systems, P6,P7,P8,P9.
    
[v2] Added the type for the MEM_REF, per feedback.
Testcases for gimple-folding of the same are currently in-tree
as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
Re-tested, still looks good. :-)
    
[v3] Updated the alignment for the MEM_REF to be 4bytes.
Updated/added/removed comments in the code for clarity.
    
OK for trunk?
    
Thanks
-Will
    
[gcc]
    
2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
    
	* config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
	vec_xst variants to the list.
	(rs6000_gimple_fold_builtin): Add support for folding unaligned
	vector loads and stores.

Comments

Richard Biener July 10, 2018, 7:10 a.m. | #1
On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>
> Hi,
>   Re-posting.  Richard provided feedback on a previous version of this
> patch, I wanted to make sure he was/is OK with the latest. :-)
>
> Add support for Gimple folding for unaligned vector loads and stores.
>
> Regtest completed across variety of systems, P6,P7,P8,P9.
>
> [v2] Added the type for the MEM_REF, per feedback.
> Testcases for gimple-folding of the same are currently in-tree
> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> Re-tested, still looks good. :-)
>
> [v3] Updated the alignment for the MEM_REF to be 4bytes.
> Updated/added/removed comments in the code for clarity.
>
> OK for trunk?
>
> Thanks
> -Will
>
> [gcc]
>
> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
>
>         * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
>         vec_xst variants to the list.
>         (rs6000_gimple_fold_builtin): Add support for folding unaligned
>         vector loads and stores.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 8bc4109..774c60a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
>      case ALTIVEC_BUILTIN_STVX_V8HI:
>      case ALTIVEC_BUILTIN_STVX_V4SI:
>      case ALTIVEC_BUILTIN_STVX_V4SF:
>      case ALTIVEC_BUILTIN_STVX_V2DI:
>      case ALTIVEC_BUILTIN_STVX_V2DF:
> +    case VSX_BUILTIN_STXVW4X_V16QI:
> +    case VSX_BUILTIN_STXVW4X_V8HI:
> +    case VSX_BUILTIN_STXVW4X_V4SF:
> +    case VSX_BUILTIN_STXVW4X_V4SI:
> +    case VSX_BUILTIN_STXVD2X_V2DF:
> +    case VSX_BUILTIN_STXVD2X_V2DI:
>        return true;
>      default:
>        return false;
>      }
>  }
> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         gimple_set_location (g, loc);
>         gsi_replace (gsi, g, true);
>         return true;
>        }
>
> +    /* unaligned Vector loads.  */
> +    case VSX_BUILTIN_LXVW4X_V16QI:
> +    case VSX_BUILTIN_LXVW4X_V8HI:
> +    case VSX_BUILTIN_LXVW4X_V4SF:
> +    case VSX_BUILTIN_LXVW4X_V4SI:
> +    case VSX_BUILTIN_LXVD2X_V2DF:
> +    case VSX_BUILTIN_LXVD2X_V2DI:
> +      {
> +        arg0 = gimple_call_arg (stmt, 0);  // offset
> +        arg1 = gimple_call_arg (stmt, 1);  // address
> +        lhs = gimple_call_lhs (stmt);
> +        location_t loc = gimple_location (stmt);
> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
> +           here instead of trying to enforce TBAA on pointer types.  */
> +        tree arg1_type = ptr_type_node;
> +        tree lhs_type = TREE_TYPE (lhs);
> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
> +          required alignment (power) is 4 bytes regardless of data type.  */
> +        tree align_ltype = build_aligned_type (lhs_type, 4);
> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> +           the tree using the value from arg0.  The resulting type will match
> +           the type of arg1.  */
> +        gimple_seq stmts = NULL;
> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> +                                      arg1_type, arg1, temp_offset);
> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> +           take an offset, but since we've already incorporated the offset
> +           above, here we just pass in a zero.  */
> +        gimple *g;
> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> +                                               build_int_cst (arg1_type, 0)));
> +        gimple_set_location (g, loc);
> +        gsi_replace (gsi, g, true);
> +        return true;
> +      }
> +
> +    /* unaligned Vector stores.  */
> +    case VSX_BUILTIN_STXVW4X_V16QI:
> +    case VSX_BUILTIN_STXVW4X_V8HI:
> +    case VSX_BUILTIN_STXVW4X_V4SF:
> +    case VSX_BUILTIN_STXVW4X_V4SI:
> +    case VSX_BUILTIN_STXVD2X_V2DF:
> +    case VSX_BUILTIN_STXVD2X_V2DI:
> +      {
> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
> +        location_t loc = gimple_location (stmt);
> +        tree arg0_type = TREE_TYPE (arg0);
> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
> +        tree arg2_type = ptr_type_node;
> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> +           the tree using the value from arg0.  The resulting type will match
> +           the type of arg2.  */
> +        gimple_seq stmts = NULL;
> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> +                                      arg2_type, arg2, temp_offset);
> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +        gimple *g;
> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> +                                          build_int_cst (arg2_type, 0)), arg0);

Here you are using the alignment of a "value", that is, you originally
have sth like

__builtin_store (val_1, offset, ptr);

where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
__attribute__((aligned(1))) type!).

I wonder if, given you don't play similar games as with the load the
HW requirement
is sth like vector element alignment?

GIMPLE generally doesn't care about the alignment of registers (why
should it) but you
can't be sure they do not "leak" there.

Richard.

> +        gimple_set_location (g, loc);
> +        gsi_replace (gsi, g, true);
> +        return true;
> +      }
> +
>      /* Vector Fused multiply-add (fma).  */
>      case ALTIVEC_BUILTIN_VMADDFP:
>      case VSX_BUILTIN_XVMADDDP:
>      case ALTIVEC_BUILTIN_VMLADDUHM:
>        {
>
>
Bill Schmidt July 10, 2018, 1:33 p.m. | #2
> On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> 
> On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>> 
>> Hi,
>>  Re-posting.  Richard provided feedback on a previous version of this
>> patch, I wanted to make sure he was/is OK with the latest. :-)
>> 
>> Add support for Gimple folding for unaligned vector loads and stores.
>> 
>> Regtest completed across variety of systems, P6,P7,P8,P9.
>> 
>> [v2] Added the type for the MEM_REF, per feedback.
>> Testcases for gimple-folding of the same are currently in-tree
>> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
>> Re-tested, still looks good. :-)
>> 
>> [v3] Updated the alignment for the MEM_REF to be 4bytes.
>> Updated/added/removed comments in the code for clarity.
>> 
>> OK for trunk?
>> 
>> Thanks
>> -Will
>> 
>> [gcc]
>> 
>> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
>> 
>>        * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
>>        vec_xst variants to the list.
>>        (rs6000_gimple_fold_builtin): Add support for folding unaligned
>>        vector loads and stores.
>> 
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index 8bc4109..774c60a 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
>>     case ALTIVEC_BUILTIN_STVX_V8HI:
>>     case ALTIVEC_BUILTIN_STVX_V4SI:
>>     case ALTIVEC_BUILTIN_STVX_V4SF:
>>     case ALTIVEC_BUILTIN_STVX_V2DI:
>>     case ALTIVEC_BUILTIN_STVX_V2DF:
>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>>       return true;
>>     default:
>>       return false;
>>     }
>> }
>> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>        gimple_set_location (g, loc);
>>        gsi_replace (gsi, g, true);
>>        return true;
>>       }
>> 
>> +    /* unaligned Vector loads.  */
>> +    case VSX_BUILTIN_LXVW4X_V16QI:
>> +    case VSX_BUILTIN_LXVW4X_V8HI:
>> +    case VSX_BUILTIN_LXVW4X_V4SF:
>> +    case VSX_BUILTIN_LXVW4X_V4SI:
>> +    case VSX_BUILTIN_LXVD2X_V2DF:
>> +    case VSX_BUILTIN_LXVD2X_V2DI:
>> +      {
>> +        arg0 = gimple_call_arg (stmt, 0);  // offset
>> +        arg1 = gimple_call_arg (stmt, 1);  // address
>> +        lhs = gimple_call_lhs (stmt);
>> +        location_t loc = gimple_location (stmt);
>> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
>> +           here instead of trying to enforce TBAA on pointer types.  */
>> +        tree arg1_type = ptr_type_node;
>> +        tree lhs_type = TREE_TYPE (lhs);
>> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
>> +          required alignment (power) is 4 bytes regardless of data type.  */
>> +        tree align_ltype = build_aligned_type (lhs_type, 4);
>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>> +           the tree using the value from arg0.  The resulting type will match
>> +           the type of arg1.  */
>> +        gimple_seq stmts = NULL;
>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>> +                                      arg1_type, arg1, temp_offset);
>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
>> +           take an offset, but since we've already incorporated the offset
>> +           above, here we just pass in a zero.  */
>> +        gimple *g;
>> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
>> +                                               build_int_cst (arg1_type, 0)));
>> +        gimple_set_location (g, loc);
>> +        gsi_replace (gsi, g, true);
>> +        return true;
>> +      }
>> +
>> +    /* unaligned Vector stores.  */
>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>> +      {
>> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
>> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
>> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
>> +        location_t loc = gimple_location (stmt);
>> +        tree arg0_type = TREE_TYPE (arg0);
>> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
>> +        tree arg2_type = ptr_type_node;
>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>> +           the tree using the value from arg0.  The resulting type will match
>> +           the type of arg2.  */
>> +        gimple_seq stmts = NULL;
>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>> +                                      arg2_type, arg2, temp_offset);
>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>> +        gimple *g;
>> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
>> +                                          build_int_cst (arg2_type, 0)), arg0);
> 
> Here you are using the alignment of a "value", that is, you originally
> have sth like
> 
> __builtin_store (val_1, offset, ptr);
> 
> where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
> __attribute__((aligned(1))) type!).
> 
> I wonder if, given you don't play similar games as with the load the
> HW requirement
> is sth like vector element alignment?

Right, same alignment rules for stores as for loads.  Minimum alignment
is word alignment for all data types.

Bill
> 
> GIMPLE generally doesn't care about the alignment of registers (why
> should it) but you
> can't be sure they do not "leak" there.
> 
> Richard.
> 
>> +        gimple_set_location (g, loc);
>> +        gsi_replace (gsi, g, true);
>> +        return true;
>> +      }
>> +
>>     /* Vector Fused multiply-add (fma).  */
>>     case ALTIVEC_BUILTIN_VMADDFP:
>>     case VSX_BUILTIN_XVMADDDP:
>>     case ALTIVEC_BUILTIN_VMLADDUHM:
>>       {
Richard Biener July 10, 2018, 1:48 p.m. | #3
On Tue, Jul 10, 2018 at 3:33 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
>
>
> > On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> >
> > On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> >>
> >> Hi,
> >>  Re-posting.  Richard provided feedback on a previous version of this
> >> patch, I wanted to make sure he was/is OK with the latest. :-)
> >>
> >> Add support for Gimple folding for unaligned vector loads and stores.
> >>
> >> Regtest completed across variety of systems, P6,P7,P8,P9.
> >>
> >> [v2] Added the type for the MEM_REF, per feedback.
> >> Testcases for gimple-folding of the same are currently in-tree
> >> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> >> Re-tested, still looks good. :-)
> >>
> >> [v3] Updated the alignment for the MEM_REF to be 4bytes.
> >> Updated/added/removed comments in the code for clarity.
> >>
> >> OK for trunk?
> >>
> >> Thanks
> >> -Will
> >>
> >> [gcc]
> >>
> >> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
> >>
> >>        * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> >>        vec_xst variants to the list.
> >>        (rs6000_gimple_fold_builtin): Add support for folding unaligned
> >>        vector loads and stores.
> >>
> >> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> >> index 8bc4109..774c60a 100644
> >> --- a/gcc/config/rs6000/rs6000.c
> >> +++ b/gcc/config/rs6000/rs6000.c
> >> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
> >>     case ALTIVEC_BUILTIN_STVX_V8HI:
> >>     case ALTIVEC_BUILTIN_STVX_V4SI:
> >>     case ALTIVEC_BUILTIN_STVX_V4SF:
> >>     case ALTIVEC_BUILTIN_STVX_V2DI:
> >>     case ALTIVEC_BUILTIN_STVX_V2DF:
> >> +    case VSX_BUILTIN_STXVW4X_V16QI:
> >> +    case VSX_BUILTIN_STXVW4X_V8HI:
> >> +    case VSX_BUILTIN_STXVW4X_V4SF:
> >> +    case VSX_BUILTIN_STXVW4X_V4SI:
> >> +    case VSX_BUILTIN_STXVD2X_V2DF:
> >> +    case VSX_BUILTIN_STXVD2X_V2DI:
> >>       return true;
> >>     default:
> >>       return false;
> >>     }
> >> }
> >> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >>        gimple_set_location (g, loc);
> >>        gsi_replace (gsi, g, true);
> >>        return true;
> >>       }
> >>
> >> +    /* unaligned Vector loads.  */
> >> +    case VSX_BUILTIN_LXVW4X_V16QI:
> >> +    case VSX_BUILTIN_LXVW4X_V8HI:
> >> +    case VSX_BUILTIN_LXVW4X_V4SF:
> >> +    case VSX_BUILTIN_LXVW4X_V4SI:
> >> +    case VSX_BUILTIN_LXVD2X_V2DF:
> >> +    case VSX_BUILTIN_LXVD2X_V2DI:
> >> +      {
> >> +        arg0 = gimple_call_arg (stmt, 0);  // offset
> >> +        arg1 = gimple_call_arg (stmt, 1);  // address
> >> +        lhs = gimple_call_lhs (stmt);
> >> +        location_t loc = gimple_location (stmt);
> >> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
> >> +           here instead of trying to enforce TBAA on pointer types.  */
> >> +        tree arg1_type = ptr_type_node;
> >> +        tree lhs_type = TREE_TYPE (lhs);
> >> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
> >> +          required alignment (power) is 4 bytes regardless of data type.  */
> >> +        tree align_ltype = build_aligned_type (lhs_type, 4);
> >> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> >> +           the tree using the value from arg0.  The resulting type will match
> >> +           the type of arg1.  */
> >> +        gimple_seq stmts = NULL;
> >> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> >> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> >> +                                      arg1_type, arg1, temp_offset);
> >> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> >> +           take an offset, but since we've already incorporated the offset
> >> +           above, here we just pass in a zero.  */
> >> +        gimple *g;
> >> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> >> +                                               build_int_cst (arg1_type, 0)));
> >> +        gimple_set_location (g, loc);
> >> +        gsi_replace (gsi, g, true);
> >> +        return true;
> >> +      }
> >> +
> >> +    /* unaligned Vector stores.  */
> >> +    case VSX_BUILTIN_STXVW4X_V16QI:
> >> +    case VSX_BUILTIN_STXVW4X_V8HI:
> >> +    case VSX_BUILTIN_STXVW4X_V4SF:
> >> +    case VSX_BUILTIN_STXVW4X_V4SI:
> >> +    case VSX_BUILTIN_STXVD2X_V2DF:
> >> +    case VSX_BUILTIN_STXVD2X_V2DI:
> >> +      {
> >> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
> >> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
> >> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
> >> +        location_t loc = gimple_location (stmt);
> >> +        tree arg0_type = TREE_TYPE (arg0);
> >> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
> >> +        tree arg2_type = ptr_type_node;
> >> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> >> +           the tree using the value from arg0.  The resulting type will match
> >> +           the type of arg2.  */
> >> +        gimple_seq stmts = NULL;
> >> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> >> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> >> +                                      arg2_type, arg2, temp_offset);
> >> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >> +        gimple *g;
> >> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> >> +                                          build_int_cst (arg2_type, 0)), arg0);
> >
> > Here you are using the alignment of a "value", that is, you originally
> > have sth like
> >
> > __builtin_store (val_1, offset, ptr);
> >
> > where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
> > __attribute__((aligned(1))) type!).
> >
> > I wonder if, given you don't play similar games as with the load the
> > HW requirement
> > is sth like vector element alignment?
>
> Right, same alignment rules for stores as for loads.  Minimum alignment
> is word alignment for all data types.

So you need to fix it similarly.

Richard.

> Bill
> >
> > GIMPLE generally doesn't care about the alignment of registers (why
> > should it) but you
> > can't be sure they do not "leak" there.
> >
> > Richard.
> >
> >> +        gimple_set_location (g, loc);
> >> +        gsi_replace (gsi, g, true);
> >> +        return true;
> >> +      }
> >> +
> >>     /* Vector Fused multiply-add (fma).  */
> >>     case ALTIVEC_BUILTIN_VMADDFP:
> >>     case VSX_BUILTIN_XVMADDDP:
> >>     case ALTIVEC_BUILTIN_VMLADDUHM:
> >>       {
>
Bill Schmidt July 10, 2018, 1:51 p.m. | #4
> On Jul 10, 2018, at 8:48 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> 
> On Tue, Jul 10, 2018 at 3:33 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
>> 
>> 
>>> On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guenther@gmail.com> wrote:
>>> 
>>> On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>>>> 
>>>> Hi,
>>>> Re-posting.  Richard provided feedback on a previous version of this
>>>> patch, I wanted to make sure he was/is OK with the latest. :-)
>>>> 
>>>> Add support for Gimple folding for unaligned vector loads and stores.
>>>> 
>>>> Regtest completed across variety of systems, P6,P7,P8,P9.
>>>> 
>>>> [v2] Added the type for the MEM_REF, per feedback.
>>>> Testcases for gimple-folding of the same are currently in-tree
>>>> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
>>>> Re-tested, still looks good. :-)
>>>> 
>>>> [v3] Updated the alignment for the MEM_REF to be 4bytes.
>>>> Updated/added/removed comments in the code for clarity.
>>>> 
>>>> OK for trunk?
>>>> 
>>>> Thanks
>>>> -Will
>>>> 
>>>> [gcc]
>>>> 
>>>> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
>>>> 
>>>>       * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
>>>>       vec_xst variants to the list.
>>>>       (rs6000_gimple_fold_builtin): Add support for folding unaligned
>>>>       vector loads and stores.
>>>> 
>>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>>> index 8bc4109..774c60a 100644
>>>> --- a/gcc/config/rs6000/rs6000.c
>>>> +++ b/gcc/config/rs6000/rs6000.c
>>>> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
>>>>    case ALTIVEC_BUILTIN_STVX_V8HI:
>>>>    case ALTIVEC_BUILTIN_STVX_V4SI:
>>>>    case ALTIVEC_BUILTIN_STVX_V4SF:
>>>>    case ALTIVEC_BUILTIN_STVX_V2DI:
>>>>    case ALTIVEC_BUILTIN_STVX_V2DF:
>>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>>>>      return true;
>>>>    default:
>>>>      return false;
>>>>    }
>>>> }
>>>> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>>>       gimple_set_location (g, loc);
>>>>       gsi_replace (gsi, g, true);
>>>>       return true;
>>>>      }
>>>> 
>>>> +    /* unaligned Vector loads.  */
>>>> +    case VSX_BUILTIN_LXVW4X_V16QI:
>>>> +    case VSX_BUILTIN_LXVW4X_V8HI:
>>>> +    case VSX_BUILTIN_LXVW4X_V4SF:
>>>> +    case VSX_BUILTIN_LXVW4X_V4SI:
>>>> +    case VSX_BUILTIN_LXVD2X_V2DF:
>>>> +    case VSX_BUILTIN_LXVD2X_V2DI:
>>>> +      {
>>>> +        arg0 = gimple_call_arg (stmt, 0);  // offset
>>>> +        arg1 = gimple_call_arg (stmt, 1);  // address
>>>> +        lhs = gimple_call_lhs (stmt);
>>>> +        location_t loc = gimple_location (stmt);
>>>> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
>>>> +           here instead of trying to enforce TBAA on pointer types.  */
>>>> +        tree arg1_type = ptr_type_node;
>>>> +        tree lhs_type = TREE_TYPE (lhs);
>>>> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
>>>> +          required alignment (power) is 4 bytes regardless of data type.  */
>>>> +        tree align_ltype = build_aligned_type (lhs_type, 4);
>>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>>>> +           the tree using the value from arg0.  The resulting type will match
>>>> +           the type of arg1.  */
>>>> +        gimple_seq stmts = NULL;
>>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
>>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>>>> +                                      arg1_type, arg1, temp_offset);
>>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>>>> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
>>>> +           take an offset, but since we've already incorporated the offset
>>>> +           above, here we just pass in a zero.  */
>>>> +        gimple *g;
>>>> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
>>>> +                                               build_int_cst (arg1_type, 0)));
>>>> +        gimple_set_location (g, loc);
>>>> +        gsi_replace (gsi, g, true);
>>>> +        return true;
>>>> +      }
>>>> +
>>>> +    /* unaligned Vector stores.  */
>>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>>>> +      {
>>>> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
>>>> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
>>>> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
>>>> +        location_t loc = gimple_location (stmt);
>>>> +        tree arg0_type = TREE_TYPE (arg0);
>>>> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
>>>> +        tree arg2_type = ptr_type_node;
>>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>>>> +           the tree using the value from arg0.  The resulting type will match
>>>> +           the type of arg2.  */
>>>> +        gimple_seq stmts = NULL;
>>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
>>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>>>> +                                      arg2_type, arg2, temp_offset);
>>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>>>> +        gimple *g;
>>>> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
>>>> +                                          build_int_cst (arg2_type, 0)), arg0);
>>> 
>>> Here you are using the alignment of a "value", that is, you originally
>>> have sth like
>>> 
>>> __builtin_store (val_1, offset, ptr);
>>> 
>>> where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
>>> __attribute__((aligned(1))) type!).
>>> 
>>> I wonder if, given you don't play similar games as with the load the
>>> HW requirement
>>> is sth like vector element alignment?
>> 
>> Right, same alignment rules for stores as for loads.  Minimum alignment
>> is word alignment for all data types.
> 
> So you need to fix it similarly.

Agree, *Will* needs to fix it similarly. :-)

It's really confusing having two of us with the same name...

Bill
> 
> Richard.
> 
>> Bill
>>> 
>>> GIMPLE generally doesn't care about the alignment of registers (why
>>> should it) but you
>>> can't be sure they do not "leak" there.
>>> 
>>> Richard.
>>> 
>>>> +        gimple_set_location (g, loc);
>>>> +        gsi_replace (gsi, g, true);
>>>> +        return true;
>>>> +      }
>>>> +
>>>>    /* Vector Fused multiply-add (fma).  */
>>>>    case ALTIVEC_BUILTIN_VMADDFP:
>>>>    case VSX_BUILTIN_XVMADDDP:
>>>>    case ALTIVEC_BUILTIN_VMLADDUHM:
>>>>      {
Richard Biener July 10, 2018, 2:23 p.m. | #5
On Tue, Jul 10, 2018 at 3:51 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
>
>
> > On Jul 10, 2018, at 8:48 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Jul 10, 2018 at 3:33 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
> >>
> >>
> >>> On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> >>>
> >>> On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> >>>>
> >>>> Hi,
> >>>> Re-posting.  Richard provided feedback on a previous version of this
> >>>> patch, I wanted to make sure he was/is OK with the latest. :-)
> >>>>
> >>>> Add support for Gimple folding for unaligned vector loads and stores.
> >>>>
> >>>> Regtest completed across variety of systems, P6,P7,P8,P9.
> >>>>
> >>>> [v2] Added the type for the MEM_REF, per feedback.
> >>>> Testcases for gimple-folding of the same are currently in-tree
> >>>> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> >>>> Re-tested, still looks good. :-)
> >>>>
> >>>> [v3] Updated the alignment for the MEM_REF to be 4bytes.
> >>>> Updated/added/removed comments in the code for clarity.
> >>>>
> >>>> OK for trunk?
> >>>>
> >>>> Thanks
> >>>> -Will
> >>>>
> >>>> [gcc]
> >>>>
> >>>> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
> >>>>
> >>>>       * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> >>>>       vec_xst variants to the list.
> >>>>       (rs6000_gimple_fold_builtin): Add support for folding unaligned
> >>>>       vector loads and stores.
> >>>>
> >>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> >>>> index 8bc4109..774c60a 100644
> >>>> --- a/gcc/config/rs6000/rs6000.c
> >>>> +++ b/gcc/config/rs6000/rs6000.c
> >>>> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
> >>>>    case ALTIVEC_BUILTIN_STVX_V8HI:
> >>>>    case ALTIVEC_BUILTIN_STVX_V4SI:
> >>>>    case ALTIVEC_BUILTIN_STVX_V4SF:
> >>>>    case ALTIVEC_BUILTIN_STVX_V2DI:
> >>>>    case ALTIVEC_BUILTIN_STVX_V2DF:
> >>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
> >>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
> >>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
> >>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
> >>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
> >>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
> >>>>      return true;
> >>>>    default:
> >>>>      return false;
> >>>>    }
> >>>> }
> >>>> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >>>>       gimple_set_location (g, loc);
> >>>>       gsi_replace (gsi, g, true);
> >>>>       return true;
> >>>>      }
> >>>>
> >>>> +    /* unaligned Vector loads.  */
> >>>> +    case VSX_BUILTIN_LXVW4X_V16QI:
> >>>> +    case VSX_BUILTIN_LXVW4X_V8HI:
> >>>> +    case VSX_BUILTIN_LXVW4X_V4SF:
> >>>> +    case VSX_BUILTIN_LXVW4X_V4SI:
> >>>> +    case VSX_BUILTIN_LXVD2X_V2DF:
> >>>> +    case VSX_BUILTIN_LXVD2X_V2DI:
> >>>> +      {
> >>>> +        arg0 = gimple_call_arg (stmt, 0);  // offset
> >>>> +        arg1 = gimple_call_arg (stmt, 1);  // address
> >>>> +        lhs = gimple_call_lhs (stmt);
> >>>> +        location_t loc = gimple_location (stmt);
> >>>> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
> >>>> +           here instead of trying to enforce TBAA on pointer types.  */
> >>>> +        tree arg1_type = ptr_type_node;
> >>>> +        tree lhs_type = TREE_TYPE (lhs);
> >>>> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
> >>>> +          required alignment (power) is 4 bytes regardless of data type.  */
> >>>> +        tree align_ltype = build_aligned_type (lhs_type, 4);
> >>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> >>>> +           the tree using the value from arg0.  The resulting type will match
> >>>> +           the type of arg1.  */
> >>>> +        gimple_seq stmts = NULL;
> >>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> >>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> >>>> +                                      arg1_type, arg1, temp_offset);
> >>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >>>> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> >>>> +           take an offset, but since we've already incorporated the offset
> >>>> +           above, here we just pass in a zero.  */
> >>>> +        gimple *g;
> >>>> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> >>>> +                                               build_int_cst (arg1_type, 0)));
> >>>> +        gimple_set_location (g, loc);
> >>>> +        gsi_replace (gsi, g, true);
> >>>> +        return true;
> >>>> +      }
> >>>> +
> >>>> +    /* unaligned Vector stores.  */
> >>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
> >>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
> >>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
> >>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
> >>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
> >>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
> >>>> +      {
> >>>> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
> >>>> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
> >>>> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
> >>>> +        location_t loc = gimple_location (stmt);
> >>>> +        tree arg0_type = TREE_TYPE (arg0);
> >>>> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
> >>>> +        tree arg2_type = ptr_type_node;
> >>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> >>>> +           the tree using the value from arg0.  The resulting type will match
> >>>> +           the type of arg2.  */
> >>>> +        gimple_seq stmts = NULL;
> >>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> >>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> >>>> +                                      arg2_type, arg2, temp_offset);
> >>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >>>> +        gimple *g;
> >>>> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> >>>> +                                          build_int_cst (arg2_type, 0)), arg0);
> >>>
> >>> Here you are using the alignment of a "value", that is, you originally
> >>> have sth like
> >>>
> >>> __builtin_store (val_1, offset, ptr);
> >>>
> >>> where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
> >>> __attribute__((aligned(1))) type!).
> >>>
> >>> I wonder if, given you don't play similar games as with the load the
> >>> HW requirement
> >>> is sth like vector element alignment?
> >>
> >> Right, same alignment rules for stores as for loads.  Minimum alignment
> >> is word alignment for all data types.
> >
> > So you need to fix it similarly.
>
> Agree, *Will* needs to fix it similarly. :-)
>
> It's really confusing having two of us with the same name...

Oh, and I even didn't notice ;)

Richard.

> Bill
> >
> > Richard.
> >
> >> Bill
> >>>
> >>> GIMPLE generally doesn't care about the alignment of registers (why
> >>> should it) but you
> >>> can't be sure they do not "leak" there.
> >>>
> >>> Richard.
> >>>
> >>>> +        gimple_set_location (g, loc);
> >>>> +        gsi_replace (gsi, g, true);
> >>>> +        return true;
> >>>> +      }
> >>>> +
> >>>>    /* Vector Fused multiply-add (fma).  */
> >>>>    case ALTIVEC_BUILTIN_VMADDFP:
> >>>>    case VSX_BUILTIN_XVMADDDP:
> >>>>    case ALTIVEC_BUILTIN_VMLADDUHM:
> >>>>      {
>
Will Schmidt July 10, 2018, 5:05 p.m. | #6
On Tue, 2018-07-10 at 16:23 +0200, Richard Biener wrote:
> On Tue, Jul 10, 2018 at 3:51 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
> >
> >
> > > On Jul 10, 2018, at 8:48 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, Jul 10, 2018 at 3:33 PM Bill Schmidt <wschmidt@linux.ibm.com> wrote:
> > >>
> > >>
> > >>> On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> > >>>
> > >>> On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> > >>>>
> > >>>> Hi,
> > >>>> Re-posting.  Richard provided feedback on a previous version of this
> > >>>> patch, I wanted to make sure he was/is OK with the latest. :-)
> > >>>>
> > >>>> Add support for Gimple folding for unaligned vector loads and stores.
> > >>>>
> > >>>> Regtest completed across variety of systems, P6,P7,P8,P9.
> > >>>>
> > >>>> [v2] Added the type for the MEM_REF, per feedback.
> > >>>> Testcases for gimple-folding of the same are currently in-tree
> > >>>> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> > >>>> Re-tested, still looks good. :-)
> > >>>>
> > >>>> [v3] Updated the alignment for the MEM_REF to be 4bytes.
> > >>>> Updated/added/removed comments in the code for clarity.
> > >>>>
> > >>>> OK for trunk?
> > >>>>
> > >>>> Thanks
> > >>>> -Will
> > >>>>
> > >>>> [gcc]
> > >>>>
> > >>>> 2018-07-09 Will Schmidt <will_schmidt@vnet.ibm.com>
> > >>>>
> > >>>>       * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> > >>>>       vec_xst variants to the list.
> > >>>>       (rs6000_gimple_fold_builtin): Add support for folding unaligned
> > >>>>       vector loads and stores.
> > >>>>
> > >>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > >>>> index 8bc4109..774c60a 100644
> > >>>> --- a/gcc/config/rs6000/rs6000.c
> > >>>> +++ b/gcc/config/rs6000/rs6000.c
> > >>>> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
> > >>>>    case ALTIVEC_BUILTIN_STVX_V8HI:
> > >>>>    case ALTIVEC_BUILTIN_STVX_V4SI:
> > >>>>    case ALTIVEC_BUILTIN_STVX_V4SF:
> > >>>>    case ALTIVEC_BUILTIN_STVX_V2DI:
> > >>>>    case ALTIVEC_BUILTIN_STVX_V2DF:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
> > >>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
> > >>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
> > >>>>      return true;
> > >>>>    default:
> > >>>>      return false;
> > >>>>    }
> > >>>> }
> > >>>> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > >>>>       gimple_set_location (g, loc);
> > >>>>       gsi_replace (gsi, g, true);
> > >>>>       return true;
> > >>>>      }
> > >>>>
> > >>>> +    /* unaligned Vector loads.  */
> > >>>> +    case VSX_BUILTIN_LXVW4X_V16QI:
> > >>>> +    case VSX_BUILTIN_LXVW4X_V8HI:
> > >>>> +    case VSX_BUILTIN_LXVW4X_V4SF:
> > >>>> +    case VSX_BUILTIN_LXVW4X_V4SI:
> > >>>> +    case VSX_BUILTIN_LXVD2X_V2DF:
> > >>>> +    case VSX_BUILTIN_LXVD2X_V2DI:
> > >>>> +      {
> > >>>> +        arg0 = gimple_call_arg (stmt, 0);  // offset
> > >>>> +        arg1 = gimple_call_arg (stmt, 1);  // address
> > >>>> +        lhs = gimple_call_lhs (stmt);
> > >>>> +        location_t loc = gimple_location (stmt);
> > >>>> +        /* Since arg1 may be cast to a different type, just use ptr_type_node
> > >>>> +           here instead of trying to enforce TBAA on pointer types.  */
> > >>>> +        tree arg1_type = ptr_type_node;
> > >>>> +        tree lhs_type = TREE_TYPE (lhs);
> > >>>> +        /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
> > >>>> +          required alignment (power) is 4 bytes regardless of data type.  */
> > >>>> +        tree align_ltype = build_aligned_type (lhs_type, 4);
> > >>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> > >>>> +           the tree using the value from arg0.  The resulting type will match
> > >>>> +           the type of arg1.  */
> > >>>> +        gimple_seq stmts = NULL;
> > >>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> > >>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> > >>>> +                                      arg1_type, arg1, temp_offset);
> > >>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > >>>> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> > >>>> +           take an offset, but since we've already incorporated the offset
> > >>>> +           above, here we just pass in a zero.  */
> > >>>> +        gimple *g;
> > >>>> +        g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> > >>>> +                                               build_int_cst (arg1_type, 0)));
> > >>>> +        gimple_set_location (g, loc);
> > >>>> +        gsi_replace (gsi, g, true);
> > >>>> +        return true;
> > >>>> +      }
> > >>>> +
> > >>>> +    /* unaligned Vector stores.  */
> > >>>> +    case VSX_BUILTIN_STXVW4X_V16QI:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V8HI:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V4SF:
> > >>>> +    case VSX_BUILTIN_STXVW4X_V4SI:
> > >>>> +    case VSX_BUILTIN_STXVD2X_V2DF:
> > >>>> +    case VSX_BUILTIN_STXVD2X_V2DI:
> > >>>> +      {
> > >>>> +        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
> > >>>> +        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
> > >>>> +        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
> > >>>> +        location_t loc = gimple_location (stmt);
> > >>>> +        tree arg0_type = TREE_TYPE (arg0);
> > >>>> +        /* Use ptr_type_node (no TBAA) for the arg2_type.  */
> > >>>> +        tree arg2_type = ptr_type_node;
> > >>>> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> > >>>> +           the tree using the value from arg0.  The resulting type will match
> > >>>> +           the type of arg2.  */
> > >>>> +        gimple_seq stmts = NULL;
> > >>>> +        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> > >>>> +        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> > >>>> +                                      arg2_type, arg2, temp_offset);
> > >>>> +        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > >>>> +        gimple *g;
> > >>>> +        g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> > >>>> +                                          build_int_cst (arg2_type, 0)), arg0);
> > >>>
> > >>> Here you are using the alignment of a "value", that is, you originally
> > >>> have sth like
> > >>>
> > >>> __builtin_store (val_1, offset, ptr);
> > >>>
> > >>> where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
> > >>> __attribute__((aligned(1))) type!).
> > >>>
> > >>> I wonder if, given you don't play similar games as with the load the
> > >>> HW requirement
> > >>> is sth like vector element alignment?
> > >>
> > >> Right, same alignment rules for stores as for loads.  Minimum alignment
> > >> is word alignment for all data types.
> > >
> > > So you need to fix it similarly.

Ok.   Patch updated, sniff-tests pass.   I'll have v4 posted shortly.

> >
> > Agree, *Will* needs to fix it similarly. :-)
> >
> > It's really confusing having two of us with the same name...
> 
> Oh, and I even didn't notice ;)

On a really good day we can confuse ourselves on which one of us said
what. :-)


Thanks,
-Will

> 
> Richard.
> 
> > Bill
> > >
> > > Richard.
> > >
> > >> Bill
> > >>>
> > >>> GIMPLE generally doesn't care about the alignment of registers (why
> > >>> should it) but you
> > >>> can't be sure they do not "leak" there.
> > >>>
> > >>> Richard.
> > >>>
> > >>>> +        gimple_set_location (g, loc);
> > >>>> +        gsi_replace (gsi, g, true);
> > >>>> +        return true;
> > >>>> +      }
> > >>>> +
> > >>>>    /* Vector Fused multiply-add (fma).  */
> > >>>>    case ALTIVEC_BUILTIN_VMADDFP:
> > >>>>    case VSX_BUILTIN_XVMADDDP:
> > >>>>    case ALTIVEC_BUILTIN_VMLADDUHM:
> > >>>>      {
> >
>

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8bc4109..774c60a 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15401,10 +15401,16 @@  rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
     case ALTIVEC_BUILTIN_STVX_V8HI:
     case ALTIVEC_BUILTIN_STVX_V4SI:
     case ALTIVEC_BUILTIN_STVX_V4SF:
     case ALTIVEC_BUILTIN_STVX_V2DI:
     case ALTIVEC_BUILTIN_STVX_V2DF:
+    case VSX_BUILTIN_STXVW4X_V16QI:
+    case VSX_BUILTIN_STXVW4X_V8HI:
+    case VSX_BUILTIN_STXVW4X_V4SF:
+    case VSX_BUILTIN_STXVW4X_V4SI:
+    case VSX_BUILTIN_STXVD2X_V2DF:
+    case VSX_BUILTIN_STXVD2X_V2DI:
       return true;
     default:
       return false;
     }
 }
@@ -15910,10 +15916,79 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	gimple_set_location (g, loc);
 	gsi_replace (gsi, g, true);
 	return true;
       }
 
+    /* unaligned Vector loads.  */
+    case VSX_BUILTIN_LXVW4X_V16QI:
+    case VSX_BUILTIN_LXVW4X_V8HI:
+    case VSX_BUILTIN_LXVW4X_V4SF:
+    case VSX_BUILTIN_LXVW4X_V4SI:
+    case VSX_BUILTIN_LXVD2X_V2DF:
+    case VSX_BUILTIN_LXVD2X_V2DI:
+      {
+	 arg0 = gimple_call_arg (stmt, 0);  // offset
+	 arg1 = gimple_call_arg (stmt, 1);  // address
+	 lhs = gimple_call_lhs (stmt);
+	 location_t loc = gimple_location (stmt);
+	 /* Since arg1 may be cast to a different type, just use ptr_type_node
+	    here instead of trying to enforce TBAA on pointer types.  */
+	 tree arg1_type = ptr_type_node;
+	 tree lhs_type = TREE_TYPE (lhs);
+	 /* in GIMPLE the type of the MEM_REF specifies the alignment.  The
+	   required alignment (power) is 4 bytes regardless of data type.  */
+	 tree align_ltype = build_aligned_type (lhs_type, 4);
+	 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	    the tree using the value from arg0.  The resulting type will match
+	    the type of arg1.  */
+	 gimple_seq stmts = NULL;
+	 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+	 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg1_type, arg1, temp_offset);
+	 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	 /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+	    take an offset, but since we've already incorporated the offset
+	    above, here we just pass in a zero.  */
+	 gimple *g;
+	 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
+						build_int_cst (arg1_type, 0)));
+	 gimple_set_location (g, loc);
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
+    /* unaligned Vector stores.  */
+    case VSX_BUILTIN_STXVW4X_V16QI:
+    case VSX_BUILTIN_STXVW4X_V8HI:
+    case VSX_BUILTIN_STXVW4X_V4SF:
+    case VSX_BUILTIN_STXVW4X_V4SI:
+    case VSX_BUILTIN_STXVD2X_V2DF:
+    case VSX_BUILTIN_STXVD2X_V2DI:
+      {
+	 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
+	 arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
+	 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
+	 location_t loc = gimple_location (stmt);
+	 tree arg0_type = TREE_TYPE (arg0);
+	 /* Use ptr_type_node (no TBAA) for the arg2_type.  */
+	 tree arg2_type = ptr_type_node;
+	 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	    the tree using the value from arg0.  The resulting type will match
+	    the type of arg2.  */
+	 gimple_seq stmts = NULL;
+	 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+	 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg2_type, arg2, temp_offset);
+	 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	 gimple *g;
+	 g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
+					   build_int_cst (arg2_type, 0)), arg0);
+	 gimple_set_location (g, loc);
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
     /* Vector Fused multiply-add (fma).  */
     case ALTIVEC_BUILTIN_VMADDFP:
     case VSX_BUILTIN_XVMADDDP:
     case ALTIVEC_BUILTIN_VMLADDUHM:
       {