diff mbox

Vectorize 2*x as x+x if needed

Message ID alpine.DEB.2.02.1606121105540.12173@laptop-mg.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse June 12, 2016, 9:19 a.m. UTC
Hello,

Canonicalizing x+x to x*2 made us regress some vectorization tests on
sparc. As suggested by Richard, this patch lets the vectorizer handle x*2 as
x+x if that helps. Let me copy a few remarks I had in the PR:

« We could probably also handle x*3 as x+x+x, but where to stop?

I don't understand why the optab test for LSHIFT_EXPR was using
optab_vector; as far as I understand, we are creating vec<<3, so
optab_scalar makes more sense.

I gave priority to x+x over x<<1; I am not sure that's right, but it probably
doesn't matter much, as one will probably be turned into the other in later
passes. »

Rainer bootstrapped and regtested the patch on sparc. As a bonus, it now
vectorizes one more loop in gcc.dg/vect/vect-iv-9.c; I'll let someone else
tweak the test (which will temporarily appear as a FAIL).

2016-06-13  Marc Glisse  <marc.glisse@inria.fr>

 	PR tree-optimization/70923
 	* tree-vect-patterns.c (vect_recog_mult_pattern): Use optab_scalar
 	for LSHIFT_EXPR. Handle 2 * X as X + X.

Comments

Richard Biener June 13, 2016, 10:24 a.m. UTC | #1
On Sun, Jun 12, 2016 at 11:19 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> canonicalizing x+x to x*2 made us regress some vectorization tests on sparc.
> As suggested by Richard, this lets the vectorizer handle x*2 as x+x if that
> helps. Let me copy a few remarks I had in the PR:
>
> « We could probably also handle x*3 as x+x+x, but where to stop?
>
> I don't understand why the optab test for LSHIFT_EXPR was using
> optab_vector, as far as I understand we are creating vec<<3, so optab_scalar
> makes more sense.

I think it should test both (OK if either one is available), and the
current optab_vector makes more sense since it is more generic.

Ok with either not changing optab_vector to optab_scalar or testing both with ||

Thanks,
Richard.

> I gave priority to x+x over x<<1, not sure if that's right, it probably
> doesn't matter much as one will probably be turned into the other in later
> passes. »
>
> Rainer bootstrapped and regtested the patch on sparc. As a bonus, it now
> vectorizes one more loop in gcc.dg/vect/vect-iv-9.c, I'll let someone else
> tweak the test (which will temporarily appear as a FAIL).
>
> 2016-06-13  Marc Glisse  <marc.glisse@inria.fr>
>
>         PR tree-optimization/70923
>         * tree-vect-patterns.c (vect_recog_mult_pattern): Use optab_scalar
>         for LSHIFT_EXPR. Handle 2 * X as X + X.
>
> --
> Marc Glisse
> Index: gcc/tree-vect-patterns.c
> ===================================================================
> *** gcc/tree-vect-patterns.c    (revision 237336)
> --- gcc/tree-vect-patterns.c    (working copy)
> *************** vect_recog_vector_vector_shift_pattern (
> *** 2166,2189 ****
>
>     * Return value: A new stmt that will be used to replace the
> multiplication
>       S1 or S2 stmt.  */
>
>   static gimple *
>   vect_recog_mult_pattern (vec<gimple *> *stmts,
>                          tree *type_in, tree *type_out)
>   {
>     gimple *last_stmt = stmts->pop ();
>     tree oprnd0, oprnd1, vectype, itype;
> !   gimple *pattern_stmt, *def_stmt;
>     optab optab;
>     stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> !   int power2_val, power2_neg_val;
>     tree shift;
>
>     if (!is_gimple_assign (last_stmt))
>       return NULL;
>
>     if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
>       return NULL;
>
>     oprnd0 = gimple_assign_rhs1 (last_stmt);
>     oprnd1 = gimple_assign_rhs2 (last_stmt);
> --- 2166,2189 ----
>
>     * Return value: A new stmt that will be used to replace the
> multiplication
>       S1 or S2 stmt.  */
>
>   static gimple *
>   vect_recog_mult_pattern (vec<gimple *> *stmts,
>                          tree *type_in, tree *type_out)
>   {
>     gimple *last_stmt = stmts->pop ();
>     tree oprnd0, oprnd1, vectype, itype;
> !   gimple *pattern_stmt;
>     optab optab;
>     stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> !   int power2_val;
>     tree shift;
>
>     if (!is_gimple_assign (last_stmt))
>       return NULL;
>
>     if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
>       return NULL;
>
>     oprnd0 = gimple_assign_rhs1 (last_stmt);
>     oprnd1 = gimple_assign_rhs2 (last_stmt);
> *************** vect_recog_mult_pattern (vec<gimple *> *
> *** 2203,2261 ****
>        don't attempt to optimize this.  */
>     optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
>     if (optab != unknown_optab)
>       {
>         machine_mode vec_mode = TYPE_MODE (vectype);
>         int icode = (int) optab_handler (optab, vec_mode);
>         if (icode != CODE_FOR_nothing)
>         return NULL;
>       }
>
> !   /* If target cannot handle vector left shift then we cannot
> !      optimize and bail out.  */
> !   optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> !   if (!optab
> !       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
> !     return NULL;
> !
> !   power2_val = wi::exact_log2 (oprnd1);
> !   power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
>
> !   /* Handle constant operands that are postive or negative powers of 2.
> */
> !   if (power2_val != -1)
> !     {
> !       shift = build_int_cst (itype, power2_val);
> !       pattern_stmt
> !       = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> !                              LSHIFT_EXPR, oprnd0, shift);
> !     }
> !   else if (power2_neg_val != -1)
>       {
>         /* If the target cannot handle vector NEGATE then we cannot
>          do the optimization.  */
> !       optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
>         if (!optab
>           || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
>         return NULL;
>
> !       shift = build_int_cst (itype, power2_neg_val);
> !       def_stmt
>         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> !                              LSHIFT_EXPR, oprnd0, shift);
> !       new_pattern_def_seq (stmt_vinfo, def_stmt);
>         pattern_stmt
> !        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> !                               NEGATE_EXPR, gimple_assign_lhs (def_stmt));
>       }
>     else
>       return NULL;
>
>     /* Pattern detected.  */
>     if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
>                      "vect_recog_mult_pattern: detected:\n");
>
>     if (dump_enabled_p ())
>       dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
>                           pattern_stmt,0);
>
>     stmts->safe_push (last_stmt);
> --- 2203,2271 ----
>        don't attempt to optimize this.  */
>     optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
>     if (optab != unknown_optab)
>       {
>         machine_mode vec_mode = TYPE_MODE (vectype);
>         int icode = (int) optab_handler (optab, vec_mode);
>         if (icode != CODE_FOR_nothing)
>         return NULL;
>       }
>
> !   bool negate = wi::neg_p (oprnd1, TYPE_SIGN (TREE_TYPE (oprnd1)));
>
> !   if (negate)
>       {
>         /* If the target cannot handle vector NEGATE then we cannot
>          do the optimization.  */
> !       optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_default);
>         if (!optab
>           || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
>         return NULL;
> +       power2_val = wi::exact_log2 (wi::neg (oprnd1));
> +     }
> +   else
> +     {
> +       power2_val = wi::exact_log2 (oprnd1);
> +     }
>
> !   /* Handle constant operands +-2 if target can handle vector addition.
> */
> !   if (power2_val == 1
> !       && (optab = optab_for_tree_code (PLUS_EXPR, vectype, optab_default))
> !       && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
> !     {
> !       pattern_stmt
>         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> !                              PLUS_EXPR, oprnd0, oprnd0);
> !     }
> !   /* Handle constant operands that are positive or negative powers of 2
> !      if target can handle vector left shift.  */
> !   else if (power2_val != -1
> !          && (optab = optab_for_tree_code (LSHIFT_EXPR, vectype,
> optab_scalar))
> !          && optab_handler (optab, TYPE_MODE (vectype)) !=
> CODE_FOR_nothing)
> !     {
> !       shift = build_int_cst (itype, power2_val);
>         pattern_stmt
> !       = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> !                              LSHIFT_EXPR, oprnd0, shift);
>       }
>     else
>       return NULL;
>
> +   if (negate)
> +     {
> +       tree tmp_var = gimple_assign_lhs (pattern_stmt);
> +       new_pattern_def_seq (stmt_vinfo, pattern_stmt);
> +       pattern_stmt
> +        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> +                               NEGATE_EXPR, tmp_var);
> +     }
> +
>     /* Pattern detected.  */
>     if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
>                      "vect_recog_mult_pattern: detected:\n");
>
>     if (dump_enabled_p ())
>       dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
>                           pattern_stmt,0);
>
>     stmts->safe_push (last_stmt);
>
Marc Glisse June 13, 2016, 11:04 a.m. UTC | #2
On Mon, 13 Jun 2016, Richard Biener wrote:

> On Sun, Jun 12, 2016 at 11:19 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> canonicalizing x+x to x*2 made us regress some vectorization tests on sparc.
>> As suggested by Richard, this lets the vectorizer handle x*2 as x+x if that
>> helps. Let me copy a few remarks I had in the PR:
>>
>> « We could probably also handle x*3 as x+x+x, but where to stop?
>>
>> I don't understand why the optab test for LSHIFT_EXPR was using
>> optab_vector, as far as I understand we are creating vec<<3, so optab_scalar
>> makes more sense.
>
> I think it should test both (ok if either one is available) and the

I was hoping there would already be some magic in place so that asking for 
the scalar one would answer yes if there is only a vector version :-(

> current optab_vector makes more sense since it is more generic.

On the other hand, some platforms can only handle the optab_scalar
version, I believe.

> Ok with either not changing optab_vector to optab_scalar or testing both with ||

Actually, I just noticed vect_supportable_shift, which does "both"; I'll
see if I can call that function instead of redoing things manually.

Thanks,
diff mbox

Patch

Index: gcc/tree-vect-patterns.c
===================================================================
*** gcc/tree-vect-patterns.c	(revision 237336)
--- gcc/tree-vect-patterns.c	(working copy)
*************** vect_recog_vector_vector_shift_pattern (
*** 2166,2189 ****
  
    * Return value: A new stmt that will be used to replace the multiplication
      S1 or S2 stmt.  */
  
  static gimple *
  vect_recog_mult_pattern (vec<gimple *> *stmts,
  			 tree *type_in, tree *type_out)
  {
    gimple *last_stmt = stmts->pop ();
    tree oprnd0, oprnd1, vectype, itype;
!   gimple *pattern_stmt, *def_stmt;
    optab optab;
    stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
!   int power2_val, power2_neg_val;
    tree shift;
  
    if (!is_gimple_assign (last_stmt))
      return NULL;
  
    if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
      return NULL;
  
    oprnd0 = gimple_assign_rhs1 (last_stmt);
    oprnd1 = gimple_assign_rhs2 (last_stmt);
--- 2166,2189 ----
  
    * Return value: A new stmt that will be used to replace the multiplication
      S1 or S2 stmt.  */
  
  static gimple *
  vect_recog_mult_pattern (vec<gimple *> *stmts,
  			 tree *type_in, tree *type_out)
  {
    gimple *last_stmt = stmts->pop ();
    tree oprnd0, oprnd1, vectype, itype;
!   gimple *pattern_stmt;
    optab optab;
    stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
!   int power2_val;
    tree shift;
  
    if (!is_gimple_assign (last_stmt))
      return NULL;
  
    if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
      return NULL;
  
    oprnd0 = gimple_assign_rhs1 (last_stmt);
    oprnd1 = gimple_assign_rhs2 (last_stmt);
*************** vect_recog_mult_pattern (vec<gimple *> *
*** 2203,2261 ****
       don't attempt to optimize this.  */
    optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    if (optab != unknown_optab)
      {
        machine_mode vec_mode = TYPE_MODE (vectype);
        int icode = (int) optab_handler (optab, vec_mode);
        if (icode != CODE_FOR_nothing)
  	return NULL;
      }
  
!   /* If target cannot handle vector left shift then we cannot
!      optimize and bail out.  */
!   optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
!   if (!optab
!       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
!     return NULL;
! 
!   power2_val = wi::exact_log2 (oprnd1);
!   power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
  
!   /* Handle constant operands that are postive or negative powers of 2.  */
!   if (power2_val != -1)
!     {
!       shift = build_int_cst (itype, power2_val);
!       pattern_stmt
! 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
! 			       LSHIFT_EXPR, oprnd0, shift);
!     }
!   else if (power2_neg_val != -1)
      {
        /* If the target cannot handle vector NEGATE then we cannot
  	 do the optimization.  */
!       optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
        if (!optab
  	  || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
  	return NULL;
  
!       shift = build_int_cst (itype, power2_neg_val);
!       def_stmt
  	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
! 			       LSHIFT_EXPR, oprnd0, shift);
!       new_pattern_def_seq (stmt_vinfo, def_stmt);
        pattern_stmt
! 	 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
! 				NEGATE_EXPR, gimple_assign_lhs (def_stmt));
      }
    else
      return NULL;
  
    /* Pattern detected.  */
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
  		     "vect_recog_mult_pattern: detected:\n");
  
    if (dump_enabled_p ())
      dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
  			  pattern_stmt,0);
  
    stmts->safe_push (last_stmt);
--- 2203,2271 ----
       don't attempt to optimize this.  */
    optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    if (optab != unknown_optab)
      {
        machine_mode vec_mode = TYPE_MODE (vectype);
        int icode = (int) optab_handler (optab, vec_mode);
        if (icode != CODE_FOR_nothing)
  	return NULL;
      }
  
!   bool negate = wi::neg_p (oprnd1, TYPE_SIGN (TREE_TYPE (oprnd1)));
  
!   if (negate)
      {
        /* If the target cannot handle vector NEGATE then we cannot
  	 do the optimization.  */
!       optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_default);
        if (!optab
  	  || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
  	return NULL;
+       power2_val = wi::exact_log2 (wi::neg (oprnd1));
+     }
+   else
+     {
+       power2_val = wi::exact_log2 (oprnd1);
+     }
  
!   /* Handle constant operands +-2 if target can handle vector addition.  */
!   if (power2_val == 1
!       && (optab = optab_for_tree_code (PLUS_EXPR, vectype, optab_default))
!       && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
!     {
!       pattern_stmt
  	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
! 			       PLUS_EXPR, oprnd0, oprnd0);
!     }
!   /* Handle constant operands that are positive or negative powers of 2
!      if target can handle vector left shift.  */
!   else if (power2_val != -1
! 	   && (optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar))
! 	   && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
!     {
!       shift = build_int_cst (itype, power2_val);
        pattern_stmt
! 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
! 			       LSHIFT_EXPR, oprnd0, shift);
      }
    else
      return NULL;
  
+   if (negate)
+     {
+       tree tmp_var = gimple_assign_lhs (pattern_stmt);
+       new_pattern_def_seq (stmt_vinfo, pattern_stmt);
+       pattern_stmt
+ 	 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ 				NEGATE_EXPR, tmp_var);
+     }
+ 
    /* Pattern detected.  */
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
  		     "vect_recog_mult_pattern: detected:\n");
  
    if (dump_enabled_p ())
      dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
  			  pattern_stmt,0);
  
    stmts->safe_push (last_stmt);