diff mbox

Move some bit and binary optimizations in simplify and match

Message ID SN2PR0701MB1024DCB71D27D1879DE3A7C48E3E0@SN2PR0701MB1024.namprd07.prod.outlook.com
State New
Headers show

Commit Message

Hurugalawadi, Naveen Oct. 15, 2015, 6:11 a.m. UTC
Hi,

Thanks for all the suggestions.
Please find attached the modified patch as per your suggestions.

I had missed a mail as pointed by Marc Glisse. Now I have implemented
everything suggested.
Please review the patch and let me know if any further modifications are required.

I have some queries regarding the whole discussions along the course of
this patch.
It would be very helpful for my understanding of the code.

>> /* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
>> Likewise the fold code handles both constant and non-constant B.

How do we know that fold code handles both constant and non-constant B
and not A?

>> Fold (a * (1 << b)) into (a << b)
>> So either way I think we should only allow nop conversions
>> here (as fold-const.c did).

How do we recognize from the fold-const code that it allows only nop
conversions.

>> We want to minimize the number of lines in match.pd and this doesn't
>> really achieve this compared to duplicating the whole pattern.

Yes. Even, I have the same question to understand this better.
Is it worth and does it acheive any extra optimization after moving
it using simplify and match?

Should I have the other three patterns duplicated and implemented
or leave it in fold-const code?

Thanks,
Naveen

Comments

Richard Biener Oct. 15, 2015, 12:38 p.m. UTC | #1
On Thu, Oct 15, 2015 at 8:11 AM, Hurugalawadi, Naveen
<Naveen.Hurugalawadi@caviumnetworks.com> wrote:
> Hi,
>
> Thanks for all the suggestions.
> Please find attached the modified patch as per your suggestions.
>
> I had missed a mail as pointed by Marc Glisse. Now I have implemented
> everything suggested.
> Please review the patch and let me know if any further modifications are required.

+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (lshift integer_onep@1 @2))
+  (if (! FLOAT_TYPE_P (type)
+       && tree_nop_conversion_p (type, TREE_TYPE (@1)))
+   (lshift @0 @2)))

you are missing the convert? on the lshift now, without it the
tree_nop_conversion_p check always evaluates to true.

+/* Fold (C1/X)*C2 into (C1*C2)/X.  */
+(simplify
+ (mult (rdiv:s REAL_CST@0 @1) REAL_CST@2)
+  (if (flag_associative_math)
+  (with
+   { tree tem = const_binop (MULT_EXPR, type, @0, @2); }
+  (if (tem)
+   (rdiv { tem; } @1)))))

this looks good.

+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  { build_zero_cst (type); })

the pattern as-is is ok but note you are removing

-      /* ~X & X, (X == 0) & X, and !X & X are always zero.  */
-      if ((TREE_CODE (arg0) == BIT_NOT_EXPR
-          || TREE_CODE (arg0) == TRUTH_NOT_EXPR
-          || (TREE_CODE (arg0) == EQ_EXPR
-              && integer_zerop (TREE_OPERAND (arg0, 1))))
-         && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
-       return omit_one_operand_loc (loc, type, integer_zero_node, arg1);

from fold-const.c which handles TRUTH_NOT_EXPR but logical_inverted_value
does not handle it.  I suggest to add

(match (logical_inverted_value @0)
 (truth_not @0))

where the other logical_inverted_value predicates are defined

+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert1? (negate @0)) (convert2? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
+       && tree_nop_conversion_p (type, TREE_TYPE (@1)))
+   (mult (convert @0) (convert (negate @1)))))

this one looks ok now.

> I have some queries regarding the whole discussions along the course of
> this patch.
> It would be very helpful for my understanding of the code.

Sure.

>>> /* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
>>> Likewise the fold code handles both constant and non-constant B.
>
> How do we know that fold code handles both constant and non-constant B
> and not A?

          /* Fold (A & ~B) - (A & B) into (A ^ B) - B, where B is
             any power of 2 minus 1.  */
          if (TREE_CODE (arg0) == BIT_AND_EXPR
              && TREE_CODE (arg1) == BIT_AND_EXPR
              && operand_equal_p (TREE_OPERAND (arg0, 0),
                                  TREE_OPERAND (arg1, 0), 0))

so it requires equal first operands.  First operand position means
unless the second operands
are constant (in which case the whole BIT_AND would be gone and replaced
by a constant) the first operands are non-constant due to canonicalization
rules for commutative operators.

              tree mask0 = TREE_OPERAND (arg0, 1);
              tree mask1 = TREE_OPERAND (arg1, 1);
              tree tem = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask0);

              if (operand_equal_p (tem, mask1, 0))

this makes sure that mask0 == ~mask1 and thus B == B.  This
works symbolically as fold_build1 will transform (bit_not (bit_not mask0))
to mask0 as well as for constant B as fold_build1 will apply constant
folding here as well and thus the resulting two constants will be
equal.

>>> Fold (a * (1 << b)) into (a << b)
>>> So either way I think we should only allow nop conversions
>>> here (as fold-const.c did).
>
> How do we recognize from the fold-const code that it allows only nop
> conversions.

Because of

tree
fold_binary_loc (location_t loc,
             enum tree_code code, tree type, tree op0, tree op1)
{
...
  arg0 = op0;
  arg1 = op1;

  /* Strip any conversions that don't change the mode.  This is
     safe for every expression, except for a comparison expression
     because its signedness is derived from its operands.  So, in
     the latter case, only strip conversions that don't change the
     signedness.  MIN_EXPR/MAX_EXPR also need signedness of arguments
     preserved.

     Note that this is done as an internal manipulation within the
     constant folder, in order to find the simplest representation
     of the arguments so that their form can be studied.  In any
     cases, the appropriate type conversions should be put back in
     the tree that will get out of the constant folder.  */

  if (kind == tcc_comparison || code == MIN_EXPR || code == MAX_EXPR)
    {
      STRIP_SIGN_NOPS (arg0);
      STRIP_SIGN_NOPS (arg1);
    }
  else
    {
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
    }

STRIP_NOPS exactly does strip nop-conversions (STRIP_SIGN_NOPS as well,
but it preserves the signedness).

So all patterns looking at arg[01] are handling nop-conversions on their
outermost operands while those looking at op[01] do not.

>>> We want to minimize the number of lines in match.pd and this doesn't
>>> really achieve this compared to duplicating the whole pattern.
>
> Yes. Even, I have the same question to understand this better.
> Is it worth and does it acheive any extra optimization after moving
> it using simplify and match?

Yes, the code in fold-const.c only applies to GENERIC which means
expressions literally written in the source while match.pd will also
apply to those expressions appearing after CSE or constant folding.

> Should I have the other three patterns duplicated and implemented
> or leave it in fold-const code?

You should move them to match.pd.  It requires duplication to
handle the const vs. non-const cases the fold-const.c code handled.

Thanks,
Richard.


> Thanks,
> Naveen
diff mbox

Patch

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..1e7fbb4 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -9803,20 +9803,6 @@  fold_binary_loc (location_t loc,
       goto associate;
 
     case MULT_EXPR:
-      /* (-A) * (-B) -> A * B  */
-      if (TREE_CODE (arg0) == NEGATE_EXPR && negate_expr_p (arg1))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			    fold_convert_loc (loc, type,
-					      TREE_OPERAND (arg0, 0)),
-			    fold_convert_loc (loc, type,
-					      negate_expr (arg1)));
-      if (TREE_CODE (arg1) == NEGATE_EXPR && negate_expr_p (arg0))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			    fold_convert_loc (loc, type,
-					      negate_expr (arg0)),
-			    fold_convert_loc (loc, type,
-					      TREE_OPERAND (arg1, 0)));
-
       if (! FLOAT_TYPE_P (type))
 	{
 	  /* Transform x * -C into -x * C if x is easily negatable.  */
@@ -9830,16 +9816,6 @@  fold_binary_loc (location_t loc,
 						  negate_expr (arg0)),
 				tem);
 
-	  /* (a * (1 << b)) is (a << b)  */
-	  if (TREE_CODE (arg1) == LSHIFT_EXPR
-	      && integer_onep (TREE_OPERAND (arg1, 0)))
-	    return fold_build2_loc (loc, LSHIFT_EXPR, type, op0,
-				TREE_OPERAND (arg1, 1));
-	  if (TREE_CODE (arg0) == LSHIFT_EXPR
-	      && integer_onep (TREE_OPERAND (arg0, 0)))
-	    return fold_build2_loc (loc, LSHIFT_EXPR, type, op1,
-				TREE_OPERAND (arg0, 1));
-
 	  /* (A + A) * C -> A * 2 * C  */
 	  if (TREE_CODE (arg0) == PLUS_EXPR
 	      && TREE_CODE (arg1) == INTEGER_CST
@@ -9882,21 +9858,6 @@  fold_binary_loc (location_t loc,
 	}
       else
 	{
-	  /* Convert (C1/X)*C2 into (C1*C2)/X.  This transformation may change
-             the result for floating point types due to rounding so it is applied
-             only if -fassociative-math was specify.  */
-	  if (flag_associative_math
-	      && TREE_CODE (arg0) == RDIV_EXPR
-	      && TREE_CODE (arg1) == REAL_CST
-	      && TREE_CODE (TREE_OPERAND (arg0, 0)) == REAL_CST)
-	    {
-	      tree tem = const_binop (MULT_EXPR, TREE_OPERAND (arg0, 0),
-				      arg1);
-	      if (tem)
-		return fold_build2_loc (loc, RDIV_EXPR, type, tem,
-				    TREE_OPERAND (arg0, 1));
-	    }
-
           /* Strip sign operations from X in X*X, i.e. -Y*-Y -> Y*Y.  */
 	  if (operand_equal_p (arg0, arg1, 0))
 	    {
@@ -10053,22 +10014,6 @@  fold_binary_loc (location_t loc,
       goto bit_rotate;
 
     case BIT_AND_EXPR:
-      /* ~X & X, (X == 0) & X, and !X & X are always zero.  */
-      if ((TREE_CODE (arg0) == BIT_NOT_EXPR
-	   || TREE_CODE (arg0) == TRUTH_NOT_EXPR
-	   || (TREE_CODE (arg0) == EQ_EXPR
-	       && integer_zerop (TREE_OPERAND (arg0, 1))))
-	  && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
-	return omit_one_operand_loc (loc, type, integer_zero_node, arg1);
-
-      /* X & ~X , X & (X == 0), and X & !X are always zero.  */
-      if ((TREE_CODE (arg1) == BIT_NOT_EXPR
-	   || TREE_CODE (arg1) == TRUTH_NOT_EXPR
-	   || (TREE_CODE (arg1) == EQ_EXPR
-	       && integer_zerop (TREE_OPERAND (arg1, 1))))
-	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
-	return omit_one_operand_loc (loc, type, integer_zero_node, arg0);
-
       /* Fold (X ^ 1) & 1 as (X & 1) == 0.  */
       if (TREE_CODE (arg0) == BIT_XOR_EXPR
 	  && INTEGRAL_TYPE_P (type)
diff --git a/gcc/match.pd b/gcc/match.pd
index 655c9ff..2e0b919 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -323,6 +323,27 @@  along with GCC; see the file COPYING3.  If not see
     (if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
      (pows @0 @1))))))
 
+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (lshift integer_onep@1 @2))
+  (if (! FLOAT_TYPE_P (type)
+       && tree_nop_conversion_p (type, TREE_TYPE (@1)))
+   (lshift @0 @2)))
+
+/* Fold (C1/X)*C2 into (C1*C2)/X.  */
+(simplify
+ (mult (rdiv:s REAL_CST@0 @1) REAL_CST@2)
+  (if (flag_associative_math)
+  (with
+   { tree tem = const_binop (MULT_EXPR, type, @0, @2); }
+  (if (tem)
+   (rdiv { tem; } @1)))))
+
+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  { build_zero_cst (type); })
+
 /* X % Y is smaller than Y.  */
 (for cmp (lt ge)
  (simplify
@@ -542,6 +563,13 @@  along with GCC; see the file COPYING3.  If not see
 (match negate_expr_p
  VECTOR_CST
  (if (FLOAT_TYPE_P (TREE_TYPE (type)) || TYPE_OVERFLOW_WRAPS (type))))
+
+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert1? (negate @0)) (convert2? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
+       && tree_nop_conversion_p (type, TREE_TYPE (@1)))
+   (mult (convert @0) (convert (negate @1)))))
  
 /* -(A + B) -> (-B) - A.  */
 (simplify