diff mbox series

Add pattern for pointer-diff on addresses with same base/offset (PR 94234)

Message ID BYAPR01MB46169F5BC28D175F88418064F78A0@BYAPR01MB4616.prod.exchangelabs.com
State New
Headers show
Series Add pattern for pointer-diff on addresses with same base/offset (PR 94234) | expand

Commit Message

Feng Xue OS June 1, 2020, 8:09 a.m. UTC
This patch is meant to add match rules to simplify patterns as:

o. (pointer + offset_a) - (pointer + offset_b)   ->   (ptrdiff_t) (offset_a - offset_b)
o. (pointer_a + offset) - (pointer_b + offset)   ->   (pointer_a - pointer_b)

Bootstrapped/regtested on x86_64-linux and aarch64-linux.

Feng
---
2020-06-01  Feng Xue  <fxue@os.amperecomputing.com>

gcc/
	PR tree-optimization/94234
	* match.pd ((PTR + A) - (PTR + B)) -> (ptrdiff_t)(A - B): New
	simplification.
	* ((PTR_A + O) - (PTR_B + O)) -> (PTR_A - PTR_B): New simplification.

gcc/testsuite/
	PR tree-optimization/94234
	* gcc.dg/pr94234.c: New test.
---
 gcc/match.pd                   | 19 +++++++++----------
 gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234.c

Comments

Richard Biener June 2, 2020, 8:38 a.m. UTC | #1
On Mon, Jun 1, 2020 at 10:37 AM Feng Xue OS via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch is meant to add match rules to simplify patterns as:
>
> o. (pointer + offset_a) - (pointer + offset_b)   ->   (ptrdiff_t) (offset_a - offset_b)
> o. (pointer_a + offset) - (pointer_b + offset)   ->   (pointer_a - pointer_b)

You are also changing the existing pattern which IIRC tries to
preserve the undefinedness of overflow in offset_a - offset_b.
Without an explanation it's hard to guess why you think eliding
this conversion is correct.

Adding the TYPE_OVERFLOW_UNDEFINED guard also looks
odd - AFAICS overflow of the pointer type does not matter
but overflow of the generated minus?  Thus at least
a || !TYPE_OVERFLOW_UNDEFINED (type) would be
in order?

Thanks,
Richard.

> Bootstrapped/regtested on x86_64-linux and aarch64-linux.
>
> Feng
> ---
> 2020-06-01  Feng Xue  <fxue@os.amperecomputing.com>
>
> gcc/
>         PR tree-optimization/94234
>         * match.pd ((PTR + A) - (PTR + B)) -> (ptrdiff_t)(A - B): New
>         simplification.
>         * ((PTR_A + O) - (PTR_B + O)) -> (PTR_A - PTR_B): New simplification.
>
> gcc/testsuite/
>         PR tree-optimization/94234
>         * gcc.dg/pr94234.c: New test.
> ---
>  gcc/match.pd                   | 19 +++++++++----------
>  gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
>  2 files changed, 33 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr94234.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 33ee1a920bf..6553be4822e 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -2515,16 +2515,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>              && TREE_CODE (@2) == INTEGER_CST
>              && tree_int_cst_sign_bit (@2) == 0))
>       (minus (convert @1) (convert @2)))))
> -   (simplify
> -    (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
> -    /* The second argument of pointer_plus must be interpreted as signed, and
> -       thus sign-extended if necessary.  */
> -    (with { tree stype = signed_type_for (TREE_TYPE (@1)); }
> -     /* Use view_convert instead of convert here, as POINTER_PLUS_EXPR
> -       second arg is unsigned even when we need to consider it as signed,
> -       we don't want to diagnose overflow here.  */
> -     (minus (convert (view_convert:stype @1))
> -           (convert (view_convert:stype @2)))))))
> +  (simplify
> +   (pointer_diff (pointer_plus@3 @0 @1) (pointer_plus @0 @2))
> +    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3)))
> +      (convert (minus @1 @2))))
> +  (simplify
> +   (pointer_diff (pointer_plus@3 @0 @2) (pointer_plus @1 @2))
> +    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3))
> +        && !integer_zerop (@2))
> +     (pointer_diff @0 @1)))))
>
>  /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
>      Modeled after fold_plusminus_mult_expr.  */
> diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
> new file mode 100644
> index 00000000000..ef9076c80da
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94234.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ccp1" } */
> +
> +typedef __SIZE_TYPE__ size_t;
> +typedef __PTRDIFF_TYPE__ ptrdiff_t;
> +
> +ptrdiff_t foo (char *a, size_t n)
> +{
> +  char *b1 = a + 8 * n;
> +  char *b2 = a + 8 * (n - 1);
> +
> +  return b1 - b2;
> +}
> +
> +ptrdiff_t goo (char *a, size_t n, size_t m)
> +{
> +  char *b1 = a + 8 * n;
> +  char *b2 = a + 8 * (n + 1);
> +
> +  return (b1 + m) - (b2 + m);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 8;" 1 "ccp1" } } */
> +/* { dg-final { scan-tree-dump-times "return -8;" 1 "ccp1" } } */
Marc Glisse June 2, 2020, 10:24 p.m. UTC | #2
On Mon, 1 Jun 2020, Feng Xue OS via Gcc-patches wrote:

> 	* match.pd ((PTR + A) - (PTR + B)) -> (ptrdiff_t)(A - B): New
> 	simplification.

Not new, modified.

> 	* ((PTR_A + O) - (PTR_B + O)) -> (PTR_A - PTR_B): New simplification.

O might not be the best choice because of how close it looks to 0.

> -   (simplify
> -    (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
> -    /* The second argument of pointer_plus must be interpreted as signed, and
> -       thus sign-extended if necessary.  */
> -    (with { tree stype = signed_type_for (TREE_TYPE (@1)); }
> -     /* Use view_convert instead of convert here, as POINTER_PLUS_EXPR
> -	second arg is unsigned even when we need to consider it as signed,
> -	we don't want to diagnose overflow here.  */
> -     (minus (convert (view_convert:stype @1))
> -	    (convert (view_convert:stype @2)))))))
> +  (simplify
> +   (pointer_diff (pointer_plus@3 @0 @1) (pointer_plus @0 @2))
> +    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3)))
> +      (convert (minus @1 @2))))

What don't you like about the existing transformation? You are replacing a 
transformation that always folds by one that folds only in some cases, and 
looses the information that some overflows cannot happen. That looks like 
it is making things worse from an optimization point of view. Do you 
consider the transformation as unsafe with -fsanitize=pointer-overflow 
(does that correspond to the case where TYPE_OVERFLOW_UNDEFINED is true 
for a pointer type?)?

Ah, looking at the PR, you decided to perform the operation as unsigned 
because that has fewer NOP conversions, which, in that particular testcase 
where the offsets are originally unsigned, means we simplify better. But I 
would expect it to regress other testcases (in particular if the offsets 
were originally signed). Also, changing the second argument of 
pointer_plus to be signed, as is supposed to eventually happen, would 
break your testcase again.

At the very least we want to keep a comment next to the transformation 
explaining the situation.

If there are platforms where the second argument of pointer_plus is a 
smaller type than the result of pointer_diff (can this happen? I keep 
forgetting all the weird things some platforms do), this version may do an 
unsafe zero-extension.

I think I would rather try to extend the transformation for A*C-B*C.

> +  (simplify
> +   (pointer_diff (pointer_plus@3 @0 @2) (pointer_plus @1 @2))
> +    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3))
> +	 && !integer_zerop (@2))
> +     (pointer_diff @0 @1)))))

Why would it be a problem if @2 is 0? We should have already simplified 
(pointer_plus @i integer_zerop) to just @i before reaching this, but it 
doesn't make the transformation wrong.

Same remark as above for TYPE_OVERFLOW_UNDEFINED, is that for the sake of 
-fsanitize=pointer-overflow?
Feng Xue OS June 3, 2020, 8:39 a.m. UTC | #3
>> This patch is meant to add match rules to simplify patterns as:
>>
>> o. (pointer + offset_a) - (pointer + offset_b)   ->   (ptrdiff_t) (offset_a - offset_b)
>> o. (pointer_a + offset) - (pointer_b + offset)   ->   (pointer_a - pointer_b)

> You are also changing the existing pattern which IIRC tries to
> preserve the undefinedness of overflow in offset_a - offset_b.
> Without an explanation it's hard to guess why you think eliding
> this conversion is correct.
The old rule adds signed cast to both offset_b and offset_b, and does
minus on them, as

   (stype)offset_a - (stype)offset_b

Suppose that offset_a = (signed_int_max)UL, offset_b = 1UL, the trans
will generate overflow result, while original computation will not.

Alternative way is to add type cast after minus are done on offset_a and offset b.
And to avoid unsigned overflow warning, we should add a view_convert 
instead of convert, which was missed in my patch.

> Adding the TYPE_OVERFLOW_UNDEFINED guard also looks
> odd - AFAICS overflow of the pointer type does not matter
> but overflow of the generated minus?  Thus at least
> a || !TYPE_OVERFLOW_UNDEFINED (type) would be
> in order?
Yes, it is. will remove TYPE_OVERFLOW_UNDEFINED on pointer.

Thanks,
Feng
Feng Xue OS June 3, 2020, 9:12 a.m. UTC | #4
>>       * match.pd ((PTR + A) - (PTR + B)) -> (ptrdiff_t)(A - B): New
>>       simplification.

> Not new, modified.
OK.

>>       * ((PTR_A + O) - (PTR_B + O)) -> (PTR_A - PTR_B): New simplification.

> O might not be the best choice because of how close it looks to 0.
OK.

> What don't you like about the existing transformation? You are replacing a
> transformation that always folds by one that folds only in some cases, and
> looses the information that some overflows cannot happen. That looks like
> it is making things worse from an optimization point of view. Do you
> consider the transformation as unsafe with -fsanitize=pointer-overflow
> (does that correspond to the case where TYPE_OVERFLOW_UNDEFINED is true
> for a pointer type?)?
Yes. We should use !TYPE_OVERFLOW_SANITIZED, not TYPE_OVERFLOW_UNDEFINED.
But even for !TYPE_OVERFLOW_SANITIZED, some ptr_diff rules have the check, and some
do not. Here we could also remove it?

> Ah, looking at the PR, you decided to perform the operation as unsigned
> because that has fewer NOP conversions, which, in that particular testcase
> where the offsets are originally unsigned, means we simplify better. But I
> would expect it to regress other testcases (in particular if the offsets
> were originally signed). Also, changing the second argument of
> pointer_plus to be signed, as is supposed to eventually happen, would
> break your testcase again.
The old rule might produce overflow result (offset_a = (signed_int_max)UL, 
offset_b = 1UL). 

Additionally, (stype)(offset_a - offset_b) is more compact, there might be
further simplification opportunities on offset_a - offset_b, even it is not
in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, we have
to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to
be too generic to benefit performance in all situations.

If the 2nd argument is signed, we can add a specific rule as your suggestion
(T)(A * C) - (T)(B * C) -> (T) (A - B) * C.

> At the very least we want to keep a comment next to the transformation
> explaining the situation.

> If there are platforms where the second argument of pointer_plus is a
> smaller type than the result of pointer_diff (can this happen? I keep
> forgetting all the weird things some platforms do), this version may do an
> unsafe zero-extension.
If the 2nd argument is a smaller type, this might bring confuse semantic to
pointer_plus operator. Suppose the type is a (unsigned) char, the expression
"ptr + ((char) -1)" represents ptr + 255 or ptr - 1?

Regards,
Feng
Marc Glisse June 3, 2020, 2:32 p.m. UTC | #5
On Wed, 3 Jun 2020, Feng Xue OS via Gcc-patches wrote:

>> Ah, looking at the PR, you decided to perform the operation as unsigned
>> because that has fewer NOP conversions, which, in that particular testcase
>> where the offsets are originally unsigned, means we simplify better. But I
>> would expect it to regress other testcases (in particular if the offsets
>> were originally signed). Also, changing the second argument of
>> pointer_plus to be signed, as is supposed to eventually happen, would
>> break your testcase again.
> The old rule might produce overflow result (offset_a = (signed_int_max)UL, 
> offset_b = 1UL).

signed_int_max-1 does not overflow. But the point is that pointer_plus / 
pointer_diff are defined in a way that if that subtraction would overflow, 
then one of the pointer_plus or pointed_diff would have been undefined 
already. In particular, you cannot have objects larger than half the 
address space, and pointer_plus/pointer_diff have to remain inside an 
object. Doing the subtraction in a signed type keeps (part of) that 
information.

> Additionally, (stype)(offset_a - offset_b) is more compact,

Not if offset_a comes from (utype)a and offset_b from (utype)b with a and 
b signed. Using size_t indices as in the bugzilla testcase is not 
recommended practice. Change it to ssize_t, and we do optimize the 
testcase in CCP1 already.

> there might be
> further simplification opportunities on offset_a - offset_b, even it is not
> in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, we have
> to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to
> be too generic to benefit performance in all situations.

Sadly, conversions complicate optimizations and are all over the place, we 
need to handle them in more places. I sometimes dream of getting rid of 
NOP conversions, and having a single PLUS_EXPR with some kind of flag 
saying if it can wrap/saturate/trap when seen as a signed/unsigned 
operation, i.e. push the information on the operations instead of objects.

> If the 2nd argument is signed, we can add a specific rule as your suggestion
> (T)(A * C) - (T)(B * C) -> (T) (A - B) * C.
>
>> At the very least we want to keep a comment next to the transformation
>> explaining the situation.
>
>> If there are platforms where the second argument of pointer_plus is a
>> smaller type than the result of pointer_diff (can this happen? I keep
>> forgetting all the weird things some platforms do), this version may do an
>> unsafe zero-extension.
> If the 2nd argument is a smaller type, this might bring confuse semantic to
> pointer_plus operator. Suppose the type is a (unsigned) char, the expression
> "ptr + ((char) -1)" represents ptr + 255 or ptr - 1?

(pointer_plus ptr 255) would mean ptr - 1 on a platform where the second 
argument of pointer_plus has size 1 byte.


Do note that I am not a reviewer, what I say isn't final.
Richard Biener June 4, 2020, 8:30 a.m. UTC | #6
On Wed, Jun 3, 2020 at 4:33 PM Marc Glisse <marc.glisse@inria.fr> wrote:
>
> On Wed, 3 Jun 2020, Feng Xue OS via Gcc-patches wrote:
>
> >> Ah, looking at the PR, you decided to perform the operation as unsigned
> >> because that has fewer NOP conversions, which, in that particular testcase
> >> where the offsets are originally unsigned, means we simplify better. But I
> >> would expect it to regress other testcases (in particular if the offsets
> >> were originally signed). Also, changing the second argument of
> >> pointer_plus to be signed, as is supposed to eventually happen, would
> >> break your testcase again.
> > The old rule might produce overflow result (offset_a = (signed_int_max)UL,
> > offset_b = 1UL).
>
> signed_int_max-1 does not overflow. But the point is that pointer_plus /
> pointer_diff are defined in a way that if that subtraction would overflow,
> then one of the pointer_plus or pointed_diff would have been undefined
> already. In particular, you cannot have objects larger than half the
> address space, and pointer_plus/pointer_diff have to remain inside an
> object. Doing the subtraction in a signed type keeps (part of) that
> information.
>
> > Additionally, (stype)(offset_a - offset_b) is more compact,
>
> Not if offset_a comes from (utype)a and offset_b from (utype)b with a and
> b signed. Using size_t indices as in the bugzilla testcase is not
> recommended practice. Change it to ssize_t, and we do optimize the
> testcase in CCP1 already.
>
> > there might be
> > further simplification opportunities on offset_a - offset_b, even it is not
> > in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, we have
> > to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to
> > be too generic to benefit performance in all situations.
>
> Sadly, conversions complicate optimizations and are all over the place, we
> need to handle them in more places. I sometimes dream of getting rid of
> NOP conversions, and having a single PLUS_EXPR with some kind of flag
> saying if it can wrap/saturate/trap when seen as a signed/unsigned
> operation, i.e. push the information on the operations instead of objects.
>
> > If the 2nd argument is signed, we can add a specific rule as your suggestion
> > (T)(A * C) - (T)(B * C) -> (T) (A - B) * C.
> >
> >> At the very least we want to keep a comment next to the transformation
> >> explaining the situation.
> >
> >> If there are platforms where the second argument of pointer_plus is a
> >> smaller type than the result of pointer_diff (can this happen? I keep
> >> forgetting all the weird things some platforms do), this version may do an
> >> unsafe zero-extension.
> > If the 2nd argument is a smaller type, this might bring confuse semantic to
> > pointer_plus operator. Suppose the type is a (unsigned) char, the expression
> > "ptr + ((char) -1)" represents ptr + 255 or ptr - 1?
>
> (pointer_plus ptr 255) would mean ptr - 1 on a platform where the second
> argument of pointer_plus has size 1 byte.

Yes.  Note this is the reason for using a signed type for the minus as comment

    /* The second argument of pointer_plus must be interpreted as signed, and
       thus sign-extended if necessary.  */

explains.  That the type of the offset operand in a pointer-plus is always
unsigned is semantically incorrect.  Whenever taken out of context you
have to re-interpret the offset value as a signed entity.

And yes, there do exist targets where pointers are larger than offsets.
The size of pointers (ptr_mode) is specified by POINTER_SIZE
while the pointer-plus offset type is SIZETYPE (not SIZE_TYPE,
the C ptrdiff_t type is derived from SIZE_TYPE).

As Marc said for this particular reason pointer-plus offsets should
be instead forced to have signed type [or not forced a particular
sign and precision so the operands sign specifies how to extend it].
But it's far from a trivial task to rectify this IL mistake.

Richard.

>
>
> Do note that I am not a reviewer, what I say isn't final.
>
> --
> Marc Glisse
Feng Xue OS June 5, 2020, 9:20 a.m. UTC | #7
As Marc suggested, removed the new pointer_diff rule, and add another rule to fold
convert-add expression. This new rule is:

  (T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C)

Regards,
Feng

---
2020-06-01  Feng Xue  <fxue@os.amperecomputing.com>

gcc/
        PR tree-optimization/94234
        * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
        simplification.
        * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
        simplification.

gcc/testsuite/
        PR tree-optimization/94234
        * gcc.dg/pr94234.c: New test.
---
 gcc/match.pd                   | 28 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 33ee1a920bf..4f340bfe40a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
             && TREE_CODE (@2) == INTEGER_CST
             && tree_int_cst_sign_bit (@2) == 0))
      (minus (convert @1) (convert @2)))))
+   (simplify
+    (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
+     (pointer_diff @0 @1))
    (simplify
     (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
     /* The second argument of pointer_plus must be interpreted as signed, and
@@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (minus (convert (view_convert:stype @1))
            (convert (view_convert:stype @2)))))))

+/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
+   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
+(if (INTEGRAL_TYPE_P (type))
+ (for plusminus (plus minus)
+  (simplify
+   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+    (convert (mult (plusminus @1 @2) @0))))
+  (simplify
+   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+       && single_use (@2) && single_use (@3))
+    (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0))))
+  (simplify
+   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+       && single_use (@2) && single_use (@3))
+    (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0))))))
+
 /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
     Modeled after fold_plusminus_mult_expr.  */
 (if (!TYPE_SATURATING (type)
diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
new file mode 100644
index 00000000000..3f7c7a5e58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+ptrdiff_t goo (char *a, size_t n, size_t m)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n + 1);
+
+  return (b1 + m) - (b2 + m);
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */
--
Richard Biener June 15, 2020, 7:41 a.m. UTC | #8
On Fri, Jun 5, 2020 at 11:20 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> As Marc suggested, removed the new pointer_diff rule, and add another rule to fold
> convert-add expression. This new rule is:
>
>   (T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C)
>
> Regards,
> Feng
>
> ---
> 2020-06-01  Feng Xue  <fxue@os.amperecomputing.com>
>
> gcc/
>         PR tree-optimization/94234
>         * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
>         simplification.
>         * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
>         simplification.
>
> gcc/testsuite/
>         PR tree-optimization/94234
>         * gcc.dg/pr94234.c: New test.
> ---
>  gcc/match.pd                   | 28 ++++++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
>  2 files changed, 52 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr94234.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 33ee1a920bf..4f340bfe40a 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>              && TREE_CODE (@2) == INTEGER_CST
>              && tree_int_cst_sign_bit (@2) == 0))
>       (minus (convert @1) (convert @2)))))
> +   (simplify
> +    (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
> +     (pointer_diff @0 @1))

This new pattern is OK.  Please commit it separately.

>     (simplify
>      (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
>      /* The second argument of pointer_plus must be interpreted as signed, and
> @@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       (minus (convert (view_convert:stype @1))
>             (convert (view_convert:stype @2)))))))
>
> +/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
> +   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
> +(if (INTEGRAL_TYPE_P (type))
> + (for plusminus (plus minus)
> +  (simplify
> +   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
> +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
> +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
> +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> +    (convert (mult (plusminus @1 @2) @0))))
> +  (simplify
> +   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
> +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
> +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
> +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
> +       && single_use (@2) && single_use (@3))
> +    (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0))))
> +  (simplify
> +   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
> +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
> +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
> +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
> +       && single_use (@2) && single_use (@3))
> +    (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0))))))
> +

This shows the limit of pattern matching IMHO.  I'm also not convinced
it gets the
overflow cases correct (but I didn't spend too much time here).  Note we have
similar functionality implemented in fold_plusminus_mult_expr.  IMHO instead
of doing the above moving fold_plusminus_mult_expr to GIMPLE by executing
it from inside the forwprop pass would make more sense.  Or finally biting the
bullet and try to teach reassociation about how to handle signed arithmetic
with non-wrapping overflow behavior.

Richard.

>  /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
>      Modeled after fold_plusminus_mult_expr.  */
>  (if (!TYPE_SATURATING (type)
> diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
> new file mode 100644
> index 00000000000..3f7c7a5e58f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94234.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-forwprop1" } */
> +
> +typedef __SIZE_TYPE__ size_t;
> +typedef __PTRDIFF_TYPE__ ptrdiff_t;
> +
> +ptrdiff_t foo (char *a, size_t n)
> +{
> +  char *b1 = a + 8 * n;
> +  char *b2 = a + 8 * (n - 1);
> +
> +  return b1 - b2;
> +}
> +
> +ptrdiff_t goo (char *a, size_t n, size_t m)
> +{
> +  char *b1 = a + 8 * n;
> +  char *b2 = a + 8 * (n + 1);
> +
> +  return (b1 + m) - (b2 + m);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */
> --
>
>
> ________________________________________
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Thursday, June 4, 2020 4:30 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Feng Xue OS
> Subject: Re: [PATCH] Add pattern for pointer-diff on addresses with same base/offset (PR 94234)
>
> On Wed, Jun 3, 2020 at 4:33 PM Marc Glisse <marc.glisse@inria.fr> wrote:
> >
> > On Wed, 3 Jun 2020, Feng Xue OS via Gcc-patches wrote:
> >
> > >> Ah, looking at the PR, you decided to perform the operation as unsigned
> > >> because that has fewer NOP conversions, which, in that particular testcase
> > >> where the offsets are originally unsigned, means we simplify better. But I
> > >> would expect it to regress other testcases (in particular if the offsets
> > >> were originally signed). Also, changing the second argument of
> > >> pointer_plus to be signed, as is supposed to eventually happen, would
> > >> break your testcase again.
> > > The old rule might produce overflow result (offset_a = (signed_int_max)UL,
> > > offset_b = 1UL).
> >
> > signed_int_max-1 does not overflow. But the point is that pointer_plus /
> > pointer_diff are defined in a way that if that subtraction would overflow,
> > then one of the pointer_plus or pointed_diff would have been undefined
> > already. In particular, you cannot have objects larger than half the
> > address space, and pointer_plus/pointer_diff have to remain inside an
> > object. Doing the subtraction in a signed type keeps (part of) that
> > information.
> >
> > > Additionally, (stype)(offset_a - offset_b) is more compact,
> >
> > Not if offset_a comes from (utype)a and offset_b from (utype)b with a and
> > b signed. Using size_t indices as in the bugzilla testcase is not
> > recommended practice. Change it to ssize_t, and we do optimize the
> > testcase in CCP1 already.
> >
> > > there might be
> > > further simplification opportunities on offset_a - offset_b, even it is not
> > > in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, we have
> > > to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to
> > > be too generic to benefit performance in all situations.
> >
> > Sadly, conversions complicate optimizations and are all over the place, we
> > need to handle them in more places. I sometimes dream of getting rid of
> > NOP conversions, and having a single PLUS_EXPR with some kind of flag
> > saying if it can wrap/saturate/trap when seen as a signed/unsigned
> > operation, i.e. push the information on the operations instead of objects.
> >
> > > If the 2nd argument is signed, we can add a specific rule as your suggestion
> > > (T)(A * C) - (T)(B * C) -> (T) (A - B) * C.
> > >
> > >> At the very least we want to keep a comment next to the transformation
> > >> explaining the situation.
> > >
> > >> If there are platforms where the second argument of pointer_plus is a
> > >> smaller type than the result of pointer_diff (can this happen? I keep
> > >> forgetting all the weird things some platforms do), this version may do an
> > >> unsafe zero-extension.
> > > If the 2nd argument is a smaller type, this might bring confuse semantic to
> > > pointer_plus operator. Suppose the type is a (unsigned) char, the expression
> > > "ptr + ((char) -1)" represents ptr + 255 or ptr - 1?
> >
> > (pointer_plus ptr 255) would mean ptr - 1 on a platform where the second
> > argument of pointer_plus has size 1 byte.
>
> Yes.  Note this is the reason for using a signed type for the minus as comment
>
>     /* The second argument of pointer_plus must be interpreted as signed, and
>        thus sign-extended if necessary.  */
>
> explains.  That the type of the offset operand in a pointer-plus is always
> unsigned is semantically incorrect.  Whenever taken out of context you
> have to re-interpret the offset value as a signed entity.
>
> And yes, there do exist targets where pointers are larger than offsets.
> The size of pointers (ptr_mode) is specified by POINTER_SIZE
> while the pointer-plus offset type is SIZETYPE (not SIZE_TYPE,
> the C ptrdiff_t type is derived from SIZE_TYPE).
>
> As Marc said for this particular reason pointer-plus offsets should
> be instead forced to have signed type [or not forced a particular
> sign and precision so the operands sign specifies how to extend it].
> But it's far from a trivial task to rectify this IL mistake.
>
> Richard.
>
> >
> >
> > Do note that I am not a reviewer, what I say isn't final.
> >
> > --
> > Marc Glisse
Feng Xue OS June 16, 2020, 6:08 a.m. UTC | #9
Here is an question about pointer operation: 

Pointer is treated as unsigned in comparison operation, while distance between
pointers is signed. Then we can not assume the below conclusion is true?

 (ptr_a > ptr_b)     =>     (ptr_a - ptr_b) >= 0

Thanks,
Feng
Marc Glisse June 16, 2020, 7:10 a.m. UTC | #10
On Tue, 16 Jun 2020, Feng Xue OS wrote:

> Here is an question about pointer operation: 
>
> Pointer is treated as unsigned in comparison operation, while distance between
> pointers is signed. Then we can not assume the below conclusion is true?
>
> (ptr_a > ptr_b)     =>     (ptr_a - ptr_b) >= 0

Yes you can. It is illegal to use either expression if ptr_a and ptr_b do 
not point inside the same object, and objects are not allowed to be larger 
than half the address space.

Comparing arbitrary pointers (not in the same object) is done with 
(uintptr_t)ptr_a > (uintptr_t)ptr_b.
diff mbox series

Patch

From 160eaeb151197844005837dc4b8e1e27bb6dfadf Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Mon, 1 Jun 2020 11:57:35 +0800
Subject: [PATCH] tree-optimization/94234 - add ptr-diff pattern for addresses
 with same base or offset

2020-06-01  Feng Xue  <fxue@os.amperecomputing.com>

gcc/
	PR tree-optimization/94234
	* match.pd ((PTR + A) - (PTR + B)) -> (ptrdiff_t)(A - B): New
	simplification.
	* ((PTR_A + O) - (PTR_B + O)) -> (PTR_A - PTR_B): New simplification.

gcc/testsuite/
	PR tree-optimization/94234
	* gcc.dg/pr94234.c: New test.
---
 gcc/match.pd                   | 19 +++++++++----------
 gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 33ee1a920bf..6553be4822e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2515,16 +2515,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	     && TREE_CODE (@2) == INTEGER_CST
 	     && tree_int_cst_sign_bit (@2) == 0))
      (minus (convert @1) (convert @2)))))
-   (simplify
-    (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
-    /* The second argument of pointer_plus must be interpreted as signed, and
-       thus sign-extended if necessary.  */
-    (with { tree stype = signed_type_for (TREE_TYPE (@1)); }
-     /* Use view_convert instead of convert here, as POINTER_PLUS_EXPR
-	second arg is unsigned even when we need to consider it as signed,
-	we don't want to diagnose overflow here.  */
-     (minus (convert (view_convert:stype @1))
-	    (convert (view_convert:stype @2)))))))
+  (simplify
+   (pointer_diff (pointer_plus@3 @0 @1) (pointer_plus @0 @2))
+    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3)))
+      (convert (minus @1 @2))))
+  (simplify
+   (pointer_diff (pointer_plus@3 @0 @2) (pointer_plus @1 @2))
+    (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@3))
+	 && !integer_zerop (@2))
+     (pointer_diff @0 @1)))))
 
 /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
     Modeled after fold_plusminus_mult_expr.  */
diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
new file mode 100644
index 00000000000..ef9076c80da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ccp1" } */ 
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+ptrdiff_t goo (char *a, size_t n, size_t m)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n + 1);
+
+  return (b1 + m) - (b2 + m);
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "ccp1" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "ccp1" } } */
-- 
2.17.1