diff mbox

Do not handle SUBREG in apply_distributive_law (Re: RFC: allowing fwprop to propagate subregs)

Message ID 201203071740.q27He4YE009342@d06av02.portsmouth.uk.ibm.com
State New
Headers show

Commit Message

Ulrich Weigand March 7, 2012, 5:40 p.m. UTC
Richard Kenner wrote:

> > Given the current set of results, since I do not have any way to verify
> > whether my simplify_set changes would actually trigger correctly, I'd
> > rather propose to just remove the SUBREG case in apply_distributive_law
> > (i.e. only apply the first patch below).
> > 
> > Thoughts?
> 
> I think that's reasonable.  But I'd replace it with a comment saying
> what used to be there and why it was removed.

Now that we're back in stage 1, I'd like to try and move forward with
this again.  Here's a patch that implements your suggestion.

Tested on arm-linux-gnueabi, i386-linux-gnu and s390-linux-gnu.

OK for mainline?

Bye,
Ulrich

2012-03-07  Ulrich Weigand  <ulrich.weigand@linaro.org>

	gcc/
	* combine.c (apply_distributive_law): Do not distribute SUBREG.

Comments

Richard Biener March 12, 2012, 10:22 a.m. UTC | #1
On Wed, Mar 7, 2012 at 6:40 PM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> Richard Kenner wrote:
>
>> > Given the current set of results, since I do not have any way to verify
>> > whether my simplify_set changes would actually trigger correctly, I'd
>> > rather propose to just remove the SUBREG case in apply_distributive_law
>> > (i.e. only apply the first patch below).
>> >
>> > Thoughts?
>>
>> I think that's reasonable.  But I'd replace it with a comment saying
>> what used to be there and why it was removed.
>
> Now that we're back in stage 1, I'd like to try and move forward with
> this again.  Here's a patch that implements your suggestion.
>
> Tested on arm-linux-gnueabi, i386-linux-gnu and s390-linux-gnu.
>
> OK for mainline?

Ok.

Thanks,
Richard.

> Bye,
> Ulrich
>
> 2012-03-07  Ulrich Weigand  <ulrich.weigand@linaro.org>
>
>        gcc/
>        * combine.c (apply_distributive_law): Do not distribute SUBREG.
>
> === modified file 'gcc/combine.c'
> --- gcc/combine.c       2012-02-07 15:48:52 +0000
> +++ gcc/combine.c       2012-02-22 11:57:19 +0000
> @@ -9286,36 +9286,22 @@
>       /* This is also a multiply, so it distributes over everything.  */
>       break;
>
> -    case SUBREG:
> -      /* Non-paradoxical SUBREGs distributes over all operations,
> -        provided the inner modes and byte offsets are the same, this
> -        is an extraction of a low-order part, we don't convert an fp
> -        operation to int or vice versa, this is not a vector mode,
> -        and we would not be converting a single-word operation into a
> -        multi-word operation.  The latter test is not required, but
> -        it prevents generating unneeded multi-word operations.  Some
> -        of the previous tests are redundant given the latter test,
> -        but are retained because they are required for correctness.
> -
> -        We produce the result slightly differently in this case.  */
> -
> -      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
> -         || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
> -         || ! subreg_lowpart_p (lhs)
> -         || (GET_MODE_CLASS (GET_MODE (lhs))
> -             != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
> -         || paradoxical_subreg_p (lhs)
> -         || VECTOR_MODE_P (GET_MODE (lhs))
> -         || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
> -         /* Result might need to be truncated.  Don't change mode if
> -            explicit truncation is needed.  */
> -         || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
> -                                            GET_MODE (SUBREG_REG (lhs))))
> -       return x;
> -
> -      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
> -                                SUBREG_REG (lhs), SUBREG_REG (rhs));
> -      return gen_lowpart (GET_MODE (x), tem);
> +    /* This used to handle SUBREG, but this turned out to be counter-
> +       productive, since (subreg (op ...)) usually is not handled by
> +       insn patterns, and this "optimization" therefore transformed
> +       recognizable patterns into unrecognizable ones.  Therefore the
> +       SUBREG case was removed from here.
> +
> +       It is possible that distributing SUBREG over arithmetic operations
> +       leads to an intermediate result than can then be optimized further,
> +       e.g. by moving the outer SUBREG to the other side of a SET as done
> +       in simplify_set.  This seems to have been the original intent of
> +       handling SUBREGs here.
> +
> +       However, with current GCC this does not appear to actually happen,
> +       at least on major platforms.  If some case is found where removing
> +       the SUBREG case here prevents follow-on optimizations, distributing
> +       SUBREGs ought to be re-added at that place, e.g. in simplify_set.  */
>
>     default:
>       return x;
>
>
>
> --
>  Dr. Ulrich Weigand
>  GNU Toolchain for Linux on System z and Cell BE
>  Ulrich.Weigand@de.ibm.com
>
diff mbox

Patch

=== modified file 'gcc/combine.c'
--- gcc/combine.c	2012-02-07 15:48:52 +0000
+++ gcc/combine.c	2012-02-22 11:57:19 +0000
@@ -9286,36 +9286,22 @@ 
       /* This is also a multiply, so it distributes over everything.  */
       break;
 
-    case SUBREG:
-      /* Non-paradoxical SUBREGs distributes over all operations,
-	 provided the inner modes and byte offsets are the same, this
-	 is an extraction of a low-order part, we don't convert an fp
-	 operation to int or vice versa, this is not a vector mode,
-	 and we would not be converting a single-word operation into a
-	 multi-word operation.  The latter test is not required, but
-	 it prevents generating unneeded multi-word operations.  Some
-	 of the previous tests are redundant given the latter test,
-	 but are retained because they are required for correctness.
-
-	 We produce the result slightly differently in this case.  */
-
-      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
-	  || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
-	  || ! subreg_lowpart_p (lhs)
-	  || (GET_MODE_CLASS (GET_MODE (lhs))
-	      != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
-	  || paradoxical_subreg_p (lhs)
-	  || VECTOR_MODE_P (GET_MODE (lhs))
-	  || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
-	  /* Result might need to be truncated.  Don't change mode if
-	     explicit truncation is needed.  */
-	  || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
-					     GET_MODE (SUBREG_REG (lhs))))
-	return x;
-
-      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
-				 SUBREG_REG (lhs), SUBREG_REG (rhs));
-      return gen_lowpart (GET_MODE (x), tem);
+    /* This used to handle SUBREG, but this turned out to be counter-
+       productive, since (subreg (op ...)) usually is not handled by
+       insn patterns, and this "optimization" therefore transformed
+       recognizable patterns into unrecognizable ones.  Therefore the
+       SUBREG case was removed from here.
+
+       It is possible that distributing SUBREG over arithmetic operations
+       leads to an intermediate result than can then be optimized further,
+       e.g. by moving the outer SUBREG to the other side of a SET as done
+       in simplify_set.  This seems to have been the original intent of
+       handling SUBREGs here.
+
+       However, with current GCC this does not appear to actually happen,
+       at least on major platforms.  If some case is found where removing
+       the SUBREG case here prevents follow-on optimizations, distributing
+       SUBREGs ought to be re-added at that place, e.g. in simplify_set.  */
 
     default:
       return x;