Decrease compile time memory with heavy find_base_{value,term} on i?86/x86_64 (PR rtl-optimization/63191, take 2)

Submitted by Jakub Jelinek on March 20, 2017, 11:15 a.m.

Details

Message ID 20170320111551.GE11094@tucnak
State New
Headers show

Commit Message

Jakub Jelinek March 20, 2017, 11:15 a.m.
Hi!

On Fri, Mar 10, 2017 at 07:57:39PM +0100, Jakub Jelinek wrote:
> On Fri, Mar 10, 2017 at 07:52:37PM +0100, Bernd Schmidt wrote:
> > On 03/10/2017 06:53 PM, Jakub Jelinek wrote:
> > > +
> > > +template <bool base_term_p>
> > > +static inline rtx
> > > +ix86_delegitimize_address_tmpl (rtx x)
> > >  {
> > 
> > Why is this a template and not a function arg?
> 
> That was just an attempt to ensure that the compiler actually
> either inlines it, or doesn't share code between the two versions, so the
> base_term_p argument isn't checked at runtime.
> But, as I said, I have no problem changing that, i.e.
> remove the template line, s/_tmpl/_1/, add , bool base_term_p
> and tweak both callers.

Here is the other variant of the patch with inline function instead of
template.  Also bootstrapped/regtested on x86_64-linux and i686-linux, ok
for trunk (or is the other patch ok)?

2017-03-20  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/63191
	* config/i386/i386.c (ix86_delegitimize_address): Turn into small
	wrapper function, moved the whole old content into ...
	(ix86_delegitimize_address_1): ... this.  New inline function.
	(ix86_find_base_term): Use ix86_delegitimize_address_1 with
	true as last argument instead of ix86_delegitimize_address.



	Jakub

Comments

Uros Bizjak March 22, 2017, 3:38 p.m.
On Mon, Mar 20, 2017 at 12:15 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Fri, Mar 10, 2017 at 07:57:39PM +0100, Jakub Jelinek wrote:
>> On Fri, Mar 10, 2017 at 07:52:37PM +0100, Bernd Schmidt wrote:
>> > On 03/10/2017 06:53 PM, Jakub Jelinek wrote:
>> > > +
>> > > +template <bool base_term_p>
>> > > +static inline rtx
>> > > +ix86_delegitimize_address_tmpl (rtx x)
>> > >  {
>> >
>> > Why is this a template and not a function arg?
>>
>> That was just an attempt to ensure that the compiler actually
>> either inlines it, or doesn't share code between the two versions, so the
>> base_term_p argument isn't checked at runtime.
>> But, as I said, I have no problem changing that, i.e.
>> remove the template line, s/_tmpl/_1/, add , bool base_term_p
>> and tweak both callers.
>
> Here is the other variant of the patch with inline function instead of
> template.  Also bootstrapped/regtested on x86_64-linux and i686-linux, ok
> for trunk (or is the other patch ok)?
>
> 2017-03-20  Jakub Jelinek  <jakub@redhat.com>
>
>         PR rtl-optimization/63191
>         * config/i386/i386.c (ix86_delegitimize_address): Turn into small
>         wrapper function, moved the whole old content into ...
>         (ix86_delegitimize_address_1): ... this.  New inline function.
>         (ix86_find_base_term): Use ix86_delegitimize_address_1 with
>         true as last argument instead of ix86_delegitimize_address.

LGTM, but I don't want to step on Bernd's toes, so let's wait for his opinion.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2017-03-07 20:04:52.000000000 +0100
> +++ gcc/config/i386/i386.c      2017-03-10 14:46:24.351775710 +0100
> @@ -17255,10 +17255,16 @@ ix86_delegitimize_tls_address (rtx orig_
>     has a different PIC label for each routine but the DWARF debugging
>     information is not associated with any particular routine, so it's
>     necessary to remove references to the PIC label from RTL stored by
> -   the DWARF output code.  */
> +   the DWARF output code.
>
> -static rtx
> -ix86_delegitimize_address (rtx x)
> +   This helper is used in the normal ix86_delegitimize_address
> +   entrypoint (e.g. used in the target delegitimization hook) and
> +   in ix86_find_base_term.  As compile time memory optimization, we
> +   avoid allocating rtxes that will not change anything on the outcome
> +   of the callers (find_base_value and find_base_term).  */
> +
> +static inline rtx
> +ix86_delegitimize_address_1 (rtx x, bool base_term_p)
>  {
>    rtx orig_x = delegitimize_mem_from_attrs (x);
>    /* addend is NULL or some rtx if x is something+GOTOFF where
> @@ -17285,6 +17291,10 @@ ix86_delegitimize_address (rtx x)
>            && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
>            && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
>          {
> +         /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
> +            base.  A CONST can't be arg_pointer_rtx based.  */
> +         if (base_term_p && MEM_P (orig_x))
> +           return orig_x;
>           rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
>           x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
>           if (MEM_P (orig_x))
> @@ -17361,7 +17371,9 @@ ix86_delegitimize_address (rtx x)
>    if (! result)
>      return ix86_delegitimize_tls_address (orig_x);
>
> -  if (const_addend)
> +  /* For (PLUS something CONST_INT) both find_base_{value,term} just
> +     recurse on the first operand.  */
> +  if (const_addend && !base_term_p)
>      result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
>    if (reg_addend)
>      result = gen_rtx_PLUS (Pmode, reg_addend, result);
> @@ -17399,6 +17411,14 @@ ix86_delegitimize_address (rtx x)
>    return result;
>  }
>
> +/* The normal instantiation of the above template.  */
> +
> +static rtx
> +ix86_delegitimize_address (rtx x)
> +{
> +  return ix86_delegitimize_address_1 (x, false);
> +}
> +
>  /* If X is a machine specific address (i.e. a symbol or label being
>     referenced as a displacement from the GOT implemented using an
>     UNSPEC), then return the base term.  Otherwise return X.  */
> @@ -17424,7 +17444,7 @@ ix86_find_base_term (rtx x)
>        return XVECEXP (term, 0, 0);
>      }
>
> -  return ix86_delegitimize_address (x);
> +  return ix86_delegitimize_address_1 (x, true);
>  }
>
>  static void
>
>
>         Jakub
Bernd Schmidt March 22, 2017, 6:08 p.m.
On 03/22/2017 04:38 PM, Uros Bizjak wrote:
>
> LGTM, but I don't want to step on Bernd's toes, so let's wait for his opinion.

I was waiting for yours really, that's the one that counts.


Bernd

Patch hide | download patch | download mbox

--- gcc/config/i386/i386.c.jj	2017-03-07 20:04:52.000000000 +0100
+++ gcc/config/i386/i386.c	2017-03-10 14:46:24.351775710 +0100
@@ -17255,10 +17255,16 @@  ix86_delegitimize_tls_address (rtx orig_
    has a different PIC label for each routine but the DWARF debugging
    information is not associated with any particular routine, so it's
    necessary to remove references to the PIC label from RTL stored by
-   the DWARF output code.  */
+   the DWARF output code.
 
-static rtx
-ix86_delegitimize_address (rtx x)
+   This helper is used in the normal ix86_delegitimize_address
+   entrypoint (e.g. used in the target delegitimization hook) and
+   in ix86_find_base_term.  As compile time memory optimization, we
+   avoid allocating rtxes that will not change anything on the outcome
+   of the callers (find_base_value and find_base_term).  */
+
+static inline rtx
+ix86_delegitimize_address_1 (rtx x, bool base_term_p)
 {
   rtx orig_x = delegitimize_mem_from_attrs (x);
   /* addend is NULL or some rtx if x is something+GOTOFF where
@@ -17285,6 +17291,10 @@  ix86_delegitimize_address (rtx x)
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
         {
+	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
+	     base.  A CONST can't be arg_pointer_rtx based.  */
+	  if (base_term_p && MEM_P (orig_x))
+	    return orig_x;
 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
 	  if (MEM_P (orig_x))
@@ -17361,7 +17371,9 @@  ix86_delegitimize_address (rtx x)
   if (! result)
     return ix86_delegitimize_tls_address (orig_x);
 
-  if (const_addend)
+  /* For (PLUS something CONST_INT) both find_base_{value,term} just
+     recurse on the first operand.  */
+  if (const_addend && !base_term_p)
     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
   if (reg_addend)
     result = gen_rtx_PLUS (Pmode, reg_addend, result);
@@ -17399,6 +17411,14 @@  ix86_delegitimize_address (rtx x)
   return result;
 }
 
+/* The normal instantiation of the above template.  */
+
+static rtx
+ix86_delegitimize_address (rtx x)
+{
+  return ix86_delegitimize_address_1 (x, false);
+}
+
 /* If X is a machine specific address (i.e. a symbol or label being
    referenced as a displacement from the GOT implemented using an
    UNSPEC), then return the base term.  Otherwise return X.  */
@@ -17424,7 +17444,7 @@  ix86_find_base_term (rtx x)
       return XVECEXP (term, 0, 0);
     }
 
-  return ix86_delegitimize_address (x);
+  return ix86_delegitimize_address_1 (x, true);
 }
 
 static void