Patchwork PATCH: Always avoid lea if possible on x86

login
register
mail settings
Submitter H.J. Lu
Date Aug. 17, 2010, 2:49 p.m.
Message ID <20100817144925.GA26996@intel.com>
Download mbox | patch
Permalink /patch/61920/
State New
Headers show

Comments

H.J. Lu - Aug. 17, 2010, 2:49 p.m.
Hi,

We added ix86_lea_for_add_ok and modified *add<mode>_1 to make sure
that, for TARGET_OPT_AGU, we use LEA for addresses and ADD for
non-addresses.  It turned out that ADD is always faster than LEA on all
processors except TARGET_OPT_AGU ones.  This patch changes *add<mode>_1
and ix86_lea_for_add_ok to avoid LEA on !TARGET_OPT_AGU processors.
OK for trunk?

Thanks.


H.J.
---
2010-08-17  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_lea_for_add_ok): For !TARGET_OPT_AGU
	or optimizing for size, always avoid lea if possible.

	* config/i386/i386.md (*add<mode>_1): Always avoid lea if
	possible.
Bernd Schmidt - Aug. 17, 2010, 3:05 p.m.
On 08/17/2010 04:49 PM, H.J. Lu wrote:
> We added ix86_lea_for_add_ok and modified *add<mode>_1 to make sure
> that we use LEA on address and ADD on non-address for TARGET_OPT_AGU.
> It turned out ADD is always faster than LEA on all processors, except for
> TARGET_OPT_AGU.  This patch changes *add<mode>_1 and ix86_lea_for_add_ok
> to avoid lea for !TARGET_OPT_AGU processors.  OK for trunk?

Yes, looks better.  Thanks.


Bernd
Richard Henderson - Aug. 17, 2010, 3:18 p.m.
On 08/17/2010 07:49 AM, H.J. Lu wrote:
>      default:
> -      /* Use add as much as possible to replace lea for AGU optimization. */
> -      if (which_alternative == 2 && TARGET_OPT_AGU)
> +      /* This alternative was added for TARGET_OPT_AGU to use add as
> +	 much as possible.  But add is also faster than lea for
> +	 !TARGET_OPT_AGU.  */
> +      if (which_alternative == 2)
>          return "add{<imodesuffix>}\t{%1, %0|%0, %1}";
>          
>        gcc_assert (rtx_equal_p (operands[0], operands[1]));
> @@ -5825,10 +5827,7 @@
>      }
>  }
>    [(set (attr "type")
> -     (cond [(and (eq_attr "alternative" "2") 
> -                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
> -	      (const_string "lea")
> -            (eq_attr "alternative" "3")
> +     (cond [(eq_attr "alternative" "3")
>                (const_string "lea")
>  	    (match_operand:SWI48 2 "incdec_operand" "")
>  	      (const_string "incdec")

Is there any reason not to remove alternative 2 now that
you're not doing anything special with it?


r~
Bernd Schmidt - Aug. 17, 2010, 3:20 p.m.
On 08/17/2010 05:18 PM, Richard Henderson wrote:
> Is there any reason not to remove alternative 2 now that
> you're not doing anything special with it?

If I understood things correctly, the problem was that, without
alternative 2, we'd match what is now alternative 3 and generate lea.


Bernd
Richard Henderson - Aug. 17, 2010, 3:22 p.m.
On 08/17/2010 08:20 AM, Bernd Schmidt wrote:
> On 08/17/2010 05:18 PM, Richard Henderson wrote:
>> Is there any reason not to remove alternative 2 now that
>> you're not doing anything special with it?
> 
> If I understood things correctly, the problem was that in this case we'd
> match what is now alternative 3, and generate lea.

Ah, right, I see.  The patch is ok.


r~

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b925122..f1d4402 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14802,10 +14802,10 @@  distance_agu_use (unsigned int regno0, rtx insn)
 #define IX86_LEA_PRIORITY 2
 
 /* Return true if it is ok to optimize an ADD operation to LEA
-   operation to avoid flag register consumation.  For the processors
-   like ATOM, if the destination register of LEA holds an actual
-   address which will be used soon, LEA is better and otherwise ADD
-   is better.  */
+   operation to avoid flag register consumation.  For most processors,
+   ADD is faster than LEA.  For the processors like ATOM, if the
+   destination register of LEA holds an actual address which will be
+   used soon, LEA is better and otherwise ADD is better.  */
 
 bool
 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
@@ -14813,16 +14813,14 @@  ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
 {
   unsigned int regno0 = true_regnum (operands[0]);
   unsigned int regno1 = true_regnum (operands[1]);
-  unsigned int regno2;
-
-  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
-    return regno0 != regno1;
-
-  regno2 = true_regnum (operands[2]);
+  unsigned int regno2 = true_regnum (operands[2]);
 
   /* If a = b + c, (a!=b && a!=c), must use lea form. */
   if (regno0 != regno1 && regno0 != regno2)
     return true;
+
+  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+    return false;
   else
     {
       int dist_define, dist_use;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0041158..f6ab0e2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5813,8 +5813,10 @@ 
 	}
 
     default:
-      /* Use add as much as possible to replace lea for AGU optimization. */
-      if (which_alternative == 2 && TARGET_OPT_AGU)
+      /* This alternative was added for TARGET_OPT_AGU to use add as
+	 much as possible.  But add is also faster than lea for
+	 !TARGET_OPT_AGU.  */
+      if (which_alternative == 2)
         return "add{<imodesuffix>}\t{%1, %0|%0, %1}";
         
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
@@ -5825,10 +5827,7 @@ 
     }
 }
   [(set (attr "type")
-     (cond [(and (eq_attr "alternative" "2") 
-                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
-	      (const_string "lea")
-            (eq_attr "alternative" "3")
+     (cond [(eq_attr "alternative" "3")
               (const_string "lea")
 	    (match_operand:SWI48 2 "incdec_operand" "")
 	      (const_string "incdec")