
[v2,i386]: Handle zero-extended addresses in ix86_avoid_lea_for_addr

Message ID CAFULd4bVjET+=+p1=JQ10nEr0fPJBXvgWYGbigkiSSO+ChTD1A@mail.gmail.com
State New

Commit Message

Uros Bizjak July 27, 2012, 5:34 p.m. UTC
On Fri, Jul 27, 2012 at 7:16 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Jul 27, 2012 at 11:29 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>
>> The attached patch enables ix86_avoid_lea_for_addr to process
>> zero-extended addresses.  This should help Atom performance,
>> especially in x32 mode.
>>
>> Please note the complication with insn re-recognition in
>> ix86_avoid_lea_for_addr, needed to solve the problem described in the
>> comment:
>>
>>   /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
>>      array behind our backs.  To make things worse, zero-extended operands
>>      (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
>>      also satisfy operand constraints of one of many *lea<mode> insn patterns.
>
> Actually, the instruction gets re-recognized as
> *zero_extendsidi2_rex64, which is why we end up with a DImode
> (addr:DI) operand.  This fact uncovers a further existing problem with
> ix86_avoid_lea_for_addr: the function should not mark addresses
> having fewer than two components for splitting.  These patterns are
> re-recognized as MOV (and now as a zero-extending MOVL) due to the
> approach described in the comment above, and due to the fact that we
> define the *mov{si,di} and *zero_extendsidi2_rex64 patterns before
> *lea<mode> in i386.md.
>
> However, there is no point in messing with these patterns in splitters;
> they are conditionally converted to LEAs at the insn emission phase
> (see e.g. the *zero_extendsidi2_rex64 change in the attached patch).  The
> attached patch prevents splitting with a simple criterion check.
>
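To illustrate the criterion, here is a minimal standalone sketch; the
struct below is a simplified stand-in for ix86_address, not the real
i386.c code:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for the ix86_address structure.  */
struct addr_parts
{
  void *base;   /* base register, or NULL */
  void *index;  /* index register, or NULL */
  void *disp;   /* displacement, or NULL */
  int scale;    /* scale factor, 1 when there is none */
};

/* An address with fewer than two components is really just a plain
   (possibly zero-extending) move, so there is nothing to split.  */
static bool
address_worth_splitting (const struct addr_parts *parts)
{
  int components = (parts->base != NULL) + (parts->index != NULL)
                   + (parts->disp != NULL) + (parts->scale > 1);

  return components >= 2;
}

int
main (void)
{
  int reg, disp;
  struct addr_parts bare_reg = { &reg, NULL, NULL, 1 };   /* just (reg)          */
  struct addr_parts base_disp = { &reg, NULL, &disp, 1 }; /* (plus (reg) (disp)) */

  /* Prints "0 1": the bare register is left alone, the two-component
     address is a candidate for splitting.  */
  printf ("%d %d\n", address_worth_splitting (&bare_reg),
          address_worth_splitting (&base_disp));
  return 0;
}
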
> As a bonus, the patch also includes a conditional splitter for
> non-destructive zero-extended adds.
>
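To see why this split preserves the zero-extended result, here is a
small self-contained C model (not GCC code; the variables simply stand
in for registers):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t a = 0xdeadbeefu, b = 0x600df00du;

  /* What the original insn computes: a DImode zero extension of an
     SImode sum, i.e. (zero_extend:DI (plus:SI (reg:SI a) (reg:SI b))).  */
  uint64_t lea_result = (uint64_t) (uint32_t) (a + b);

  /* What the split sequence computes: copy, then add, both in 32 bits.
     On x86-64 every 32-bit ALU operation clears the upper 32 bits of
     its destination, which the final cast models here.  */
  uint32_t dst = a;                        /* movl  %a, %dst */
  dst += b;                                /* addl  %b, %dst */
  uint64_t split_result = (uint64_t) dst;

  assert (lea_result == split_result);
  printf ("0x%016llx\n", (unsigned long long) split_result);
  return 0;
}
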
> 2012-07-27  Uros Bizjak  <ubizjak@gmail.com>
>
>         * config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
>         zero-extended addresses.  Return false if the address has fewer
>         than two components.
>         (ix86_split_lea_for_addr): Unconditionally convert target and
>         all address operands to requested mode.
>         * config/i386/i386.md (*lea<mode>): Pass SImode to
>         ix86_split_lea_for_addr when splitting zero-extended address.
>         (zero-extended add splitter): New splitter to conditionally split
>         non-destructive adds.
>         (*zero_extendsidi2_rex64): Conditionally emit leal instead of movl.
>
> I am currently re-testing the v2 patch.

... now with the correct v2 patch attached.
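
As background for the patch below, here is a purely illustrative C model
of the basic LEA-splitting transformation that ix86_split_lea_for_addr
performs, and which ix86_avoid_lea_for_addr decides to apply on Atom
(the register names in the comments are made up for the example):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t base = 0x1000, index = 0x20;

  /* leal 8(%base,%index,4), %dst computes the whole address in one
     operation; a complex LEA like this is what the splitter avoids.  */
  uint32_t lea = base + index * 4 + 8;

  /* The equivalent split sequence keeps the work on the ALUs:
       movl  %index, %dst
       shll  $2, %dst
       addl  %base, %dst
       addl  $8, %dst  */
  uint32_t dst = index;
  dst <<= 2;
  dst += base;
  dst += 8;

  assert (dst == lea);
  return 0;
}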

Uros.

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 189915)
+++ config/i386/i386.md	(working copy)
@@ -3474,13 +3474,28 @@ 
 	 (match_operand:SI 1 "x86_64_zext_general_operand"
 	        	"rmWz,0,r   ,m  ,r   ,m")))]
   "TARGET_64BIT"
-  "@
-   mov{l}\t{%1, %k0|%k0, %1}
-   #
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      if (ix86_use_lea_for_mov (insn, operands))
+	return "lea{l}\t{%E1, %k0|%k0, %E1}";
+      else
+	return "mov{l}\t{%1, %k0|%k0, %1}";
+
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_MMXMOV:
+      return "movd\t{%1, %0|%0, %1}";
+
+    case TYPE_SSEMOV:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
   [(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
    (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
    (set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,26 @@ 
   "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
   [(const_int 0)]
 {
-  ix86_split_lea_for_addr (operands, <MODE>mode);
+  enum machine_mode mode = <MODE>mode;
+  rtx addr;
+
+  /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
+     array behind our backs.  To make things worse, zero-extended operands
+     (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
+     also satisfy operand constraints of other insn patterns.
+
+     However, at this point we only check whether the original insn
+     performs an inherent zero extension, and emit the split insn
+     sequence in SImode in that case.  */
+  addr = SET_SRC (PATTERN (curr_insn));
+
+  /* Emit all operations in SImode for zero-extended addresses.  Recall
+     that x86_64 inherently zero-extends SImode operations to DImode.  */
+  if (GET_CODE (addr) == ZERO_EXTEND
+      || GET_CODE (addr) == AND)
+    mode = SImode;
+
+  ix86_split_lea_for_addr (operands, mode);
   DONE;
 }
   [(set_attr "type" "lea")
@@ -5807,11 +5841,11 @@ 
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(plus:SWI48 (match_operand:SWI48 1 "register_operand")
-              (match_operand:SWI48 2 "nonmemory_operand")))
+		    (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
   [(set (match_dup 0) (match_dup 1))
-   (parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2)))
+   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])])
 
 ;; Convert add to the lea pattern to avoid flags dependency.
@@ -5840,6 +5874,21 @@ 
   DONE;
 })
 
+;; Split non-destructive adds if we cannot use lea.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "register_operand")
+		   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0)
+   	     	   (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[3] = gen_lowpart (SImode, operands[0]);")
+
 ;; Convert add to the lea pattern to avoid flags dependency.
 (define_split
   [(set (match_operand:DI 0 "register_operand")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 189915)
+++ config/i386/i386.c	(working copy)
@@ -17036,11 +17036,6 @@  ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
   struct ix86_address parts;
   int ok;
 
-  /* FIXME: Handle zero-extended addresses.  */
-  if (GET_CODE (operands[1]) == ZERO_EXTEND
-      || GET_CODE (operands[1]) == AND)
-    return false;
-
   /* Check we need to optimize.  */
   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
     return false;
@@ -17052,6 +17047,11 @@  ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
   ok = ix86_decompose_address (operands[1], &parts);
   gcc_assert (ok);
 
+  /* There should be at least two components in the address.  */
+  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+    return false;
+
   /* We should not split into add if non legitimate pic
      operand is used as displacement. */
   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
@@ -17124,7 +17124,7 @@  ix86_emit_binop (enum rtx_code code, enum machine_
    It is assumed that it is allowed to clobber flags register
    at lea position.  */
 
-extern void
+void
 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
 {
   unsigned int regno0, regno1, regno2;
@@ -17135,7 +17135,7 @@  ix86_split_lea_for_addr (rtx operands[], enum mach
   ok = ix86_decompose_address (operands[1], &parts);
   gcc_assert (ok);
 
-  target = operands[0];
+  target = gen_lowpart (mode, operands[0]);
 
   regno0 = true_regnum (target);
   regno1 = INVALID_REGNUM;
@@ -17143,18 +17143,19 @@  ix86_split_lea_for_addr (rtx operands[], enum mach
 
   if (parts.base)
     {
-      if (GET_MODE (parts.base) != mode)
-	parts.base = gen_lowpart (mode, parts.base);
+      parts.base = gen_lowpart (mode, parts.base);
       regno1 = true_regnum (parts.base);
     }
 
   if (parts.index)
     {
-      if (GET_MODE (parts.index) != mode)
-	parts.index = gen_lowpart (mode, parts.index);
+      parts.index = gen_lowpart (mode, parts.index);
       regno2 = true_regnum (parts.index);
     }
 
+  if (parts.disp)
+    parts.disp = gen_lowpart (mode, parts.disp);
+
   if (parts.scale > 1)
     {
       /* Case r1 = r1 + ...  */