Patchwork [i386] : Handle zero extended addresses in ix86_avoid_lea_for_addr

login
register
mail settings
Submitter Uros Bizjak
Date July 27, 2012, 9:29 a.m.
Message ID <CAFULd4a0gHS3_bsNSg85Oz=0DiaX5GyafiEZgOfiOaRvKkSvFw@mail.gmail.com>
Download mbox | patch
Permalink /patch/173609/
State New
Headers show

Comments

Uros Bizjak - July 27, 2012, 9:29 a.m.
Hello!

Attached patch enables ix86_avoid_lea_for_addr to process
zero-extended addresses. This patch should help atom performance,
especially in x32 mode.

Please note the complication with insn re-recognition in
ix86_avoid_lea_for_addr, to solve the problem as described in the
comment:

  /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
     array behind our backs.  To make things worse, zero-extended oeprands
     (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
     also satisfy operand constraints of one of many *lea<mode> insn patterns.

     However, at this point we are looking only if the original insn
     is performing inherent zero extension, and will emit
     split insn sequence in SImode for this case.  */

2012-07-27  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.c (ix86_avoid_lea_for_addr): Handle
	zero-extended addresses.
	(ix86_split_lea_for_addr): Unconditionally convert target and
	all address operands to requested mode.
	* config/i386/i386.md (*lea<mode>): Determine mode of split insn
	sequence from the original insn pattern.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}, also when configured with "--with-arch=core2 --with-cpu=atom"

I will wait a day or two for possible comments, before the patch is
committed to mainline SVN.

Uros.

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 189904)
+++ config/i386/i386.md	(working copy)
@@ -3474,13 +3474,28 @@ 
 	 (match_operand:SI 1 "x86_64_zext_general_operand"
 	        	"rmWz,0,r   ,m  ,r   ,m")))]
   "TARGET_64BIT"
-  "@
-   mov{l}\t{%1, %k0|%k0, %1}
-   #
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      if (ix86_use_lea_for_mov (insn, operands))
+	return "lea{l}\t{%E1, %k0|%k0, %E1}";
+      else
+	return "mov{l}\t{%1, %k0|%k0, %1}";
+
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_MMXMOV:
+      return "movd\t{%1, %0|%0, %1}";
+
+    case TYPE_SSEMOV:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
   [(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
    (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
    (set_attr "prefix_0f" "0,*,*,*,*,*")
@@ -5479,7 +5494,26 @@ 
   "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
   [(const_int 0)]
 {
-  ix86_split_lea_for_addr (operands, <MODE>mode);
+  enum machine_mode mode = <MODE>mode;
+  rtx addr;
+
+  /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[]
+     array behind our backs.  To make things worse, zero-extended oeprands
+     (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they
+     also satisfy operand constraints of one of many *lea<mode> insn patterns.
+
+     However, at this point we are looking only if the original insn
+     is performing inherent zero extension, and will emit
+     split insn sequence in SImode for this case.  */
+  addr = SET_SRC (PATTERN (curr_insn));
+
+  /* Emit all operations in SImode for zero-extended addresses.  Recall
+     that x86_64 inheretly zero-extends SImode operations to DImode.  */
+  if (GET_CODE (addr) == ZERO_EXTEND
+      || GET_CODE (addr) == AND)
+    mode = SImode;
+
+  ix86_split_lea_for_addr (operands, mode);
   DONE;
 }
   [(set_attr "type" "lea")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 189904)
+++ config/i386/i386.c	(working copy)
@@ -17036,11 +17036,6 @@  ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
   struct ix86_address parts;
   int ok;
 
-  /* FIXME: Handle zero-extended addresses.  */
-  if (GET_CODE (operands[1]) == ZERO_EXTEND
-      || GET_CODE (operands[1]) == AND)
-    return false;
-
   /* Check we need to optimize.  */
   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
     return false;
@@ -17124,7 +17119,7 @@  ix86_emit_binop (enum rtx_code code, enum machine_
    It is assumed that it is allowed to clobber flags register
    at lea position.  */
 
-extern void
+void
 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
 {
   unsigned int regno0, regno1, regno2;
@@ -17135,7 +17130,7 @@  ix86_split_lea_for_addr (rtx operands[], enum mach
   ok = ix86_decompose_address (operands[1], &parts);
   gcc_assert (ok);
 
-  target = operands[0];
+  target = gen_lowpart (mode, operands[0]);
 
   regno0 = true_regnum (target);
   regno1 = INVALID_REGNUM;
@@ -17143,18 +17138,19 @@  ix86_split_lea_for_addr (rtx operands[], enum mach
 
   if (parts.base)
     {
-      if (GET_MODE (parts.base) != mode)
-	parts.base = gen_lowpart (mode, parts.base);
+      parts.base = gen_lowpart (mode, parts.base);
       regno1 = true_regnum (parts.base);
     }
 
   if (parts.index)
     {
-      if (GET_MODE (parts.index) != mode)
-	parts.index = gen_lowpart (mode, parts.index);
+      parts.index = gen_lowpart (mode, parts.index);
       regno2 = true_regnum (parts.index);
     }
 
+  if (parts.disp)
+    parts.disp = gen_lowpart (mode, parts.disp);
+
   if (parts.scale > 1)
     {
       /* Case r1 = r1 + ...  */