Comments
Patch
===================================================================
@@ -801,6 +801,10 @@
struct ix86_address parts;
int ok;
+ /* LEA handles zero-extend by itself. */
+ if (GET_CODE (op) == ZERO_EXTEND)
+ return false;
+
ok = ix86_decompose_address (op, &parts);
gcc_assert (ok);
return parts.seg == SEG_DEFAULT;
===================================================================
@@ -11142,6 +11142,14 @@ ix86_decompose_address (rtx addr, struct ix86_addr
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
+ /* Allow zero-extended SImode addresses,
+ they will be emitted with addr32 prefix. */
+ if (TARGET_64BIT
+ && GET_CODE (addr) == ZERO_EXTEND
+ && GET_MODE (addr) == DImode
+ && GET_MODE (XEXP (addr, 0)) == SImode)
+ addr = XEXP (addr, 0);
+
if (REG_P (addr))
base = addr;
else if (GET_CODE (addr) == SUBREG)
@@ -14159,9 +14167,13 @@ ix86_print_operand_address (FILE *file, rtx addr)
}
else
{
- /* Print DImode registers on 64bit targets to avoid addr32 prefixes. */
- int code = TARGET_64BIT ? 'q' : 0;
+ int code = 0;
+ /* Print SImode registers for zero-extended addresses to force
+ addr32 prefix. Otherwise print DImode registers to avoid it. */
+ if (TARGET_64BIT)
+ code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q';
+
if (ASSEMBLER_DIALECT == ASM_ATT)
{
if (disp)
@@ -21772,7 +21784,8 @@ assign_386_stack_local (enum machine_mode mode, en
}
/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
+ encoding. Includes addr32 prefix, does not include the one-byte modrm,
+ opcode, or other prefixes. */
int
memory_address_length (rtx addr)
@@ -21799,8 +21812,10 @@ memory_address_length (rtx addr)
base = parts.base;
index = parts.index;
disp = parts.disp;
- len = 0;
+ /* Add length of addr32 prefix. */
+ len = (GET_CODE (addr) == ZERO_EXTEND);
+
/* Rule of thumb:
- esp as the base always wants an index,
- ebp as the base always wants a displacement,
@@ -28233,6 +28248,15 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class
enum machine_mode mode,
secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
+ /* Double-word spills from general registers to non-offsettable memory
+ references (zero-extended addresses) go through XMM register. */
+ if (TARGET_64BIT
+ && MEM_P (x)
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && rclass == GENERAL_REGS
+ && !offsettable_memref_p (x))
+ return SSE_REGS;
+
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
if (!TARGET_64BIT
Hello!

The attached patch implements addr32-prefixed addresses for x86_64 targets, where memory locations are accessed with 32-bit base and index registers in the form (zero_extend:DI (... SImode registers ...)). The optimization rarely (if at all) triggers on x86_64, but is very important on x32 (see [1]), where many LEAs get moved into addresses of the operators.

Of some interest is the inability of reload to fix up its own generated moves for the offsettable memory operand constraint "o", as happens with TImode moves. See [2] for further analysis and [3] for the workaround.

2011-08-08  Uros Bizjak  <ubizjak@gmail.com>

	PR target/49781
	* config/i386/i386.c (ix86_decompose_address): Allow zero-extended SImode addresses.
	(ix86_print_operand_address): Handle zero-extended addresses.
	(memory_address_length): Add length of addr32 prefix for zero-extended addresses.
	(ix86_secondary_reload): Handle moves to/from double-word general registers from/to zero-extended addresses.
	* config/i386/predicates.md (lea_address_operand): Reject zero-extended operands.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}. Additionally, H.J. tested the patch on the x32 target with GCC bootstrap/regression tests, a build of glibc (+regression tests) and SPEC2000/2006. The patch was committed to mainline SVN.

BTW: There is a strange optimization in the combine pass, where a zero-extended address is converted on-the-fly to:

Trying 9 -> 10:
Failed to match this instruction:
(... (and:DI (subreg:DI (plus:SI (ashift:SI (reg/v:SI 63 [ i ]) (const_int 2 [0x2])) (subreg:SI (reg/v/f:DI 62 [ a ]) 0)) 0) (const_int 4294967295 [0xffffffff])) ...)

While it is easy to add a pattern recognizer for this RTX to ix86_decompose_address/ix86_legitimate_address_p, I would like to understand the purpose of the conversion better and eventually fix it in the combine pass.
[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49781
[2] http://gcc.gnu.org/ml/gcc/2011-08/msg00129.html
[3] http://gcc.gnu.org/ml/gcc/2011-08/msg00157.html

Uros.