diff mbox

[i386] : Allow non-REX memory operand for x86_64 zero-extracts involving high registers

Message ID CAFULd4Zxhonn_TKujBt-wgYnL6xKTLPF8QX13VbwiqumqxDkQQ@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Dec. 28, 2016, 3:37 p.m. UTC
Hello!

The attached patch allows non-REX memory operands for x86_64 zero-extracts
involving high registers. This needs to be implemented using a peephole2
pattern, since we can't distinguish a non-REX memory operand from a normal
memory operand.

2016-12-28  Uros Bizjak  <ubizjak@gmail.com>

    PR target/78904
    * config/i386/constraints.md (Bn): New special memory constraint.
    * config/i386/predicates.md (norex_memory_operand): New predicate.
    * config/i386/i386.md (*extzvqi_mem_rex64): New insn pattern and
    corresponding peephole2 pattern.

testsuite/ChangeLog:

2016-12-28  Uros Bizjak  <ubizjak@gmail.com>

    PR target/78904
    * gcc.target/i386/pr78904-4.c: New test.
    * gcc.target/i386/pr78904-5.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/constraints.md
===================================================================
--- config/i386/constraints.md	(revision 243954)
+++ config/i386/constraints.md	(working copy)
@@ -169,6 +169,7 @@ 
 ;;  g  GOT memory operand.
 ;;  m  Vector memory operand
 ;;  c  Constant memory operand
+;;  n  Memory operand without REX prefix
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.
@@ -191,6 +192,10 @@ 
   (and (match_operand 0 "memory_operand")
        (match_test "constant_address_p (XEXP (op, 0))")))
 
+(define_special_memory_constraint "Bn"
+  "@internal Memory operand without REX prefix."
+  (match_operand 0 "norex_memory_operand"))
+
 (define_constraint "Bs"
   "@internal Sibcall memory operand."
   (ior (and (not (match_test "TARGET_X32"))
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 243954)
+++ config/i386/i386.md	(working copy)
@@ -2835,9 +2835,20 @@ 
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
 
+(define_insn "*extzvqi_mem_rex64"
+  [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
+	(subreg:QI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
+			   (const_int 8)
+			   (const_int 8)) 0))]
+  "TARGET_64BIT && reload_completed"
+  "mov{b}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
 (define_insn "*extzvqi"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
-        (subreg:QI
+	(subreg:QI
 	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q")
 			   (const_int 8)
 			   (const_int 8)) 0))]
@@ -2863,6 +2874,21 @@ 
 	(const_string "SI")
 	(const_string "QI")))])
 
+(define_peephole2
+  [(set (match_operand:QI 0 "register_operand")
+	(subreg:QI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand")
+			   (const_int 8)
+			   (const_int 8)) 0))
+   (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
+  "TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 2)
+	(subreg:QI
+	  (zero_extract:SI (match_dup 1)
+			   (const_int 8)
+			   (const_int 8)) 0))])
+
 (define_expand "insv<mode>"
   [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
 			     (match_operand:SI 1 "const_int_operand")
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md	(revision 243954)
+++ config/i386/predicates.md	(working copy)
@@ -1037,6 +1037,10 @@ 
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "const0_operand")))
 
+(define_predicate "norex_memory_operand"
+  (and (match_operand 0 "memory_operand")
+       (not (match_test "x86_extended_reg_mentioned_p (op)"))))
+
 ;; Return true for RTX codes that force SImode address.
 (define_predicate "SImode_address_operand"
   (match_code "subreg,zero_extend,and"))
Index: testsuite/gcc.target/i386/pr78904-4.c
===================================================================
--- testsuite/gcc.target/i386/pr78904-4.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr78904-4.c	(working copy)
@@ -0,0 +1,21 @@ 
+/* PR target/78904 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+struct S1
+{
+  unsigned char pad1;
+  unsigned char val;
+  unsigned short pad2;
+};
+
+extern unsigned char t[256];
+
+void foo (struct S1 a, size_t i)
+{
+  t[i] = a.val;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h," } } */
Index: testsuite/gcc.target/i386/pr78904-5.c
===================================================================
--- testsuite/gcc.target/i386/pr78904-5.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr78904-5.c	(working copy)
@@ -0,0 +1,27 @@ 
+/* PR target/78904 */
+/* { dg-do assemble { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+struct S1
+{
+  unsigned char pad1;
+  unsigned char val;
+  unsigned short pad2;
+};
+
+extern unsigned char t[256];
+
+void foo (struct S1 a, size_t i)
+{
+  t[i] = a.val;
+}
+
+void bar (struct S1 a, size_t i)
+{
+  register size_t _i __asm ("r10") = i;
+
+  asm volatile ("" : "+r" (_i));
+  t[_i] = a.val;
+}