diff mbox

[i386] : Some further improvements to patterns with embedded zero-extract RTXes

Message ID CAFULd4az-bW8D8V0XiroEc8YthzcXMxveC=Y5c4ATsdMS=2OvA@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Dec. 27, 2016, 9:19 a.m. UTC
Hello!

While we still don't allow memory operands for x86_64, we can improve
the generated code by allowing memory operands until reload. This way,
combine is free to create complex patterns, as illustrated by the
following test:

--cut here--
struct S1
{
  unsigned char pad1;
  unsigned char val;
  unsigned short pad2;
};

extern struct S1 t;

struct S1 test_add (struct S1 a, struct S1 b)
{
  a.val += t.val;

  return a;
}
--cut here--

The unpatched x86_64 compiler generates (-O2):

        movl    %edi, %eax
        movzbl  %ah, %edx
        addb    t+1(%rip), %dl
        movb    %dl, %ah

while the patched compiler generates:

        movzbl  t+1(%rip), %edx
        movl    %edi, %eax
        addb    %dl, %ah

Ideally, the memory operand would be merged into the operation, but high
registers can't be used with a REX prefix, so we have to take care that
there are no REX registers in the address.

FWIW, define_memory_constraint can't be used to solve the above
limitation, since reload converts the memory operand to a form involving
BASE_REG_CLASS, which on x86_64 includes REX registers as well.

2016-12-27  Uros Bizjak  <ubizjak@gmail.com>

    PR target/78904
    * config/i386/i386.md (*cmpqi_ext_1, *extvqi, *extzvqi): Use
    nonimmediate_operand instead of nonimmediate_x64nomem_operand.
    (*cmpqi_ext_3, insv<mode>_1, addqi_ext_1, *testqi_ext_1, andqi_ext_1)
    (*<any_or:code>qi_ext_1, *xorqi_ext_1_cc): Use general_operand
    instead of general_x64nomem_operand.
    * config/i386/predicates.md (nonimmediate_x64nomem_operand): Remove.
    (general_x64nomem_operand): Ditto.

testsuite/ChangeLog:

2016-12-27  Uros Bizjak  <ubizjak@gmail.com>

    PR target/78904
    * gcc.target/i386/pr78904-2.c: New test.

Patch was bootstrapped and regression tested on x86_64-linux-gnu.

Committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 243929)
+++ config/i386/i386.md	(working copy)
@@ -1295,7 +1295,7 @@ 
 (define_insn "*cmpqi_ext_1"
   [(set (reg FLAGS_REG)
 	(compare
-	  (match_operand:QI 0 "nonimmediate_x64nomem_operand" "Q,m")
+	  (match_operand:QI 0 "nonimmediate_operand" "Q,m")
 	  (subreg:QI
 	    (zero_extract:SI
 	      (match_operand 1 "ext_register_operand" "Q,Q")
@@ -1340,7 +1340,7 @@ 
 	      (match_operand 0 "ext_register_operand" "Q,Q")
 	      (const_int 8)
 	      (const_int 8)) 0)
-	  (match_operand:QI 1 "general_x64nomem_operand" "Qn,m")))]
+	  (match_operand:QI 1 "general_operand" "Qn,m")))]
   "ix86_match_ccmode (insn, CCmode)"
   "cmp{b}\t{%1, %h0|%h0, %1}"
   [(set_attr "isa" "*,nox64")
@@ -2781,7 +2781,7 @@ 
    (set_attr "mode" "SI")])
 
 (define_insn "*extvqi"
-  [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m")
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Q,?R,m")
         (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q,Q")
                          (const_int 8)
                          (const_int 8)))]
@@ -2836,7 +2836,7 @@ 
    (set_attr "mode" "SI")])
 
 (define_insn "*extzvqi"
-  [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m")
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Q,?R,m")
         (subreg:QI
 	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q")
 			   (const_int 8)
@@ -2897,7 +2897,7 @@ 
   [(set (zero_extract:SWI248 (match_operand 0 "ext_register_operand" "+Q,Q")
 			     (const_int 8)
 			     (const_int 8))
-	(match_operand:SWI248 1 "general_x64nomem_operand" "Qn,m"))]
+	(match_operand:SWI248 1 "general_operand" "Qn,m"))]
   ""
 {
   if (CONST_INT_P (operands[1]))
@@ -6087,7 +6087,7 @@ 
 	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
 			       (const_int 8)
 			       (const_int 8)) 0)
-	    (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0))
+	    (match_operand:QI 2 "general_operand" "Qn,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
   ""
 {
@@ -7889,7 +7889,7 @@ 
 	      (zero_extract:SI (match_operand 0 "ext_register_operand" "Q,Q")
 			       (const_int 8)
 			       (const_int 8)) 0)
-	    (match_operand:QI 1 "general_x64nomem_operand" "Qn,m"))
+	    (match_operand:QI 1 "general_operand" "Qn,m"))
 	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)"
   "test{b}\t{%1, %h0|%h0, %1}"
@@ -8417,7 +8417,7 @@ 
 	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
 			       (const_int 8)
 			       (const_int 8)) 0)
-	    (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0))
+	    (match_operand:QI 2 "general_operand" "Qn,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "and{b}\t{%2, %h0|%h0, %2}"
@@ -8803,7 +8803,7 @@ 
 	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
 			       (const_int 8)
 			       (const_int 8)) 0)
-	    (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0))
+	    (match_operand:QI 2 "general_operand" "Qn,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "<logic>{b}\t{%2, %h0|%h0, %2}"
@@ -8913,7 +8913,7 @@ 
 	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
 			       (const_int 8)
 			       (const_int 8)) 0)
-	    (match_operand:QI 2 "general_x64nomem_operand" "Qn,m"))
+	    (match_operand:QI 2 "general_operand" "Qn,m"))
 	  (const_int 0)))
    (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
 			 (const_int 8)
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md	(revision 243929)
+++ config/i386/predicates.md	(working copy)
@@ -100,18 +100,6 @@ 
 	  && (REGNO (op) > LAST_VIRTUAL_REGISTER || QI_REGNO_P (REGNO (op))));
 })
 
-;; Match nonimmediate operands, but exclude memory operands on 64bit targets.
-(define_predicate "nonimmediate_x64nomem_operand"
-  (if_then_else (match_test "TARGET_64BIT")
-    (match_operand 0 "register_operand")
-    (match_operand 0 "nonimmediate_operand")))
-
-;; Match general operands, but exclude memory operands on 64bit targets.
-(define_predicate "general_x64nomem_operand"
-  (if_then_else (match_test "TARGET_64BIT")
-    (match_operand 0 "nonmemory_operand")
-    (match_operand 0 "general_operand")))
-
 ;; Match register operands, but include memory operands for TARGET_SSE_MATH.
 (define_predicate "register_ssemem_operand"
   (if_then_else
Index: testsuite/gcc.target/i386/pr78904-2.c
===================================================================
--- testsuite/gcc.target/i386/pr78904-2.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr78904-2.c	(working copy)
@@ -0,0 +1,48 @@ 
+/* PR target/78904 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+
+struct S1
+{
+  unsigned char pad1;
+  unsigned char val;
+  unsigned short pad2;
+};
+
+extern struct S1 t;
+
+struct S1 test_and (struct S1 a, struct S1 b)
+{
+  a.val &= b.val;
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb\[^\n\r]*, %.h" } } */
+
+struct S1 test_or (struct S1 a, struct S1 b)
+{
+  a.val |= b.val;
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb\[^\n\r]*, %.h" } } */
+
+struct S1 test_xor (struct S1 a, struct S1 b)
+{
+  a.val ^= b.val;
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb\[^\n\r]*, %.h" } } */
+
+struct S1 test_add (struct S1 a, struct S1 b)
+{
+  a.val += t.val;
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb\[^\n\r]*, %.h" } } */