diff mbox series

Improve x86_64 *movqi_internal at -Os (PR target/82260)

Message ID 20170920185817.GX1701@tucnak
State New
Headers show
Series Improve x86_64 *movqi_internal at -Os (PR target/82260) | expand

Commit Message

Jakub Jelinek Sept. 20, 2017, 6:58 p.m. UTC
Hi!

As mentioned in the PR, in some cases (one or both *movqi_internal
operands %dil/%sil/%bpl/%spl, neither %r*b) the movl variant is smaller
and in cases where movb and movl is the same size, we might as well choose
the faster instruction.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-09-20  Jakub Jelinek  <jakub@redhat.com>

	PR target/82260
	* config/i386/i386.md (*movqi_internal): Replace =q <- q alternative
	with =Q <- Q, =R <- R and =r <- r alternatives, only enable the
	latter two for 64-bit, renumber alternatives, for -Os imov =q <- n
	alternative always use QI mode, for -Os imov =R <- R alternative
	always use SI mode, for imov =Q <- Q or =r <- r alternatives
	ignore -Os.

	* gcc.target/i386/pr82260-1.c: New test.
	* gcc.target/i386/pr82260-2.c: New test.


	Jakub

Comments

Uros Bizjak Sept. 21, 2017, 6:28 a.m. UTC | #1
On Wed, Sep 20, 2017 at 8:58 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, in some cases (one or both *movqi_internal
> operands %dil/%sil/%bpl/%spl, neither %r*b) the movl variant is smaller
> and in cases where movb and movl is the same size, we might as well choose
> the faster instruction.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-09-20  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/82260
>         * config/i386/i386.md (*movqi_internal): Replace =q <- q alternative
>         with =Q <- Q, =R <- R and =r <- r alternatives, only enable the
>         latter two for 64-bit, renumber alternatives, for -Os imov =q <- n
>         alternative always use QI mode, for -Os imov =R <- R alternative
>         always use SI mode, for imov =Q <- Q or =r <- r alternatives
>         ignore -Os.

BTW: As a personal preference, I find alternatives marked as (=q,n)
easier to read, but I guess we could also live with the above
notation.

>         * gcc.target/i386/pr82260-1.c: New test.
>         * gcc.target/i386/pr82260-2.c: New test.

LGTM, I hope you catched all changes of alternative indexes.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2017-09-20 10:46:08.000000000 +0200
> +++ gcc/config/i386/i386.md     2017-09-20 15:15:19.365142707 +0200
> @@ -2571,9 +2571,9 @@ (define_insn "*movhi_internal"
>
>  (define_insn "*movqi_internal"
>    [(set (match_operand:QI 0 "nonimmediate_operand"
> -                       "=q,q ,q ,r,r ,?r,m ,k,k,r,m,k")
> +                       "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k")
>         (match_operand:QI 1 "general_operand"
> -                       "q ,qn,qm,q,rn,qm,qn,r,k,k,k,m"))]
> +                       "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
>    static char buf[128];
> @@ -2589,17 +2589,17 @@ (define_insn "*movqi_internal"
>      case TYPE_MSKMOV:
>        switch (which_alternative)
>          {
> -       case 7:
> +       case 9:
>           ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
>           break;
> -       case 9:
> +       case 11:
>           ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
>           break;
> -       case 10:
> -       case 11:
> +       case 12:
> +       case 13:
>           gcc_assert (TARGET_AVX512DQ);
>           /* FALLTHRU */
> -       case 8:
> +       case 10:
>           ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
>           break;
>         default:
> @@ -2619,51 +2619,67 @@ (define_insn "*movqi_internal"
>      }
>  }
>    [(set (attr "isa")
> -     (if_then_else (eq_attr "alternative" "10,11")
> -       (const_string "avx512dq")
> -       (const_string "*")))
> +     (cond [(eq_attr "alternative" "1,2")
> +             (const_string "x64")
> +           (eq_attr "alternative" "12,13")
> +             (const_string "avx512dq")
> +          ]
> +          (const_string "*")))
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "7,8,9,10,11")
> +     (cond [(eq_attr "alternative" "9,10,11,12,13")
>               (const_string "mskmov")
> -           (and (eq_attr "alternative" "5")
> +           (and (eq_attr "alternative" "7")
>                  (not (match_operand:QI 1 "aligned_operand")))
>               (const_string "imovx")
>             (match_test "optimize_function_for_size_p (cfun)")
>               (const_string "imov")
> -           (and (eq_attr "alternative" "3")
> +           (and (eq_attr "alternative" "5")
>                  (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
>                       (not (match_test "TARGET_QIMODE_MATH"))))
>               (const_string "imov")
> -           (eq_attr "alternative" "3,5")
> +           (eq_attr "alternative" "5,7")
>               (const_string "imovx")
>             (and (match_test "TARGET_MOVX")
> -                (eq_attr "alternative" "2"))
> +                (eq_attr "alternative" "4"))
>               (const_string "imovx")
>            ]
>            (const_string "imov")))
>     (set (attr "prefix")
> -     (if_then_else (eq_attr "alternative" "7,8,9")
> +     (if_then_else (eq_attr "alternative" "9,10,11")
>         (const_string "vex")
>         (const_string "orig")))
>     (set (attr "mode")
> -      (cond [(eq_attr "alternative" "3,4,5")
> +      (cond [(eq_attr "alternative" "5,6,7")
>                (const_string "SI")
> -            (eq_attr "alternative" "6")
> +            (eq_attr "alternative" "8")
>                (const_string "QI")
> -            (and (eq_attr "alternative" "7,8,9")
> +            (and (eq_attr "alternative" "9,10,11")
>                   (not (match_test "TARGET_AVX512DQ")))
>                (const_string "HI")
>              (eq_attr "type" "imovx")
>                (const_string "SI")
> +            ;; For -Os, 8-bit immediates are always shorter than 32-bit
> +            ;; ones.
> +            (and (eq_attr "type" "imov")
> +                 (and (eq_attr "alternative" "3")
> +                      (match_test "optimize_function_for_size_p (cfun)")))
> +              (const_string "QI")
> +            ;; For -Os, movl where one or both operands are NON_Q_REGS
> +            ;; and both are LEGACY_REGS is shorter than movb.
> +            ;; Otherwise movb and movl sizes are the same, so decide purely
> +            ;; based on speed factors.
> +            (and (eq_attr "type" "imov")
> +                 (and (eq_attr "alternative" "1")
> +                      (match_test "optimize_function_for_size_p (cfun)")))
> +              (const_string "SI")
>              (and (eq_attr "type" "imov")
> -                 (and (eq_attr "alternative" "0,1")
> +                 (and (eq_attr "alternative" "0,1,2,3")
>                        (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
> -                           (and (not (match_test "optimize_function_for_size_p (cfun)"))
> -                                (not (match_test "TARGET_PARTIAL_REG_STALL"))))))
> +                           (not (match_test "TARGET_PARTIAL_REG_STALL")))))
>                (const_string "SI")
>              ;; Avoid partial register stalls when not using QImode arithmetic
>              (and (eq_attr "type" "imov")
> -                 (and (eq_attr "alternative" "0,1")
> +                 (and (eq_attr "alternative" "0,1,2,3")
>                        (and (match_test "TARGET_PARTIAL_REG_STALL")
>                             (not (match_test "TARGET_QIMODE_MATH")))))
>                (const_string "SI")
> --- gcc/testsuite/gcc.target/i386/pr82260-1.c.jj        2017-09-20 15:27:43.696823321 +0200
> +++ gcc/testsuite/gcc.target/i386/pr82260-1.c   2017-09-20 15:34:45.913536355 +0200
> @@ -0,0 +1,26 @@
> +/* PR target/82260 */
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-Os -mtune=generic -masm=att" } */
> +/* movl %esi, %ecx is shorter than movb %sil, %cl.  While
> +   movl %edx, %ecx is the same size as movb %dl, %cl and
> +   movl %r8d, %ecx is the same size as movb %r8b, %cl, movl
> +   is faster on contemporary CPUs.  */
> +/* { dg-final { scan-assembler-not {\mmovb\M} } } */
> +
> +int
> +foo (int x, int c)
> +{
> +  return x >> c;
> +}
> +
> +int
> +bar (int x, int y, int z)
> +{
> +  return x >> z;
> +}
> +
> +int
> +baz (int x, int y, int z, int u, int v)
> +{
> +  return x >> v;
> +}
> --- gcc/testsuite/gcc.target/i386/pr82260-2.c.jj        2017-09-20 15:30:51.690469279 +0200
> +++ gcc/testsuite/gcc.target/i386/pr82260-2.c   2017-09-20 15:35:31.358967291 +0200
> @@ -0,0 +1,25 @@
> +/* PR target/82260 */
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-Os -mtune=generic -masm=att -mtune-ctrl=^partial_reg_dependency" } */
> +/* { dg-final { scan-assembler-not {\mmovb\t%sil, %cl} } } */
> +/* { dg-final { scan-assembler {\mmovl\t%esi, %ecx} } } */
> +/* { dg-final { scan-assembler {\mmovb\t%dl, %cl} } } */
> +/* { dg-final { scan-assembler {\mmovb\t%r8b, %cl} } } */
> +
> +int
> +foo (int x, int c)
> +{
> +  return x >> c;
> +}
> +
> +int
> +bar (int x, int y, int z)
> +{
> +  return x >> z;
> +}
> +
> +int
> +baz (int x, int y, int z, int u, int v)
> +{
> +  return x >> v;
> +}
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386.md.jj	2017-09-20 10:46:08.000000000 +0200
+++ gcc/config/i386/i386.md	2017-09-20 15:15:19.365142707 +0200
@@ -2571,9 +2571,9 @@  (define_insn "*movhi_internal"
 
 (define_insn "*movqi_internal"
   [(set (match_operand:QI 0 "nonimmediate_operand"
-			"=q,q ,q ,r,r ,?r,m ,k,k,r,m,k")
+			"=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k")
 	(match_operand:QI 1 "general_operand"
-			"q ,qn,qm,q,rn,qm,qn,r,k,k,k,m"))]
+			"Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   static char buf[128];
@@ -2589,17 +2589,17 @@  (define_insn "*movqi_internal"
     case TYPE_MSKMOV:
       switch (which_alternative)
         {
-	case 7:
+	case 9:
 	  ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
 	  break;
-	case 9:
+	case 11:
 	  ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
 	  break;
-	case 10:
-	case 11:
+	case 12:
+	case 13:
 	  gcc_assert (TARGET_AVX512DQ);
 	  /* FALLTHRU */
-	case 8:
+	case 10:
 	  ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
 	  break;
 	default:
@@ -2619,51 +2619,67 @@  (define_insn "*movqi_internal"
     }
 }
   [(set (attr "isa")
-     (if_then_else (eq_attr "alternative" "10,11")
-       (const_string "avx512dq")
-       (const_string "*")))
+     (cond [(eq_attr "alternative" "1,2")
+	      (const_string "x64")
+	    (eq_attr "alternative" "12,13")
+	      (const_string "avx512dq")
+	   ]
+	   (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "7,8,9,10,11")
+     (cond [(eq_attr "alternative" "9,10,11,12,13")
 	      (const_string "mskmov")
-	    (and (eq_attr "alternative" "5")
+	    (and (eq_attr "alternative" "7")
 		 (not (match_operand:QI 1 "aligned_operand")))
 	      (const_string "imovx")
 	    (match_test "optimize_function_for_size_p (cfun)")
 	      (const_string "imov")
-	    (and (eq_attr "alternative" "3")
+	    (and (eq_attr "alternative" "5")
 		 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
 		      (not (match_test "TARGET_QIMODE_MATH"))))
 	      (const_string "imov")
-	    (eq_attr "alternative" "3,5")
+	    (eq_attr "alternative" "5,7")
 	      (const_string "imovx")
 	    (and (match_test "TARGET_MOVX")
-		 (eq_attr "alternative" "2"))
+		 (eq_attr "alternative" "4"))
 	      (const_string "imovx")
 	   ]
 	   (const_string "imov")))
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "7,8,9")
+     (if_then_else (eq_attr "alternative" "9,10,11")
        (const_string "vex")
        (const_string "orig")))
    (set (attr "mode")
-      (cond [(eq_attr "alternative" "3,4,5")
+      (cond [(eq_attr "alternative" "5,6,7")
 	       (const_string "SI")
-	     (eq_attr "alternative" "6")
+	     (eq_attr "alternative" "8")
 	       (const_string "QI")
-	     (and (eq_attr "alternative" "7,8,9")
+	     (and (eq_attr "alternative" "9,10,11")
 		  (not (match_test "TARGET_AVX512DQ")))
 	       (const_string "HI")
 	     (eq_attr "type" "imovx")
 	       (const_string "SI")
+	     ;; For -Os, 8-bit immediates are always shorter than 32-bit
+	     ;; ones.
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "3")
+		       (match_test "optimize_function_for_size_p (cfun)")))
+	       (const_string "QI")
+	     ;; For -Os, movl where one or both operands are NON_Q_REGS
+	     ;; and both are LEGACY_REGS is shorter than movb.
+	     ;; Otherwise movb and movl sizes are the same, so decide purely
+	     ;; based on speed factors.
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "1")
+		       (match_test "optimize_function_for_size_p (cfun)")))
+	       (const_string "SI")
 	     (and (eq_attr "type" "imov")
-		  (and (eq_attr "alternative" "0,1")
+		  (and (eq_attr "alternative" "0,1,2,3")
 		       (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
-			    (and (not (match_test "optimize_function_for_size_p (cfun)"))
-				 (not (match_test "TARGET_PARTIAL_REG_STALL"))))))
+			    (not (match_test "TARGET_PARTIAL_REG_STALL")))))
 	       (const_string "SI")
 	     ;; Avoid partial register stalls when not using QImode arithmetic
 	     (and (eq_attr "type" "imov")
-		  (and (eq_attr "alternative" "0,1")
+		  (and (eq_attr "alternative" "0,1,2,3")
 		       (and (match_test "TARGET_PARTIAL_REG_STALL")
 			    (not (match_test "TARGET_QIMODE_MATH")))))
 	       (const_string "SI")
--- gcc/testsuite/gcc.target/i386/pr82260-1.c.jj	2017-09-20 15:27:43.696823321 +0200
+++ gcc/testsuite/gcc.target/i386/pr82260-1.c	2017-09-20 15:34:45.913536355 +0200
@@ -0,0 +1,26 @@ 
+/* PR target/82260 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-Os -mtune=generic -masm=att" } */
+/* movl %esi, %ecx is shorter than movb %sil, %cl.  While
+   movl %edx, %ecx is the same size as movb %dl, %cl and
+   movl %r8d, %ecx is the same size as movb %r8b, %cl, movl
+   is faster on contemporary CPUs.  */
+/* { dg-final { scan-assembler-not {\mmovb\M} } } */
+
+int
+foo (int x, int c)
+{
+  return x >> c;
+}
+
+int
+bar (int x, int y, int z)
+{
+  return x >> z;
+}
+
+int
+baz (int x, int y, int z, int u, int v)
+{
+  return x >> v;
+}
--- gcc/testsuite/gcc.target/i386/pr82260-2.c.jj	2017-09-20 15:30:51.690469279 +0200
+++ gcc/testsuite/gcc.target/i386/pr82260-2.c	2017-09-20 15:35:31.358967291 +0200
@@ -0,0 +1,25 @@ 
+/* PR target/82260 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-Os -mtune=generic -masm=att -mtune-ctrl=^partial_reg_dependency" } */
+/* { dg-final { scan-assembler-not {\mmovb\t%sil, %cl} } } */
+/* { dg-final { scan-assembler {\mmovl\t%esi, %ecx} } } */
+/* { dg-final { scan-assembler {\mmovb\t%dl, %cl} } } */
+/* { dg-final { scan-assembler {\mmovb\t%r8b, %cl} } } */
+
+int
+foo (int x, int c)
+{
+  return x >> c;
+}
+
+int
+bar (int x, int y, int z)
+{
+  return x >> z;
+}
+
+int
+baz (int x, int y, int z, int u, int v)
+{
+  return x >> v;
+}