diff mbox series

[i386] Improve double-word comparisons (PR target/82580)

Message ID 20171019080935.GM14653@tucnak
State New
Headers show
Series [i386] Improve double-word comparisons (PR target/82580) | expand

Commit Message

Jakub Jelinek Oct. 19, 2017, 8:09 a.m. UTC
Hi!

With the patch you've checked in yesterday we generate e.g. for f2
        cmpq    %rdi, %rdx
        sbbq    %rsi, %rcx
        setb    %al
        movzbl  %al, %eax
This is because the peephole2s we have for setcc + movzbl to xorl + setcc
check that the flags register is dead before the instruction before
setcc (sbbq above), which is not the case, so we can't insert xorl %eax, %eax
before sbbq, as that woiuld clobber flags.  But we can emit
        xorl    %eax, %eax
        cmpq    %rdi, %rdx
        sbbq    %rsi, %rcx
        setb    %al
in this case, move the clearing one insn earlier (of course assuming that
neither the cmpq nor sbbq use or set %rax).

The following peephole2s do that, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?

The reason for only testing no movzb* insns for lp64 is that for -m32 there
are some - the arguments are passed on the stack, so they need to be loaded
from there and %eax is one of the registers it chooses to use.

2017-10-19  Jakub Jelinek  <jakub@redhat.com>

	PR target/82580
	* config/i386/i386.md (setcc + movzbl to xor + setcc): New peephole2.
	(setcc + and to xor + setcc): New peephole2.

	* gcc.target/i386/pr82580.c: Use {\msbb} instead of "sbb" in
	scan-assembler-times.  Check that there are no movzb* instructions
	if lp64.


	Jakub

Comments

Uros Bizjak Oct. 19, 2017, 8:28 a.m. UTC | #1
On Thu, Oct 19, 2017 at 10:09 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> With the patch you've checked in yesterday we generate e.g. for f2
>         cmpq    %rdi, %rdx
>         sbbq    %rsi, %rcx
>         setb    %al
>         movzbl  %al, %eax
> This is because the peephole2s we have for setcc + movzbl to xorl + setcc
> check that the flags register is dead before the instruction before
> setcc (sbbq above), which is not the case, so we can't insert xorl %eax, %eax
> before sbbq, as that woiuld clobber flags.  But we can emit
>         xorl    %eax, %eax
>         cmpq    %rdi, %rdx
>         sbbq    %rsi, %rcx
>         setb    %al
> in this case, move the clearing one insn earlier (of course assuming that
> neither the cmpq nor sbbq use or set %rax).
>
> The following peephole2s do that, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?
>
> The reason for only testing no movzb* insns for lp64 is that for -m32 there
> are some - the arguments are passed on the stack, so they need to be loaded
> from there and %eax is one of the registers it chooses to use.
>
> 2017-10-19  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/82580
>         * config/i386/i386.md (setcc + movzbl to xor + setcc): New peephole2.
>         (setcc + and to xor + setcc): New peephole2.
>
>         * gcc.target/i386/pr82580.c: Use {\msbb} instead of "sbb" in
>         scan-assembler-times.  Check that there are no movzb* instructions
>         if lp64.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2017-10-18 14:01:33.000000000 +0200
> +++ gcc/config/i386/i386.md     2017-10-18 14:38:04.623063038 +0200
> @@ -12300,6 +12300,34 @@ (define_peephole2
>    ix86_expand_clear (operands[3]);
>  })
>
> +(define_peephole2
> +  [(set (reg FLAGS_REG) (match_operand 0))
> +   (parallel [(set (reg FLAGS_REG) (match_operand 1))
> +             (match_operand 5)])
> +   (set (match_operand:QI 2 "register_operand")
> +       (match_operator:QI 3 "ix86_comparison_operator"
> +         [(reg FLAGS_REG) (const_int 0)]))
> +   (set (match_operand 4 "any_QIreg_operand")
> +       (zero_extend (match_dup 2)))]
> +  "(peep2_reg_dead_p (4, operands[2])
> +    || operands_match_p (operands[2], operands[4]))
> +   && ! reg_overlap_mentioned_p (operands[4], operands[0])
> +   && ! reg_overlap_mentioned_p (operands[4], operands[1])
> +   && ! reg_set_p (operands[4], operands[5])
> +   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
> +   && peep2_regno_dead_p (0, FLAGS_REG)"
> +  [(set (match_dup 6) (match_dup 0))
> +   (parallel [(set (match_dup 7) (match_dup 1))
> +             (match_dup 5)])
> +   (set (strict_low_part (match_dup 8))
> +       (match_dup 3))]
> +{
> +  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
> +  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
> +  operands[8] = gen_lowpart (QImode, operands[4]);
> +  ix86_expand_clear (operands[4]);
> +})
> +
>  ;; Similar, but match zero extend with andsi3.
>
>  (define_peephole2
> @@ -12345,6 +12373,35 @@ (define_peephole2
>    operands[6] = gen_lowpart (QImode, operands[3]);
>    ix86_expand_clear (operands[3]);
>  })
> +
> +(define_peephole2
> +  [(set (reg FLAGS_REG) (match_operand 0))
> +   (parallel [(set (reg FLAGS_REG) (match_operand 1))
> +             (match_operand 5)])
> +   (set (match_operand:QI 2 "register_operand")
> +       (match_operator:QI 3 "ix86_comparison_operator"
> +         [(reg FLAGS_REG) (const_int 0)]))
> +   (parallel [(set (match_operand 4 "any_QIreg_operand")
> +                  (zero_extend (match_dup 2)))
> +             (clobber (reg:CC FLAGS_REG))])]
> +  "(peep2_reg_dead_p (4, operands[2])
> +    || operands_match_p (operands[2], operands[4]))
> +   && ! reg_overlap_mentioned_p (operands[4], operands[0])
> +   && ! reg_overlap_mentioned_p (operands[4], operands[1])
> +   && ! reg_set_p (operands[4], operands[5])
> +   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
> +   && peep2_regno_dead_p (0, FLAGS_REG)"
> +  [(set (match_dup 6) (match_dup 0))
> +   (parallel [(set (match_dup 7) (match_dup 1))
> +             (match_dup 5)])
> +   (set (strict_low_part (match_dup 8))
> +       (match_dup 3))]
> +{
> +  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
> +  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
> +  operands[8] = gen_lowpart (QImode, operands[4]);
> +  ix86_expand_clear (operands[4]);
> +})
>
>  ;; Call instructions.
>
> --- gcc/testsuite/gcc.target/i386/pr82580.c.jj  2017-10-19 09:08:14.499637398 +0200
> +++ gcc/testsuite/gcc.target/i386/pr82580.c     2017-10-19 10:01:22.148721921 +0200
> @@ -35,4 +35,5 @@ void f21 (S x, S y) { if (x >= y) bar ()
>  void f22 (S x, S y) { if (x < y) bar (); }
>  void f23 (S x, S y) { if (x <= y) bar (); }
>
> -/* { dg-final { scan-assembler-times "sbb" 16 } } */
> +/* { dg-final { scan-assembler-times {\msbb} 16 } } */
> +/* { dg-final { scan-assembler-not {\mmovzb} { target lp64 } } } */
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386.md.jj	2017-10-18 14:01:33.000000000 +0200
+++ gcc/config/i386/i386.md	2017-10-18 14:38:04.623063038 +0200
@@ -12300,6 +12300,34 @@  (define_peephole2
   ix86_expand_clear (operands[3]);
 })
 
+(define_peephole2
+  [(set (reg FLAGS_REG) (match_operand 0))
+   (parallel [(set (reg FLAGS_REG) (match_operand 1))
+	      (match_operand 5)])
+   (set (match_operand:QI 2 "register_operand")
+	(match_operator:QI 3 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))
+   (set (match_operand 4 "any_QIreg_operand")
+	(zero_extend (match_dup 2)))]
+  "(peep2_reg_dead_p (4, operands[2])
+    || operands_match_p (operands[2], operands[4]))
+   && ! reg_overlap_mentioned_p (operands[4], operands[0])
+   && ! reg_overlap_mentioned_p (operands[4], operands[1])
+   && ! reg_set_p (operands[4], operands[5])
+   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(set (match_dup 6) (match_dup 0))
+   (parallel [(set (match_dup 7) (match_dup 1))
+	      (match_dup 5)])
+   (set (strict_low_part (match_dup 8))
+	(match_dup 3))]
+{
+  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
+  operands[8] = gen_lowpart (QImode, operands[4]);
+  ix86_expand_clear (operands[4]);
+})
+
 ;; Similar, but match zero extend with andsi3.
 
 (define_peephole2
@@ -12345,6 +12373,35 @@  (define_peephole2
   operands[6] = gen_lowpart (QImode, operands[3]);
   ix86_expand_clear (operands[3]);
 })
+
+(define_peephole2
+  [(set (reg FLAGS_REG) (match_operand 0))
+   (parallel [(set (reg FLAGS_REG) (match_operand 1))
+	      (match_operand 5)])
+   (set (match_operand:QI 2 "register_operand")
+	(match_operator:QI 3 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))
+   (parallel [(set (match_operand 4 "any_QIreg_operand")
+		   (zero_extend (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(peep2_reg_dead_p (4, operands[2])
+    || operands_match_p (operands[2], operands[4]))
+   && ! reg_overlap_mentioned_p (operands[4], operands[0])
+   && ! reg_overlap_mentioned_p (operands[4], operands[1])
+   && ! reg_set_p (operands[4], operands[5])
+   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(set (match_dup 6) (match_dup 0))
+   (parallel [(set (match_dup 7) (match_dup 1))
+	      (match_dup 5)])
+   (set (strict_low_part (match_dup 8))
+	(match_dup 3))]
+{
+  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
+  operands[8] = gen_lowpart (QImode, operands[4]);
+  ix86_expand_clear (operands[4]);
+})
 
 ;; Call instructions.
 
--- gcc/testsuite/gcc.target/i386/pr82580.c.jj	2017-10-19 09:08:14.499637398 +0200
+++ gcc/testsuite/gcc.target/i386/pr82580.c	2017-10-19 10:01:22.148721921 +0200
@@ -35,4 +35,5 @@  void f21 (S x, S y) { if (x >= y) bar ()
 void f22 (S x, S y) { if (x < y) bar (); }
 void f23 (S x, S y) { if (x <= y) bar (); }
 
-/* { dg-final { scan-assembler-times "sbb" 16 } } */
+/* { dg-final { scan-assembler-times {\msbb} 16 } } */
+/* { dg-final { scan-assembler-not {\mmovzb} { target lp64 } } } */