Message ID | 20171019080935.GM14653@tucnak |
---|---|
State | New |
Headers | show |
Series | [i386] Improve double-word comparisons (PR target/82580) | expand |
On Thu, Oct 19, 2017 at 10:09 AM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > With the patch you've checked in yesterday we generate e.g. for f2 > cmpq %rdi, %rdx > sbbq %rsi, %rcx > setb %al > movzbl %al, %eax > This is because the peephole2s we have for setcc + movzbl to xorl + setcc > check that the flags register is dead before the instruction before > setcc (sbbq above), which is not the case, so we can't insert xorl %eax, %eax > before sbbq, as that woiuld clobber flags. But we can emit > xorl %eax, %eax > cmpq %rdi, %rdx > sbbq %rsi, %rcx > setb %al > in this case, move the clearing one insn earlier (of course assuming that > neither the cmpq nor sbbq use or set %rax). > > The following peephole2s do that, bootstrapped/regtested on x86_64-linux and > i686-linux, ok for trunk? > > The reason for only testing no movzb* insns for lp64 is that for -m32 there > are some - the arguments are passed on the stack, so they need to be loaded > from there and %eax is one of the registers it chooses to use. > > 2017-10-19 Jakub Jelinek <jakub@redhat.com> > > PR target/82580 > * config/i386/i386.md (setcc + movzbl to xor + setcc): New peephole2. > (setcc + and to xor + setcc): New peephole2. > > * gcc.target/i386/pr82580.c: Use {\msbb} instead of "sbb" in > scan-assembler-times. Check that there are no movzb* instructions > if lp64. OK. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2017-10-18 14:01:33.000000000 +0200 > +++ gcc/config/i386/i386.md 2017-10-18 14:38:04.623063038 +0200 > @@ -12300,6 +12300,34 @@ (define_peephole2 > ix86_expand_clear (operands[3]); > }) > > +(define_peephole2 > + [(set (reg FLAGS_REG) (match_operand 0)) > + (parallel [(set (reg FLAGS_REG) (match_operand 1)) > + (match_operand 5)]) > + (set (match_operand:QI 2 "register_operand") > + (match_operator:QI 3 "ix86_comparison_operator" > + [(reg FLAGS_REG) (const_int 0)])) > + (set (match_operand 4 "any_QIreg_operand") > + (zero_extend (match_dup 2)))] > + "(peep2_reg_dead_p (4, operands[2]) > + || operands_match_p (operands[2], operands[4])) > + && ! reg_overlap_mentioned_p (operands[4], operands[0]) > + && ! reg_overlap_mentioned_p (operands[4], operands[1]) > + && ! reg_set_p (operands[4], operands[5]) > + && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) > + && peep2_regno_dead_p (0, FLAGS_REG)" > + [(set (match_dup 6) (match_dup 0)) > + (parallel [(set (match_dup 7) (match_dup 1)) > + (match_dup 5)]) > + (set (strict_low_part (match_dup 8)) > + (match_dup 3))] > +{ > + operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); > + operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG); > + operands[8] = gen_lowpart (QImode, operands[4]); > + ix86_expand_clear (operands[4]); > +}) > + > ;; Similar, but match zero extend with andsi3. > > (define_peephole2 > @@ -12345,6 +12373,35 @@ (define_peephole2 > operands[6] = gen_lowpart (QImode, operands[3]); > ix86_expand_clear (operands[3]); > }) > + > +(define_peephole2 > + [(set (reg FLAGS_REG) (match_operand 0)) > + (parallel [(set (reg FLAGS_REG) (match_operand 1)) > + (match_operand 5)]) > + (set (match_operand:QI 2 "register_operand") > + (match_operator:QI 3 "ix86_comparison_operator" > + [(reg FLAGS_REG) (const_int 0)])) > + (parallel [(set (match_operand 4 "any_QIreg_operand") > + (zero_extend (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))])] > + "(peep2_reg_dead_p (4, operands[2]) > + || operands_match_p (operands[2], operands[4])) > + && ! reg_overlap_mentioned_p (operands[4], operands[0]) > + && ! reg_overlap_mentioned_p (operands[4], operands[1]) > + && ! reg_set_p (operands[4], operands[5]) > + && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) > + && peep2_regno_dead_p (0, FLAGS_REG)" > + [(set (match_dup 6) (match_dup 0)) > + (parallel [(set (match_dup 7) (match_dup 1)) > + (match_dup 5)]) > + (set (strict_low_part (match_dup 8)) > + (match_dup 3))] > +{ > + operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); > + operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG); > + operands[8] = gen_lowpart (QImode, operands[4]); > + ix86_expand_clear (operands[4]); > +}) > > ;; Call instructions. > > --- gcc/testsuite/gcc.target/i386/pr82580.c.jj 2017-10-19 09:08:14.499637398 +0200 > +++ gcc/testsuite/gcc.target/i386/pr82580.c 2017-10-19 10:01:22.148721921 +0200 > @@ -35,4 +35,5 @@ void f21 (S x, S y) { if (x >= y) bar () > void f22 (S x, S y) { if (x < y) bar (); } > void f23 (S x, S y) { if (x <= y) bar (); } > > -/* { dg-final { scan-assembler-times "sbb" 16 } } */ > +/* { dg-final { scan-assembler-times {\msbb} 16 } } */ > +/* { dg-final { scan-assembler-not {\mmovzb} { target lp64 } } } */ > > Jakub
--- gcc/config/i386/i386.md.jj 2017-10-18 14:01:33.000000000 +0200 +++ gcc/config/i386/i386.md 2017-10-18 14:38:04.623063038 +0200 @@ -12300,6 +12300,34 @@ (define_peephole2 ix86_expand_clear (operands[3]); }) +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0)) + (parallel [(set (reg FLAGS_REG) (match_operand 1)) + (match_operand 5)]) + (set (match_operand:QI 2 "register_operand") + (match_operator:QI 3 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 4 "any_QIreg_operand") + (zero_extend (match_dup 2)))] + "(peep2_reg_dead_p (4, operands[2]) + || operands_match_p (operands[2], operands[4])) + && ! reg_overlap_mentioned_p (operands[4], operands[0]) + && ! reg_overlap_mentioned_p (operands[4], operands[1]) + && ! reg_set_p (operands[4], operands[5]) + && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(set (match_dup 6) (match_dup 0)) + (parallel [(set (match_dup 7) (match_dup 1)) + (match_dup 5)]) + (set (strict_low_part (match_dup 8)) + (match_dup 3))] +{ + operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG); + operands[8] = gen_lowpart (QImode, operands[4]); + ix86_expand_clear (operands[4]); +}) + ;; Similar, but match zero extend with andsi3. (define_peephole2 @@ -12345,6 +12373,35 @@ (define_peephole2 operands[6] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0)) + (parallel [(set (reg FLAGS_REG) (match_operand 1)) + (match_operand 5)]) + (set (match_operand:QI 2 "register_operand") + (match_operator:QI 3 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand 4 "any_QIreg_operand") + (zero_extend (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "(peep2_reg_dead_p (4, operands[2]) + || operands_match_p (operands[2], operands[4])) + && ! reg_overlap_mentioned_p (operands[4], operands[0]) + && ! reg_overlap_mentioned_p (operands[4], operands[1]) + && ! reg_set_p (operands[4], operands[5]) + && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(set (match_dup 6) (match_dup 0)) + (parallel [(set (match_dup 7) (match_dup 1)) + (match_dup 5)]) + (set (strict_low_part (match_dup 8)) + (match_dup 3))] +{ + operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG); + operands[8] = gen_lowpart (QImode, operands[4]); + ix86_expand_clear (operands[4]); +}) ;; Call instructions. --- gcc/testsuite/gcc.target/i386/pr82580.c.jj 2017-10-19 09:08:14.499637398 +0200 +++ gcc/testsuite/gcc.target/i386/pr82580.c 2017-10-19 10:01:22.148721921 +0200 @@ -35,4 +35,5 @@ void f21 (S x, S y) { if (x >= y) bar () void f22 (S x, S y) { if (x < y) bar (); } void f23 (S x, S y) { if (x <= y) bar (); } -/* { dg-final { scan-assembler-times "sbb" 16 } } */ +/* { dg-final { scan-assembler-times {\msbb} 16 } } */ +/* { dg-final { scan-assembler-not {\mmovzb} { target lp64 } } } */