Message ID | 20210305204021.GC1837485@tucnak |
---|---|
State | New |
Headers | show |
Series | i386: Improve [QH]Imode rotates with masked shift count [PR99405] | expand |
On Fri, Mar 5, 2021 at 9:40 PM Jakub Jelinek <jakub@redhat.com> wrote: > > Hi! > > The following testcase shows that while we nicely optimize away the > useless and? of shift count before rotation for [SD]Imode rotates, > we don't do that for [QH]Imode. > > The following patch optimizes that by using the right iterator on those > 4 patterns. > > Bootstrapped/regtested on x86_64-linux and i686-linux. Ok for trunk? > Or just GCC12? > > 2021-03-05 Jakub Jelinek <jakub@redhat.com> > > PR target/99405 > * config/i386/i386.md (*<insn><mode>3_mask, *<insn><mode>3_mask_1): > For any_rotate define_insn_split and following splitters, use > SWI iterator instead of SWI48. > > * gcc.target/i386/pr99405.c: New test. I'm not sure I remember why I left out QI and HImode from rotates (and shifts) when these patterns were introduced, but the testcase shows that they are effective. Since this is not a regression, OK for gcc-12 Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2021-03-03 10:02:27.871589603 +0100 > +++ gcc/config/i386/i386.md 2021-03-05 14:00:35.768378973 +0100 > @@ -11951,9 +11951,9 @@ (define_expand "<insn><mode>3" > > ;; Avoid useless masking of count operand. > (define_insn_and_split "*<insn><mode>3_mask" > - [(set (match_operand:SWI48 0 "nonimmediate_operand") > - (any_rotate:SWI48 > - (match_operand:SWI48 1 "nonimmediate_operand") > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > (subreg:QI > (and:SI > (match_operand:SI 2 "register_operand" "c") > @@ -11967,15 +11967,15 @@ (define_insn_and_split "*<insn><mode>3_m > "&& 1" > [(parallel > [(set (match_dup 0) > - (any_rotate:SWI48 (match_dup 1) > - (match_dup 2))) > + (any_rotate:SWI (match_dup 1) > + (match_dup 2))) > (clobber (reg:CC FLAGS_REG))])] > "operands[2] = gen_lowpart (QImode, operands[2]);") > > (define_split > - [(set (match_operand:SWI48 0 "register_operand") > - (any_rotate:SWI48 > - (match_operand:SWI48 1 "const_int_operand") > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > (subreg:QI > (and:SI > (match_operand:SI 2 "register_operand") > @@ -11984,14 +11984,14 @@ (define_split > == GET_MODE_BITSIZE (<MODE>mode) - 1" > [(set (match_dup 4) (match_dup 1)) > (set (match_dup 0) > - (any_rotate:SWI48 (match_dup 4) > - (subreg:QI (match_dup 2) 0)))] > + (any_rotate:SWI (match_dup 4) > + (subreg:QI (match_dup 2) 0)))] > "operands[4] = gen_reg_rtx (<MODE>mode);") > > (define_insn_and_split "*<insn><mode>3_mask_1" > - [(set (match_operand:SWI48 0 "nonimmediate_operand") > - (any_rotate:SWI48 > - (match_operand:SWI48 1 "nonimmediate_operand") > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > (and:QI > (match_operand:QI 2 "register_operand" "c") > (match_operand:QI 3 "const_int_operand")))) > @@ -12004,14 +12004,14 @@ (define_insn_and_split "*<insn><mode>3_m > "&& 1" > [(parallel > [(set (match_dup 0) > - (any_rotate:SWI48 (match_dup 1) > - (match_dup 2))) > + (any_rotate:SWI (match_dup 1) > + (match_dup 2))) > (clobber (reg:CC FLAGS_REG))])]) > > (define_split > - [(set (match_operand:SWI48 0 "register_operand") > - (any_rotate:SWI48 > - (match_operand:SWI48 1 "const_int_operand") > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > (and:QI > (match_operand:QI 2 "register_operand") > (match_operand:QI 3 "const_int_operand"))))] > @@ -12019,7 +12019,7 @@ (define_split > == GET_MODE_BITSIZE (<MODE>mode) - 1" > [(set (match_dup 4) (match_dup 1)) > (set (match_dup 0) > - (any_rotate:SWI48 (match_dup 4) (match_dup 2)))] > + (any_rotate:SWI (match_dup 4) (match_dup 2)))] > "operands[4] = gen_reg_rtx (<MODE>mode);") > > ;; Implement rotation using two double-precision > --- gcc/testsuite/gcc.target/i386/pr99405.c.jj 2021-03-05 13:40:20.334860937 +0100 > +++ gcc/testsuite/gcc.target/i386/pr99405.c 2021-03-05 13:39:51.131185009 +0100 > @@ -0,0 +1,23 @@ > +/* PR target/99405 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */ > +/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */ > + > +unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x >> (-y & 7)); } > +unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | (x >> (-y & 15)); } > +unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x >> (-y & 31)); } > +unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x << (-y & 7)); } > +unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | (x << (-y & 15)); } > +unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x << (-y & 31)); } > +unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x << v) | (x >> w); } > +unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x << v) | (x >> w); } > +unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x << v) | (x >> w); } > +unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x >> v) | (x << w); } > +unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x >> v) | (x << w); } > +unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x >> v) | (x << w); } > +#ifdef __x86_64__ > +unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y & 63)) | (x >> (-y & 63)); } > +unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y & 63)) | (x << (-y & 63)); } > +unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); } > +unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); } > +#endif > > Jakub >
--- gcc/config/i386/i386.md.jj 2021-03-03 10:02:27.871589603 +0100 +++ gcc/config/i386/i386.md 2021-03-05 14:00:35.768378973 +0100 @@ -11951,9 +11951,9 @@ (define_expand "<insn><mode>3" ;; Avoid useless masking of count operand. (define_insn_and_split "*<insn><mode>3_mask" - [(set (match_operand:SWI48 0 "nonimmediate_operand") - (any_rotate:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand") + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") (subreg:QI (and:SI (match_operand:SI 2 "register_operand" "c") @@ -11967,15 +11967,15 @@ (define_insn_and_split "*<insn><mode>3_m "&& 1" [(parallel [(set (match_dup 0) - (any_rotate:SWI48 (match_dup 1) - (match_dup 2))) + (any_rotate:SWI (match_dup 1) + (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = gen_lowpart (QImode, operands[2]);") (define_split - [(set (match_operand:SWI48 0 "register_operand") - (any_rotate:SWI48 - (match_operand:SWI48 1 "const_int_operand") + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") (subreg:QI (and:SI (match_operand:SI 2 "register_operand") @@ -11984,14 +11984,14 @@ (define_split == GET_MODE_BITSIZE (<MODE>mode) - 1" [(set (match_dup 4) (match_dup 1)) (set (match_dup 0) - (any_rotate:SWI48 (match_dup 4) - (subreg:QI (match_dup 2) 0)))] + (any_rotate:SWI (match_dup 4) + (subreg:QI (match_dup 2) 0)))] "operands[4] = gen_reg_rtx (<MODE>mode);") (define_insn_and_split "*<insn><mode>3_mask_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand") - (any_rotate:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand") + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") (and:QI (match_operand:QI 2 "register_operand" "c") (match_operand:QI 3 "const_int_operand")))) @@ -12004,14 +12004,14 @@ (define_insn_and_split "*<insn><mode>3_m "&& 1" [(parallel [(set (match_dup 0) - (any_rotate:SWI48 (match_dup 1) - (match_dup 2))) + (any_rotate:SWI (match_dup 1) + (match_dup 2))) (clobber (reg:CC FLAGS_REG))])]) (define_split - [(set (match_operand:SWI48 0 "register_operand") - (any_rotate:SWI48 - (match_operand:SWI48 1 "const_int_operand") + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") (and:QI (match_operand:QI 2 "register_operand") (match_operand:QI 3 "const_int_operand"))))] @@ -12019,7 +12019,7 @@ (define_split == GET_MODE_BITSIZE (<MODE>mode) - 1" [(set (match_dup 4) (match_dup 1)) (set (match_dup 0) - (any_rotate:SWI48 (match_dup 4) (match_dup 2)))] + (any_rotate:SWI (match_dup 4) (match_dup 2)))] "operands[4] = gen_reg_rtx (<MODE>mode);") ;; Implement rotation using two double-precision --- gcc/testsuite/gcc.target/i386/pr99405.c.jj 2021-03-05 13:40:20.334860937 +0100 +++ gcc/testsuite/gcc.target/i386/pr99405.c 2021-03-05 13:39:51.131185009 +0100 @@ -0,0 +1,23 @@ +/* PR target/99405 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */ +/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */ + +unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x >> (-y & 7)); } +unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | (x >> (-y & 15)); } +unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x >> (-y & 31)); } +unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x << (-y & 7)); } +unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | (x << (-y & 15)); } +unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x << (-y & 31)); } +unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x << v) | (x >> w); } +unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x << v) | (x >> w); } +unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x << v) | (x >> w); } +unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x >> v) | (x << w); } +unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x >> v) | (x << w); } +unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x >> v) | (x << w); } +#ifdef __x86_64__ +unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y & 63)) | (x >> (-y & 63)); } +unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y & 63)) | (x << (-y & 63)); } +unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); } +unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); } +#endif