diff mbox series

i386: Improve [QH]Imode rotates with masked shift count [PR99405]

Message ID 20210305204021.GC1837485@tucnak
State New
Headers show
Series i386: Improve [QH]Imode rotates with masked shift count [PR99405] | expand

Commit Message

Jakub Jelinek March 5, 2021, 8:40 p.m. UTC
Hi!

The following testcase shows that while we nicely optimize away the
useless and? of shift count before rotation for [SD]Imode rotates,
we don't do that for [QH]Imode.

The following patch optimizes that by using the right iterator on those
4 patterns.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
Or just GCC12?

2021-03-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/99405
	* config/i386/i386.md (*<insn><mode>3_mask, *<insn><mode>3_mask_1):
	For any_rotate define_insn_split and following splitters, use
	SWI iterator instead of SWI48.

	* gcc.target/i386/pr99405.c: New test.


	Jakub

Comments

Uros Bizjak March 6, 2021, 10:07 a.m. UTC | #1
On Fri, Mar 5, 2021 at 9:40 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following testcase shows that while we nicely optimize away the
> useless and? of shift count before rotation for [SD]Imode rotates,
> we don't do that for [QH]Imode.
>
> The following patch optimizes that by using the right iterator on those
> 4 patterns.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
> Or just GCC12?
>
> 2021-03-05  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/99405
>         * config/i386/i386.md (*<insn><mode>3_mask, *<insn><mode>3_mask_1):
>         For any_rotate define_insn_split and following splitters, use
>         SWI iterator instead of SWI48.
>
>         * gcc.target/i386/pr99405.c: New test.

I'm not sure I remember why I left out QI and HImode from rotates (and
shifts) when these patterns were introduced, but the testcase shows
that they are effective. Since this is not a regression, OK for gcc-12

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2021-03-03 10:02:27.871589603 +0100
> +++ gcc/config/i386/i386.md     2021-03-05 14:00:35.768378973 +0100
> @@ -11951,9 +11951,9 @@ (define_expand "<insn><mode>3"
>
>  ;; Avoid useless masking of count operand.
>  (define_insn_and_split "*<insn><mode>3_mask"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand")
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
>           (subreg:QI
>             (and:SI
>               (match_operand:SI 2 "register_operand" "c")
> @@ -11967,15 +11967,15 @@ (define_insn_and_split "*<insn><mode>3_m
>    "&& 1"
>    [(parallel
>       [(set (match_dup 0)
> -          (any_rotate:SWI48 (match_dup 1)
> -                            (match_dup 2)))
> +          (any_rotate:SWI (match_dup 1)
> +                          (match_dup 2)))
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = gen_lowpart (QImode, operands[2]);")
>
>  (define_split
> -  [(set (match_operand:SWI48 0 "register_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "const_int_operand")
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
>           (subreg:QI
>             (and:SI
>               (match_operand:SI 2 "register_operand")
> @@ -11984,14 +11984,14 @@ (define_split
>     == GET_MODE_BITSIZE (<MODE>mode) - 1"
>   [(set (match_dup 4) (match_dup 1))
>    (set (match_dup 0)
> -       (any_rotate:SWI48 (match_dup 4)
> -                        (subreg:QI (match_dup 2) 0)))]
> +       (any_rotate:SWI (match_dup 4)
> +                      (subreg:QI (match_dup 2) 0)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
>  (define_insn_and_split "*<insn><mode>3_mask_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand")
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
>           (and:QI
>             (match_operand:QI 2 "register_operand" "c")
>             (match_operand:QI 3 "const_int_operand"))))
> @@ -12004,14 +12004,14 @@ (define_insn_and_split "*<insn><mode>3_m
>    "&& 1"
>    [(parallel
>       [(set (match_dup 0)
> -          (any_rotate:SWI48 (match_dup 1)
> -                            (match_dup 2)))
> +          (any_rotate:SWI (match_dup 1)
> +                          (match_dup 2)))
>        (clobber (reg:CC FLAGS_REG))])])
>
>  (define_split
> -  [(set (match_operand:SWI48 0 "register_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "const_int_operand")
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
>           (and:QI
>             (match_operand:QI 2 "register_operand")
>             (match_operand:QI 3 "const_int_operand"))))]
> @@ -12019,7 +12019,7 @@ (define_split
>    == GET_MODE_BITSIZE (<MODE>mode) - 1"
>   [(set (match_dup 4) (match_dup 1))
>    (set (match_dup 0)
> -       (any_rotate:SWI48 (match_dup 4) (match_dup 2)))]
> +       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
>  ;; Implement rotation using two double-precision
> --- gcc/testsuite/gcc.target/i386/pr99405.c.jj  2021-03-05 13:40:20.334860937 +0100
> +++ gcc/testsuite/gcc.target/i386/pr99405.c     2021-03-05 13:39:51.131185009 +0100
> @@ -0,0 +1,23 @@
> +/* PR target/99405 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */
> +/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */
> +
> +unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x >> (-y & 7)); }
> +unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | (x >> (-y & 15)); }
> +unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x >> (-y & 31)); }
> +unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x << (-y & 7)); }
> +unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | (x << (-y & 15)); }
> +unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x << (-y & 31)); }
> +unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x << v) | (x >> w); }
> +unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x << v) | (x >> w); }
> +unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x << v) | (x >> w); }
> +unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x >> v) | (x << w); }
> +unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x >> v) | (x << w); }
> +unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x >> v) | (x << w); }
> +#ifdef __x86_64__
> +unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y & 63)) | (x >> (-y & 63)); }
> +unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y & 63)) | (x << (-y & 63)); }
> +unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); }
> +unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); }
> +#endif
>
>         Jakub
>
diff mbox series

Patch

--- gcc/config/i386/i386.md.jj	2021-03-03 10:02:27.871589603 +0100
+++ gcc/config/i386/i386.md	2021-03-05 14:00:35.768378973 +0100
@@ -11951,9 +11951,9 @@  (define_expand "<insn><mode>3"
 
 ;; Avoid useless masking of count operand.
 (define_insn_and_split "*<insn><mode>3_mask"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand")
-	(any_rotate:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand")
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
 	  (subreg:QI
 	    (and:SI
 	      (match_operand:SI 2 "register_operand" "c")
@@ -11967,15 +11967,15 @@  (define_insn_and_split "*<insn><mode>3_m
   "&& 1"
   [(parallel
      [(set (match_dup 0)
-	   (any_rotate:SWI48 (match_dup 1)
-			     (match_dup 2)))
+	   (any_rotate:SWI (match_dup 1)
+			   (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
 (define_split
-  [(set (match_operand:SWI48 0 "register_operand")
-	(any_rotate:SWI48
-	  (match_operand:SWI48 1 "const_int_operand")
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
 	  (subreg:QI
 	    (and:SI
 	      (match_operand:SI 2 "register_operand")
@@ -11984,14 +11984,14 @@  (define_split
    == GET_MODE_BITSIZE (<MODE>mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
-       (any_rotate:SWI48 (match_dup 4)
-			 (subreg:QI (match_dup 2) 0)))]
+       (any_rotate:SWI (match_dup 4)
+		       (subreg:QI (match_dup 2) 0)))]
  "operands[4] = gen_reg_rtx (<MODE>mode);")
 
 (define_insn_and_split "*<insn><mode>3_mask_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand")
-	(any_rotate:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand")
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
 	  (and:QI
 	    (match_operand:QI 2 "register_operand" "c")
 	    (match_operand:QI 3 "const_int_operand"))))
@@ -12004,14 +12004,14 @@  (define_insn_and_split "*<insn><mode>3_m
   "&& 1"
   [(parallel
      [(set (match_dup 0)
-	   (any_rotate:SWI48 (match_dup 1)
-			     (match_dup 2)))
+	   (any_rotate:SWI (match_dup 1)
+			   (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])])
 
 (define_split
-  [(set (match_operand:SWI48 0 "register_operand")
-	(any_rotate:SWI48
-	  (match_operand:SWI48 1 "const_int_operand")
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
 	  (and:QI
 	    (match_operand:QI 2 "register_operand")
 	    (match_operand:QI 3 "const_int_operand"))))]
@@ -12019,7 +12019,7 @@  (define_split
   == GET_MODE_BITSIZE (<MODE>mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
-       (any_rotate:SWI48 (match_dup 4) (match_dup 2)))]
+       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
  "operands[4] = gen_reg_rtx (<MODE>mode);")
 
 ;; Implement rotation using two double-precision
--- gcc/testsuite/gcc.target/i386/pr99405.c.jj	2021-03-05 13:40:20.334860937 +0100
+++ gcc/testsuite/gcc.target/i386/pr99405.c	2021-03-05 13:39:51.131185009 +0100
@@ -0,0 +1,23 @@ 
+/* PR target/99405 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */
+/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */
+
+unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x >> (-y & 7)); }
+unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | (x >> (-y & 15)); }
+unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x >> (-y & 31)); }
+unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x << (-y & 7)); }
+unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | (x << (-y & 15)); }
+unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x << (-y & 31)); }
+unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x << v) | (x >> w); }
+unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x << v) | (x >> w); }
+unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x << v) | (x >> w); }
+unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 7; unsigned char w = -y & 7; return (x >> v) | (x << w); }
+unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y & 15; unsigned char w = -y & 15; return (x >> v) | (x << w); }
+unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 31; unsigned char w = -y & 31; return (x >> v) | (x << w); }
+#ifdef __x86_64__
+unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y & 63)) | (x >> (-y & 63)); }
+unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y & 63)) | (x << (-y & 63)); }
+unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); }
+unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned char v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); }
+#endif