diff mbox series

[v2] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]

Message ID 20200507142545.GR8462@tucnak
State New
Headers show
Series [v2] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956] | expand

Commit Message

Jakub Jelinek May 7, 2020, 2:25 p.m. UTC
On Thu, May 07, 2020 at 10:04:35AM +0200, Richard Biener wrote:
> On Thu, 7 May 2020, Jakub Jelinek wrote:
> > The ffs expanders on several targets (x86, ia64, aarch64 at least)
> > emit a conditional move or similar code to handle the case when the
> > argument is 0, which makes the code longer.
> > If we know from VRP that the argument will not be zero, we can (if the
> > target also has a ctz expander) just use ctz which is undefined at zero
> > and thus the expander doesn't need to deal with that.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> can you use direct_internal_fn_supported_p (IFN_CTZ, type, 
> OPTIMIZE_FOR_SPEED)?

Only if it is guarded with #if GIMPLE (because otherwise the fn
isn't declared).
Though, restricting this to GIMPLE seems like a good idea anyway to me.

Ok for trunk if it passes bootstrap/regtest?

2020-05-07  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94956
	* match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
	__builtin_ctz* + 1 if direct IFN_CTZ is supported.

	* gcc.target/i386/pr94956.c: New test.



	Jakub

Comments

Richard Biener May 7, 2020, 3:37 p.m. UTC | #1
On May 7, 2020 4:25:45 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Thu, May 07, 2020 at 10:04:35AM +0200, Richard Biener wrote:
>> On Thu, 7 May 2020, Jakub Jelinek wrote:
>> > The ffs expanders on several targets (x86, ia64, aarch64 at least)
>> > emit a conditional move or similar code to handle the case when the
>> > argument is 0, which makes the code longer.
>> > If we know from VRP that the argument will not be zero, we can (if the
>> > target has also an ctz expander) just use ctz which is undefined at zero
>> > and thus the expander doesn't need to deal with that.
>> > 
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> 
>> can you use direct_internal_fn_supported_p (IFN_CTZ, type, 
>> OPTIMIZE_FOR_SPEED)?
>
>Only if it is guarded with #if GIMPLE (because otherwise the fn
>isn't declared).
>Though, restricting this to GIMPLE seems like a good idea anyway to me.
>
>Ok for trunk if it passes bootstrap/regtest?

OK. 

Richard. 

>2020-05-07  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/94956
>	* match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
>	__builtin_ctz* + 1 if direct IFN_CTZ is supported.
>
>	* gcc.target/i386/pr94956.c: New test.
>
>--- gcc/match.pd.jj	2020-05-06 15:03:51.618058839 +0200
>+++ gcc/match.pd	2020-05-07 16:16:48.466970168 +0200
>@@ -5986,6 +5986,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> 	&& direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> 					   OPTIMIZE_FOR_BOTH))
>     (convert (IFN_POPCOUNT:type @0)))))
>+
>+/* __builtin_ffs needs to deal on many targets with the possible zero
>+   argument.  If we know the argument is always non-zero, __builtin_ctz + 1
>+   should lead to better code.  */
>+(simplify
>+ (FFS tree_expr_nonzero_p@0)
>+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>+      && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
>+					 OPTIMIZE_FOR_SPEED))
>+  (plus (CTZ:type @0) { build_one_cst (type); })))
> #endif
> 
> /* Simplify:
>--- gcc/testsuite/gcc.target/i386/pr94956.c.jj	2020-05-06 16:35:47.085876237 +0200
>+++ gcc/testsuite/gcc.target/i386/pr94956.c	2020-05-06 16:39:52.927140038 +0200
>@@ -0,0 +1,28 @@
>+/* PR tree-optimization/94956 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
>+/* { dg-final { scan-assembler-not "\tsete\t" } } */
>+
>+int
>+foo (unsigned x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffs (x) - 1;
>+}
>+
>+int
>+bar (unsigned long x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffsl (x) - 1;
>+}
>+
>+#ifdef __x86_64__
>+int
>+baz (unsigned long long x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffsll (x) - 1;
>+}
>+#endif
>
>
>	Jakub
diff mbox series

Patch

--- gcc/match.pd.jj	2020-05-06 15:03:51.618058839 +0200
+++ gcc/match.pd	2020-05-07 16:16:48.466970168 +0200
@@ -5986,6 +5986,16 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	&& direct_internal_fn_supported_p (IFN_POPCOUNT, type,
 					   OPTIMIZE_FOR_BOTH))
     (convert (IFN_POPCOUNT:type @0)))))
+
+/* On many targets __builtin_ffs has to handle a possible zero argument.
+   If we know the argument is always non-zero, __builtin_ctz + 1
+   should lead to better code.  */
+(simplify
+ (FFS tree_expr_nonzero_p@0)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+					 OPTIMIZE_FOR_SPEED))
+  (plus (CTZ:type @0) { build_one_cst (type); })))
 #endif
 
 /* Simplify:
--- gcc/testsuite/gcc.target/i386/pr94956.c.jj	2020-05-06 16:35:47.085876237 +0200
+++ gcc/testsuite/gcc.target/i386/pr94956.c	2020-05-06 16:39:52.927140038 +0200
@@ -0,0 +1,28 @@ 
+/* PR tree-optimization/94956 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
+/* { dg-final { scan-assembler-not "\tsete\t" } } */
+
+int
+foo (unsigned x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffs (x) - 1;
+}
+
+int
+bar (unsigned long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsl (x) - 1;
+}
+
+#ifdef __x86_64__
+int
+baz (unsigned long long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsll (x) - 1;
+}
+#endif