diff mbox

Fold __builtin_ia32_[tl]zcnt_u{16,32,64} (PR target/78057)

Message ID 20161021152339.GK7282@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 21, 2016, 3:23 p.m. UTC
Hi!

This patch adds folding for the new ia32 md builtins.
If they can be folded into a constant, that is done in ix86_fold_builtin;
if they can be folded to the corresponding generic __builtin_c[lt]z* (which
has e.g. the advantage that VRP knows what values it can have etc.),
that is done in the gimple_fold_builtin target hook.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-10-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/78057
	* config/i386/i386.c: Include fold-const-call.h, tree-vrp.h
	and tree-ssanames.h.
	(ix86_fold_builtin): Fold IX86_BUILTIN_[LT]ZCNT{16,32,64}
	with INTEGER_CST argument.
	(ix86_gimple_fold_builtin): New function.
	(TARGET_GIMPLE_FOLD_BUILTIN): Define.

	* gcc.target/i386/pr78057.c: New test.

+/* { dg-final { scan-tree-dump-times "__builtin_clzll " 1 "optimized" { target lp64 } } } */

	Jakub

Comments

Uros Bizjak Oct. 21, 2016, 3:28 p.m. UTC | #1
On Fri, Oct 21, 2016 at 5:23 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch adds folding for the new ia32 md builtins.
> If they can be folded into constant, it is done in ix86_fold_builtin,
> if they can fold to corresponding generic __builtin_c[lt]z* (which have
> e.g. the advantage that VRP knows about what values it can have etc.),
> it is done in gimple_fold_builtin target hook.

Are you sure that there is no way zero will be passed to generic
__builtin_c[lt]z?

Uros.
Jakub Jelinek Oct. 21, 2016, 3:31 p.m. UTC | #2
On Fri, Oct 21, 2016 at 05:28:42PM +0200, Uros Bizjak wrote:
> On Fri, Oct 21, 2016 at 5:23 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> > Hi!
> >
> > This patch adds folding for the new ia32 md builtins.
> > If they can be folded into constant, it is done in ix86_fold_builtin,
> > if they can fold to corresponding generic __builtin_c[lt]z* (which have
> > e.g. the advantage that VRP knows about what values it can have etc.),
> > it is done in gimple_fold_builtin target hook.
> 
> Are you sure that there is no way zero will be passed to generic
> __builtin_c[lt]z?

The patch only folds the ia32-specific builtins into __builtin_c[lt]z if
the argument is known not to be 0 (from VRP).
That is the expr_not_equal_to call, which uses get_range_info under the
hood.

	Jakub
Uros Bizjak Oct. 21, 2016, 3:33 p.m. UTC | #3
On Fri, Oct 21, 2016 at 5:31 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Oct 21, 2016 at 05:28:42PM +0200, Uros Bizjak wrote:
>> On Fri, Oct 21, 2016 at 5:23 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > Hi!
>> >
>> > This patch adds folding for the new ia32 md builtins.
>> > If they can be folded into constant, it is done in ix86_fold_builtin,
>> > if they can fold to corresponding generic __builtin_c[lt]z* (which have
>> > e.g. the advantage that VRP knows about what values it can have etc.),
>> > it is done in gimple_fold_builtin target hook.
>>
>> Are you sure that there is no way zero will be passed to generic
>> __builtin_c[lt]z?
>
> The patch only folds the ia32 specific builtins into __builtin_c[lt]z, if
> the argument is known not to be 0 (from VRP).
> That is the expr_not_equal_to call, which uses get_range_info under the
> hood.

I was expecting this answer ;)

Thanks, the patch is OK.

(I'll backport this and my patch to gcc-6 early next week).

Uros.
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2016-10-21 11:36:33.135677698 +0200
+++ gcc/config/i386/i386.c	2016-10-21 11:57:58.248530521 +0200
@@ -77,6 +77,9 @@  along with GCC; see the file COPYING3.
 #include "case-cfn-macros.h"
 #include "regrename.h"
 #include "dojump.h"
+#include "fold-const-call.h"
+#include "tree-vrp.h"
+#include "tree-ssanames.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -33332,6 +33335,40 @@  ix86_fold_builtin (tree fndecl, int n_ar
 	    return build_real (type, inf);
 	  }
 
+	case IX86_BUILTIN_TZCNT16:
+	case IX86_BUILTIN_TZCNT32:
+	case IX86_BUILTIN_TZCNT64:
+	  gcc_assert (n_args == 1);
+	  if (TREE_CODE (args[0]) == INTEGER_CST)
+	    {
+	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+	      tree arg = args[0];
+	      if (fn_code == IX86_BUILTIN_TZCNT16)
+		arg = fold_convert (short_unsigned_type_node, arg);
+	      if (integer_zerop (arg))
+		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+	      else
+		return fold_const_call (CFN_CTZ, type, arg);
+	    }
+	  break;
+
+	case IX86_BUILTIN_LZCNT16:
+	case IX86_BUILTIN_LZCNT32:
+	case IX86_BUILTIN_LZCNT64:
+	  gcc_assert (n_args == 1);
+	  if (TREE_CODE (args[0]) == INTEGER_CST)
+	    {
+	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+	      tree arg = args[0];
+	      if (fn_code == IX86_BUILTIN_LZCNT16)
+		arg = fold_convert (short_unsigned_type_node, arg);
+	      if (integer_zerop (arg))
+		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+	      else
+		return fold_const_call (CFN_CLZ, type, arg);
+	    }
+	  break;
+
 	default:
 	  break;
 	}
@@ -33344,6 +33381,67 @@  ix86_fold_builtin (tree fndecl, int n_ar
   return NULL_TREE;
 }
 
+/* Fold the MD builtin call at *GSI in GIMPLE (use ix86_fold_builtin for
+   folding into a constant).  Return true iff the call was replaced.  */
+
+bool
+ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (stmt);
+  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
+  int n_args = gimple_call_num_args (stmt);
+  enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
+  tree decl = NULL_TREE;	/* Generic builtin to call instead.  */
+  tree arg0;
+
+  switch (fn_code)
+    {
+    case IX86_BUILTIN_TZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CTZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_TZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CLZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
+      goto fold_tzcnt_lzcnt;
+
+    fold_tzcnt_lzcnt:	/* Shared tail; DECL was set by the case above.  */
+      gcc_assert (n_args == 1);
+      arg0 = gimple_call_arg (stmt, 0);
+      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
+	{
+	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
+	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
+	    return false;	/* ARG0 is not known to be nonzero.  */
+
+	  location_t loc = gimple_location (stmt);
+	  gimple *g = gimple_build_call (decl, 1, arg0);
+	  gimple_set_location (g, loc);
+	  tree lhs = make_ssa_name (integer_type_node);	/* Holds DECL's result.  */
+	  gimple_call_set_lhs (g, lhs);
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
+	  gimple_set_location (g, loc);
+	  gsi_replace (gsi, g, true);	/* The conversion replaces the MD call.  */
+	  return true;
+	}
+      break;
+
+    default:	/* Everything else, e.g. the 16-bit variants, is left alone.  */
+      break;
+    }
+
+  return false;
+}
+
 /* Make builtins to detect cpu type and features supported.  NAME is
    the builtin name, CODE is the builtin code, and FTYPE is the function
    type of the builtin.  */
@@ -50531,6 +50629,9 @@  ix86_addr_space_zero_address_valid (addr
 #undef TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
 
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
+
 #undef TARGET_COMPARE_VERSION_PRIORITY
 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
 
--- gcc/testsuite/gcc.target/i386/pr78057.c.jj	2016-10-21 11:57:58.249530508 +0200
+++ gcc/testsuite/gcc.target/i386/pr78057.c	2016-10-21 11:57:58.249530508 +0200
@@ -0,0 +1,42 @@ 
+/* PR target/78057 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mlzcnt -fdump-tree-optimized" } */
+
+extern void link_error (void);
+
+int
+foo (int x)
+{
+  if (__builtin_ia32_tzcnt_u16 (16) != 4
+      || __builtin_ia32_tzcnt_u16 (0) != 16
+      || __builtin_ia32_lzcnt_u16 (0x1ff) != 7
+      || __builtin_ia32_lzcnt_u16 (0) != 16
+      || __builtin_ia32_tzcnt_u32 (8) != 3
+      || __builtin_ia32_tzcnt_u32 (0) != 32
+      || __builtin_ia32_lzcnt_u32 (0x3fffffff) != 2
+      || __builtin_ia32_lzcnt_u32 (0) != 32
+#ifdef __x86_64__
+      || __builtin_ia32_tzcnt_u64 (4) != 2
+      || __builtin_ia32_tzcnt_u64 (0) != 64
+      || __builtin_ia32_lzcnt_u64 (0x1fffffff) != 35
+      || __builtin_ia32_lzcnt_u64 (0) != 64
+#endif
+     )
+    link_error ();	/* Must fold away; checked by scan-tree-dump-not.  */
+  x += 2;
+  if (x == 0)	/* Past this check x is nonzero for the calls below.  */
+    return 5;
+  return __builtin_ia32_tzcnt_u32 (x)
+         + __builtin_ia32_lzcnt_u32 (x)
+#ifdef __x86_64__
+	 + __builtin_ia32_tzcnt_u64 (x)
+	 + __builtin_ia32_lzcnt_u64 (x)
+#endif
+	 ;
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_ia32_\[lt]zcnt" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctzll " 1 "optimized" { target lp64 } } } */