diff mbox series

phiopt: Optimize x ? __builtin_clz (x) : 32 in GIMPLE [PR97503]

Message ID 20201021080705.GW2176@tucnak
State New
Headers show
Series phiopt: Optimize x ? __builtin_clz (x) : 32 in GIMPLE [PR97503] | expand

Commit Message

Jakub Jelinek Oct. 21, 2020, 8:07 a.m. UTC
Hi!

While we have at the RTL level noce_try_ifelse_collapse combined with
simplify_cond_clz_ctz, that optimization doesn't always trigger because
e.g. on powerpc there is an define_insn to compare a reg against zero and
copy that register to another one and so we end up with a different pseudo
in the simplify_cond_clz_ctz test and punt.

For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
we can optimize it already in phiopt though, just need to ensure that
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
VRP changes codified that the builtin calls are always undefined at zero,
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
And, in phiopt we already have popcount handling that does pretty much the
same thing, except for always using a zero value rather than the one set
by C?Z_DEFINED_VALUE_AT_ZERO.

So, this patch extends that function to handle not just popcount, but also
clz and ctz.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-10-20  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/97503
	* tree-ssa-phiopt.c (cond_removal_in_popcount_pattern): Rename to ...
	(cond_removal_in_popcount_clz_ctz_pattern): ... this.  Handle not just
	popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.

	* gcc.dg/tree-ssa/pr97503.c: New test.


	Jakub

Comments

Richard Biener Oct. 21, 2020, 8:30 a.m. UTC | #1
On Wed, 21 Oct 2020, Jakub Jelinek wrote:

> Hi!
> 
> While we have at the RTL level noce_try_ifelse_collapse combined with
> simplify_cond_clz_ctz, that optimization doesn't always trigger because
> e.g. on powerpc there is an define_insn to compare a reg against zero and
> copy that register to another one and so we end up with a different pseudo
> in the simplify_cond_clz_ctz test and punt.
> 
> For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
> we can optimize it already in phiopt though, just need to ensure that
> we transform the __builtin_c?z* calls into .C?Z ifns because my recent
> VRP changes codified that the builtin calls are always undefined at zero,
> while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
> And, in phiopt we already have popcount handling that does pretty much the
> same thing, except for always using a zero value rather than the one set
> by C?Z_DEFINED_VALUE_AT_ZERO.
> 
> So, this patch extends that function to handle not just popcount, but also
> clz and ctz.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2020-10-20  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/97503
> 	* tree-ssa-phiopt.c (cond_removal_in_popcount_pattern): Rename to ...
> 	(cond_removal_in_popcount_clz_ctz_pattern): ... this.  Handle not just
> 	popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
> 
> 	* gcc.dg/tree-ssa/pr97503.c: New test.
> 
> --- gcc/tree-ssa-phiopt.c.jj	2020-07-28 15:39:10.075755306 +0200
> +++ gcc/tree-ssa-phiopt.c	2020-10-20 17:46:16.971329154 +0200
> @@ -61,8 +61,9 @@ static bool minmax_replacement (basic_bl
>  				edge, edge, gimple *, tree, tree);
>  static bool abs_replacement (basic_block, basic_block,
>  			     edge, edge, gimple *, tree, tree);
> -static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
> -					      edge, edge, gimple *, tree, tree);
> +static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
> +						      edge, edge, gimple *,
> +						      tree, tree);
>  static bool cond_store_replacement (basic_block, basic_block, edge, edge,
>  				    hash_set<tree> *);
>  static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
> @@ -344,8 +345,9 @@ tree_ssa_phiopt_worker (bool do_store_el
>  	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>  	    cfgchanged = true;
>  	  else if (!early_p
> -		   && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
> -							phi, arg0, arg1))
> +		   && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
> +								e2, phi, arg0,
> +								arg1))
>  	    cfgchanged = true;
>  	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>  	    cfgchanged = true;
> @@ -1777,16 +1779,20 @@ minmax_replacement (basic_block cond_bb,
>  
>     <bb 4>
>     c_12 = PHI <_9(2)>
> -*/
> +
> +   Similarly for __builtin_clz or __builtin_ctz if
> +   C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
> +   instead of 0 above it uses the value from that macro.  */
>  
>  static bool
> -cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
> -				  edge e1, edge e2,
> -				  gimple *phi, tree arg0, tree arg1)
> +cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
> +					  basic_block middle_bb,
> +					  edge e1, edge e2, gimple *phi,
> +					  tree arg0, tree arg1)
>  {
>    gimple *cond;
>    gimple_stmt_iterator gsi, gsi_from;
> -  gimple *popcount;
> +  gimple *call;
>    gimple *cast = NULL;
>    tree lhs, arg;
>  
> @@ -1804,35 +1810,65 @@ cond_removal_in_popcount_pattern (basic_
>    gsi_next_nondebug (&gsi);
>    if (!gsi_end_p (gsi))
>      {
> -      popcount = gsi_stmt (gsi);
> +      call = gsi_stmt (gsi);
>        gsi_next_nondebug (&gsi);
>        if (!gsi_end_p (gsi))
>  	return false;
>      }
>    else
>      {
> -      popcount = cast;
> +      call = cast;
>        cast = NULL;
>      }
>  
> -  /* Check that we have a popcount builtin.  */
> -  if (!is_gimple_call (popcount))
> +  /* Check that we have a popcount/clz/ctz builtin.  */
> +  if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
>      return false;
> -  combined_fn cfn = gimple_call_combined_fn (popcount);
> +
> +  arg = gimple_call_arg (call, 0);
> +  lhs = gimple_get_lhs (call);
> +
> +  if (lhs == NULL_TREE)
> +    return false;
> +
> +  combined_fn cfn = gimple_call_combined_fn (call);
> +  internal_fn ifn = IFN_LAST;
> +  int val = 0;
>    switch (cfn)
>      {
>      CASE_CFN_POPCOUNT:
>        break;
> +    CASE_CFN_CLZ:
> +      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
> +	{
> +	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> +	  if (optab_handler (clz_optab, mode) != CODE_FOR_nothing

I think it's better to use

        direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
				        OPTIMIZE_FOR_BOTH);

for these checks nowadays.  Otherwise OK.

Thanks,
Richard.


> +	      && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
> +	    {
> +	      ifn = IFN_CLZ;
> +	      break;
> +	    }
> +	}
> +      return false;
> +    CASE_CFN_CTZ:
> +      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
> +	{
> +	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> +	  if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing
> +	      && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
> +	    {
> +	      ifn = IFN_CTZ;
> +	      break;
> +	    }
> +	}
> +      return false;
>      default:
>        return false;
>      }
>  
> -  arg = gimple_call_arg (popcount, 0);
> -  lhs = gimple_get_lhs (popcount);
> -
>    if (cast)
>      {
> -      /* We have a cast stmt feeding popcount builtin.  */
> +      /* We have a cast stmt feeding popcount/clz/ctz builtin.  */
>        /* Check that we have a cast prior to that.  */
>        if (gimple_code (cast) != GIMPLE_ASSIGN
>  	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
> @@ -1845,7 +1881,7 @@ cond_removal_in_popcount_pattern (basic_
>  
>    cond = last_stmt (cond_bb);
>  
> -  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
> +  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
>       builtin.  */
>    if (gimple_code (cond) != GIMPLE_COND
>        || (gimple_cond_code (cond) != NE_EXPR
> @@ -1865,10 +1901,13 @@ cond_removal_in_popcount_pattern (basic_
>      }
>  
>    /* Check PHI arguments.  */
> -  if (lhs != arg0 || !integer_zerop (arg1))
> +  if (lhs != arg0
> +      || TREE_CODE (arg1) != INTEGER_CST
> +      || wi::to_wide (arg1) != val)
>      return false;
>  
> -  /* And insert the popcount builtin and cast stmt before the cond_bb.  */
> +  /* And insert the popcount/clz/ctz builtin and cast stmt before the
> +     cond_bb.  */
>    gsi = gsi_last_bb (cond_bb);
>    if (cast)
>      {
> @@ -1876,9 +1915,19 @@ cond_removal_in_popcount_pattern (basic_
>        gsi_move_before (&gsi_from, &gsi);
>        reset_flow_sensitive_info (gimple_get_lhs (cast));
>      }
> -  gsi_from = gsi_for_stmt (popcount);
> -  gsi_move_before (&gsi_from, &gsi);
> -  reset_flow_sensitive_info (gimple_get_lhs (popcount));
> +  gsi_from = gsi_for_stmt (call);
> +  if (ifn == IFN_LAST || gimple_call_internal_p (call))
> +    gsi_move_before (&gsi_from, &gsi);
> +  else
> +    {
> +      /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
> +	 the latter is well defined at zero.  */
> +      call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +      gsi_remove (&gsi_from, true);
> +    }
> +  reset_flow_sensitive_info (lhs);
>  
>    /* Now update the PHI and remove unneeded bbs.  */
>    replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
> --- gcc/testsuite/gcc.dg/tree-ssa/pr97503.c.jj	2020-10-20 18:14:46.862749687 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr97503.c	2020-10-20 18:17:13.618640295 +0200
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/97503 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
> +/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
> +/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
> +/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
> +
> +int
> +foo (int x)
> +{
> +  return x ? __builtin_clz (x) : 32;
> +}
> +
> +int
> +bar (unsigned long long x)
> +{
> +  return x ? __builtin_clzll (x) : 64;
> +}
> 
> 	Jakub
> 
>
Rainer Orth Oct. 21, 2020, 5:30 p.m. UTC | #2
Hi Jakub,

> While we have at the RTL level noce_try_ifelse_collapse combined with
> simplify_cond_clz_ctz, that optimization doesn't always trigger because
> e.g. on powerpc there is an define_insn to compare a reg against zero and
> copy that register to another one and so we end up with a different pseudo
> in the simplify_cond_clz_ctz test and punt.
>
> For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
> we can optimize it already in phiopt though, just need to ensure that
> we transform the __builtin_c?z* calls into .C?Z ifns because my recent
> VRP changes codified that the builtin calls are always undefined at zero,
> while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
> And, in phiopt we already have popcount handling that does pretty much the
> same thing, except for always using a zero value rather than the one set
> by C?Z_DEFINED_VALUE_AT_ZERO.
>
> So, this patch extends that function to handle not just popcount, but also
> clz and ctz.

this broke sparc-sun-solaris2.11 bootstrap

/vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
/vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
 1858 |           scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
      |                           ^~~~


and doubtlessly several other targets that use the defaults.h definition of

#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  0

	Rainer
Jakub Jelinek Oct. 21, 2020, 5:41 p.m. UTC | #3
On Wed, Oct 21, 2020 at 07:30:46PM +0200, Rainer Orth wrote:
> this broke sparc-sun-solaris2.11 bootstrap
> 
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
>  1858 |           scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
>       |                           ^~~~
> 
> 
> and doubtlessly several other targets that use the defaults.h definition of
> 
> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  0

Ugh, seems many of those macros do not evaluate the first argument.
This got broken by the change to direct_internal_fn_supported_p, previously
it used mode also in the optab test.

Anyway, I think this should fix it, I'll bootstrap/regtest it tonight:

2020-10-21  Jakub Jelinek  <jakub@redhat.com>

	* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
	For CLZ and CTZ tests, use type temporary instead of mode.

--- gcc/tree-ssa-phiopt.c.jj	2020-10-21 19:33:12.358042645 +0200
+++ gcc/tree-ssa-phiopt.c	2020-10-21 19:35:18.113213095 +0200
@@ -1842,10 +1842,10 @@ cond_removal_in_popcount_clz_ctz_pattern
     CASE_CFN_CLZ:
       if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
 	{
-	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
-	  if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
-					      OPTIMIZE_FOR_BOTH)
-	      && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+	  tree type = TREE_TYPE (arg);
+	  if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
+	      && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+					    val) == 2)
 	    {
 	      ifn = IFN_CLZ;
 	      break;
@@ -1855,10 +1855,10 @@ cond_removal_in_popcount_clz_ctz_pattern
     CASE_CFN_CTZ:
       if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
 	{
-	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
-	  if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
-					      OPTIMIZE_FOR_BOTH)
-	      && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+	  tree type = TREE_TYPE (arg);
+	  if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
+	      && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+					    val) == 2)
 	    {
 	      ifn = IFN_CTZ;
 	      break;


	Jakub
Rainer Orth Oct. 22, 2020, 10:45 a.m. UTC | #4
Hi Jakub,

> On Wed, Oct 21, 2020 at 07:30:46PM +0200, Rainer Orth wrote:
>> this broke sparc-sun-solaris2.11 bootstrap
>> 
>> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
>> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
>>  1858 |           scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
>>       |                           ^~~~
>> 
>> 
>> and doubtlessly several other targets that use the defaults.h definition of
>> 
>> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  0
>
> Ugh, seems many of those macros do not evaluate the first argument.
> This got broken by the change to direct_internal_fn_supported_p, previously
> it used mode also in the optab test.
>
> Anyway, I think this should fix it, I'll bootstrap/regtest it tonight:
>
> 2020-10-21  Jakub Jelinek  <jakub@redhat.com>
>
> 	* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
> 	For CLZ and CTZ tests, use type temporary instead of mode.

this worked for me just fine, both sparc-sun-solaris2.11 and (for good
measure) i386-pc-solaris2.11.

Thanks.
        Rainer
diff mbox series

Patch

--- gcc/tree-ssa-phiopt.c.jj	2020-07-28 15:39:10.075755306 +0200
+++ gcc/tree-ssa-phiopt.c	2020-10-20 17:46:16.971329154 +0200
@@ -61,8 +61,9 @@  static bool minmax_replacement (basic_bl
 				edge, edge, gimple *, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, gimple *, tree, tree);
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
-					      edge, edge, gimple *, tree, tree);
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+						      edge, edge, gimple *,
+						      tree, tree);
 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
 				    hash_set<tree> *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -344,8 +345,9 @@  tree_ssa_phiopt_worker (bool do_store_el
 	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
 	  else if (!early_p
-		   && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
-							phi, arg0, arg1))
+		   && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+								e2, phi, arg0,
+								arg1))
 	    cfgchanged = true;
 	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
@@ -1777,16 +1779,20 @@  minmax_replacement (basic_block cond_bb,
 
    <bb 4>
    c_12 = PHI <_9(2)>
-*/
+
+   Similarly for __builtin_clz or __builtin_ctz if
+   C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
+   instead of 0 above it uses the value from that macro.  */
 
 static bool
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
-				  edge e1, edge e2,
-				  gimple *phi, tree arg0, tree arg1)
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
+					  basic_block middle_bb,
+					  edge e1, edge e2, gimple *phi,
+					  tree arg0, tree arg1)
 {
   gimple *cond;
   gimple_stmt_iterator gsi, gsi_from;
-  gimple *popcount;
+  gimple *call;
   gimple *cast = NULL;
   tree lhs, arg;
 
@@ -1804,35 +1810,65 @@  cond_removal_in_popcount_pattern (basic_
   gsi_next_nondebug (&gsi);
   if (!gsi_end_p (gsi))
     {
-      popcount = gsi_stmt (gsi);
+      call = gsi_stmt (gsi);
       gsi_next_nondebug (&gsi);
       if (!gsi_end_p (gsi))
 	return false;
     }
   else
     {
-      popcount = cast;
+      call = cast;
       cast = NULL;
     }
 
-  /* Check that we have a popcount builtin.  */
-  if (!is_gimple_call (popcount))
+  /* Check that we have a popcount/clz/ctz builtin.  */
+  if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
     return false;
-  combined_fn cfn = gimple_call_combined_fn (popcount);
+
+  arg = gimple_call_arg (call, 0);
+  lhs = gimple_get_lhs (call);
+
+  if (lhs == NULL_TREE)
+    return false;
+
+  combined_fn cfn = gimple_call_combined_fn (call);
+  internal_fn ifn = IFN_LAST;
+  int val = 0;
   switch (cfn)
     {
     CASE_CFN_POPCOUNT:
       break;
+    CASE_CFN_CLZ:
+      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+	{
+	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+	  if (optab_handler (clz_optab, mode) != CODE_FOR_nothing
+	      && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+	    {
+	      ifn = IFN_CLZ;
+	      break;
+	    }
+	}
+      return false;
+    CASE_CFN_CTZ:
+      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+	{
+	  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+	  if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing
+	      && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+	    {
+	      ifn = IFN_CTZ;
+	      break;
+	    }
+	}
+      return false;
     default:
       return false;
     }
 
-  arg = gimple_call_arg (popcount, 0);
-  lhs = gimple_get_lhs (popcount);
-
   if (cast)
     {
-      /* We have a cast stmt feeding popcount builtin.  */
+      /* We have a cast stmt feeding popcount/clz/ctz builtin.  */
       /* Check that we have a cast prior to that.  */
       if (gimple_code (cast) != GIMPLE_ASSIGN
 	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
@@ -1845,7 +1881,7 @@  cond_removal_in_popcount_pattern (basic_
 
   cond = last_stmt (cond_bb);
 
-  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
+  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
      builtin.  */
   if (gimple_code (cond) != GIMPLE_COND
       || (gimple_cond_code (cond) != NE_EXPR
@@ -1865,10 +1901,13 @@  cond_removal_in_popcount_pattern (basic_
     }
 
   /* Check PHI arguments.  */
-  if (lhs != arg0 || !integer_zerop (arg1))
+  if (lhs != arg0
+      || TREE_CODE (arg1) != INTEGER_CST
+      || wi::to_wide (arg1) != val)
     return false;
 
-  /* And insert the popcount builtin and cast stmt before the cond_bb.  */
+  /* And insert the popcount/clz/ctz builtin and cast stmt before the
+     cond_bb.  */
   gsi = gsi_last_bb (cond_bb);
   if (cast)
     {
@@ -1876,9 +1915,19 @@  cond_removal_in_popcount_pattern (basic_
       gsi_move_before (&gsi_from, &gsi);
       reset_flow_sensitive_info (gimple_get_lhs (cast));
     }
-  gsi_from = gsi_for_stmt (popcount);
-  gsi_move_before (&gsi_from, &gsi);
-  reset_flow_sensitive_info (gimple_get_lhs (popcount));
+  gsi_from = gsi_for_stmt (call);
+  if (ifn == IFN_LAST || gimple_call_internal_p (call))
+    gsi_move_before (&gsi_from, &gsi);
+  else
+    {
+      /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
+	 the latter is well defined at zero.  */
+      call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+      gimple_call_set_lhs (call, lhs);
+      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+      gsi_remove (&gsi_from, true);
+    }
+  reset_flow_sensitive_info (lhs);
 
   /* Now update the PHI and remove unneeded bbs.  */
   replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
--- gcc/testsuite/gcc.dg/tree-ssa/pr97503.c.jj	2020-10-20 18:14:46.862749687 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr97503.c	2020-10-20 18:17:13.618640295 +0200
@@ -0,0 +1,19 @@ 
+/* PR tree-optimization/97503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+
+int
+foo (int x)
+{
+  return x ? __builtin_clz (x) : 32;
+}
+
+int
+bar (unsigned long long x)
+{
+  return x ? __builtin_clzll (x) : 64;
+}