diff mbox series

phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]

Message ID 20210518074202.GL1179226@tucnak
State New
Headers show
Series phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] | expand

Commit Message

Jakub Jelinek May 18, 2021, 7:42 a.m. UTC
Hi!

As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
>= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
>= comparison is implemented as (res & 1) == res to choose the 0 and 1
cases from that.  As we optimize that only with -ffinite-math-only, the
2 case is assumed not to happen and my earlier match.pd change optimizes
(res & 1) == res into (res & ~1) == 0, so this patch pattern matches
that case and handles it like res >= 0.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-05-18  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94589
	* tree-ssa-phiopt.c (spaceship_replacement): Pattern match
	phi result used in (res & ~1) == 0 comparison as res >= 0 as
	res == 2 would be UB with -ffinite-math-only.

	* g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.


	Jakub

Comments

Richard Biener May 18, 2021, 7:55 a.m. UTC | #1
On Tue, 18 May 2021, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that.  As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2021-05-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/94589
> 	* tree-ssa-phiopt.c (spaceship_replacement): Pattern match
> 	phi result used in (res & ~1) == 0 comparison as res >= 0 as
> 	res == 2 would be UB with -ffinite-math-only.
> 
> 	* g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
> 
> --- gcc/tree-ssa-phiopt.c.jj	2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c	2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
>  		       edge e0, edge e1, gphi *phi,
>  		       tree arg0, tree arg1)
>  {
> -  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> -      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> +  tree phires = PHI_RESULT (phi);
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> +      || TYPE_UNSIGNED (TREE_TYPE (phires))
>        || !tree_fits_shwi_p (arg0)
>        || !tree_fits_shwi_p (arg1)
>        || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>  
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
>      return false;
> -  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> +  if (!single_imm_use (phires, &use_p, &use_stmt))
>      return false;
>    enum tree_code cmp;
>    tree lhs, rhs;
> +  gimple *orig_use_stmt = use_stmt;
> +  tree orig_use_lhs = NULL_TREE;
> +  int prec = TYPE_PRECISION (TREE_TYPE (phires));
> +  if (is_gimple_assign (use_stmt)
> +      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> +      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> +	  == wi::shifted_mask (1, prec - 1, false, prec)))
> +    {
> +      /* For partial_ordering result operator>= with unspec as second
> +	 argument is (res & 1) == res, folded by match.pd into
> +	 (res & ~1) == 0.  */
> +      orig_use_lhs = gimple_assign_lhs (use_stmt);
> +      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> +	return false;
> +      if (EDGE_COUNT (phi_bb->preds) != 4)
> +	return false;
> +      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> +	return false;
> +    }
>    if (gimple_code (use_stmt) == GIMPLE_COND)
>      {
>        cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
>      default:
>        return false;
>      }
> -  if (lhs != PHI_RESULT (phi)
> +  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
>        || !tree_fits_shwi_p (rhs)
>        || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
>      return false;
> +  if (orig_use_lhs)
> +    {
> +      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> +	return false;
> +      /* As for -ffast-math we assume the 2 return to be
> +	 impossible, canonicalize (res & ~1) == 0 into
> +	 res >= 0 and (res & ~1) != 0 as res < 0.  */
> +      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> +    }
>  
>    if (!empty_block_p (middle_bb))
>      return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
>  				? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
>      return false;
>  
> -  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
> +  /* lhs1 one_cmp rhs1 results in phires of 1.  */
>    enum tree_code one_cmp;
>    if ((cmp1 == LT_EXPR)
>        ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
>        use_operand_p use_p;
>        imm_use_iterator iter;
>        bool has_debug_uses = false;
> -      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
>  	{
>  	  gimple *use_stmt = USE_STMT (use_p);
> +	  if (orig_use_lhs && use_stmt == orig_use_stmt)
> +	    continue;
>  	  gcc_assert (is_gimple_debug (use_stmt));
>  	  has_debug_uses = true;
>  	  break;
>  	}
> +      if (orig_use_lhs)
> +	{
> +	  if (!has_debug_uses)
> +	    FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> +	      {
> +		gimple *use_stmt = USE_STMT (use_p);
> +		gcc_assert (is_gimple_debug (use_stmt));
> +		has_debug_uses = true;
> +	      }
> +	  gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +	  tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> +	  gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> +	  update_stmt (orig_use_stmt);
> +	}
>  
>        if (has_debug_uses)
>  	{
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
>  	     Ignore the value of 2, because if NaNs aren't expected,
>  	     all floating point numbers should be comparable.  */
>  	  gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> -	  tree type = TREE_TYPE (PHI_RESULT (phi));
> +	  tree type = TREE_TYPE (phires);
>  	  tree temp1 = make_node (DEBUG_EXPR_DECL);
>  	  DECL_ARTIFICIAL (temp1) = 1;
>  	  TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
>  	  t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
>  	  g = gimple_build_debug_bind (temp2, t, phi);
>  	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> -	  replace_uses_by (PHI_RESULT (phi), temp2);
> +	  replace_uses_by (phires, temp2);
> +	  if (orig_use_lhs)
> +	    replace_uses_by (orig_use_lhs, temp2);
>  	}
>      }
>  
> +  if (orig_use_lhs)
> +    {
> +      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +      gsi_remove (&gsi, true);
> +    }
> +
>    gimple_stmt_iterator psi = gsi_for_stmt (phi);
>    remove_phi_node (&psi, true);
>  
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj	2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C	2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
>  // PR tree-optimization/94589
>  // { dg-do compile { target c++20 } }
>  // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>  
>  #include <compare>
>  
> 
> 	Jakub
> 
>
Christophe Lyon May 19, 2021, 8:15 a.m. UTC | #2
On Tue, 18 May 2021 at 09:42, Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi!
>
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that.  As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-05-18  Jakub Jelinek  <jakub@redhat.com>
>
>         PR tree-optimization/94589
>         * tree-ssa-phiopt.c (spaceship_replacement): Pattern match
>         phi result used in (res & ~1) == 0 comparison as res >= 0 as
>         res == 2 would be UB with -ffinite-math-only.
>
>         * g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
>
> --- gcc/tree-ssa-phiopt.c.jj    2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c       2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
>                        edge e0, edge e1, gphi *phi,
>                        tree arg0, tree arg1)
>  {
> -  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> -      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> +  tree phires = PHI_RESULT (phi);
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> +      || TYPE_UNSIGNED (TREE_TYPE (phires))
>        || !tree_fits_shwi_p (arg0)
>        || !tree_fits_shwi_p (arg1)
>        || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
>      return false;
> -  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> +  if (!single_imm_use (phires, &use_p, &use_stmt))
>      return false;
>    enum tree_code cmp;
>    tree lhs, rhs;
> +  gimple *orig_use_stmt = use_stmt;
> +  tree orig_use_lhs = NULL_TREE;
> +  int prec = TYPE_PRECISION (TREE_TYPE (phires));
> +  if (is_gimple_assign (use_stmt)
> +      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> +      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> +         == wi::shifted_mask (1, prec - 1, false, prec)))
> +    {
> +      /* For partial_ordering result operator>= with unspec as second
> +        argument is (res & 1) == res, folded by match.pd into
> +        (res & ~1) == 0.  */
> +      orig_use_lhs = gimple_assign_lhs (use_stmt);
> +      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> +       return false;
> +      if (EDGE_COUNT (phi_bb->preds) != 4)
> +       return false;
> +      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> +       return false;
> +    }
>    if (gimple_code (use_stmt) == GIMPLE_COND)
>      {
>        cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
>      default:
>        return false;
>      }
> -  if (lhs != PHI_RESULT (phi)
> +  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
>        || !tree_fits_shwi_p (rhs)
>        || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
>      return false;
> +  if (orig_use_lhs)
> +    {
> +      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> +       return false;
> +      /* As for -ffast-math we assume the 2 return to be
> +        impossible, canonicalize (res & ~1) == 0 into
> +        res >= 0 and (res & ~1) != 0 as res < 0.  */
> +      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> +    }
>
>    if (!empty_block_p (middle_bb))
>      return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
>                                 ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
>      return false;
>
> -  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
> +  /* lhs1 one_cmp rhs1 results in phires of 1.  */
>    enum tree_code one_cmp;
>    if ((cmp1 == LT_EXPR)
>        ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
>        use_operand_p use_p;
>        imm_use_iterator iter;
>        bool has_debug_uses = false;
> -      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
>         {
>           gimple *use_stmt = USE_STMT (use_p);
> +         if (orig_use_lhs && use_stmt == orig_use_stmt)
> +           continue;
>           gcc_assert (is_gimple_debug (use_stmt));
>           has_debug_uses = true;
>           break;
>         }
> +      if (orig_use_lhs)
> +       {
> +         if (!has_debug_uses)
> +           FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> +             {
> +               gimple *use_stmt = USE_STMT (use_p);
> +               gcc_assert (is_gimple_debug (use_stmt));
> +               has_debug_uses = true;
> +             }
> +         gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +         tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> +         gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> +         update_stmt (orig_use_stmt);
> +       }
>
>        if (has_debug_uses)
>         {
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
>              Ignore the value of 2, because if NaNs aren't expected,
>              all floating point numbers should be comparable.  */
>           gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> -         tree type = TREE_TYPE (PHI_RESULT (phi));
> +         tree type = TREE_TYPE (phires);
>           tree temp1 = make_node (DEBUG_EXPR_DECL);
>           DECL_ARTIFICIAL (temp1) = 1;
>           TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
>           t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
>           g = gimple_build_debug_bind (temp2, t, phi);
>           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> -         replace_uses_by (PHI_RESULT (phi), temp2);
> +         replace_uses_by (phires, temp2);
> +         if (orig_use_lhs)
> +           replace_uses_by (orig_use_lhs, temp2);
>         }
>      }
>
> +  if (orig_use_lhs)
> +    {
> +      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +      gsi_remove (&gsi, true);
> +    }
> +
>    gimple_stmt_iterator psi = gsi_for_stmt (phi);
>    remove_phi_node (&psi, true);
>
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj     2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C        2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
>  // PR tree-optimization/94589
>  // { dg-do compile { target c++20 } }
>  // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>

Hi Jakub,

After this update, the test fails on arm and aarch64: according to the
logs, the optimization is still performed 14 times.

Can you check?

Thanks

Christophe

>  #include <compare>
>
>
>         Jakub
>
Jakub Jelinek May 19, 2021, 9:09 a.m. UTC | #3
On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> After this update, the test fails on arm and aarch64: according to the
> logs, the optimization is still performed 14 times.

Seems this is because
              if (change
                  && !flag_syntax_only
                  && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
                      == ZERO_EXTEND))
                {
                  tree uns = unsigned_type_for (TREE_TYPE (and0));
                  and0 = fold_convert_loc (loc, uns, and0);
                  and1 = fold_convert_loc (loc, uns, and1);
                }
in fold-const.c adds on these targets extra casts that prevent the
optimizations, instead of
      _1 = __v._M_value;
      _2 = (int) _1;
      _3 = _2 & 1;
      _4 = __v._M_value;
      _5 = (int) _4;
      D.8503 = _3 == _5;
on x86_64 it is:
      _1 = __v._M_value;
      _2 = (unsigned char) _1;
      _3 = (int) _2;
      _4 = _3 & 1;
      _5 = __v._M_value;
      _6 = (int) _5;
      D.10471 = _4 == _6;
Before fre1 it is
  __v$_M_value_9 = __v._M_value;
  _1 = __v$_M_value_9;
  _2 = (int) _1;
  _8 = _1 & 1;
  _3 = (int) _8;
  _4 = __v$_M_value_9;
  _5 = (int) _4;
  _7 = _4 == _8;
which fre1 using match.pd optimizes into
  __v$_M_value_9 = __v._M_value;
  _10 = __v$_M_value_9 & -2;
  _7 = _10 == 0;
but on aarch64 before fre1 we have:
  __v$_M_value_10 = __v._M_value;
  _1 = __v$_M_value_10;
  _2 = (unsigned char) _1;
  _3 = (int) _2;
  _9 = _2 & 1;
  _4 = (int) _9;
  _5 = __v$_M_value_10;
  _6 = (int) _5;
  _8 = _4 == _6;
which is not optimized.

	Jakub
diff mbox series

Patch

--- gcc/tree-ssa-phiopt.c.jj	2021-05-06 14:05:03.254763364 +0200
+++ gcc/tree-ssa-phiopt.c	2021-05-17 14:51:55.648356364 +0200
@@ -1887,8 +1887,9 @@  spaceship_replacement (basic_block cond_
 		       edge e0, edge e1, gphi *phi,
 		       tree arg0, tree arg1)
 {
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
-      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
+  tree phires = PHI_RESULT (phi);
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
+      || TYPE_UNSIGNED (TREE_TYPE (phires))
       || !tree_fits_shwi_p (arg0)
       || !tree_fits_shwi_p (arg1)
       || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
@@ -1902,12 +1903,32 @@  spaceship_replacement (basic_block cond_
 
   use_operand_p use_p;
   gimple *use_stmt;
-  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
     return false;
-  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
+  if (!single_imm_use (phires, &use_p, &use_stmt))
     return false;
   enum tree_code cmp;
   tree lhs, rhs;
+  gimple *orig_use_stmt = use_stmt;
+  tree orig_use_lhs = NULL_TREE;
+  int prec = TYPE_PRECISION (TREE_TYPE (phires));
+  if (is_gimple_assign (use_stmt)
+      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
+      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
+      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
+	  == wi::shifted_mask (1, prec - 1, false, prec)))
+    {
+      /* For partial_ordering result operator>= with unspec as second
+	 argument is (res & 1) == res, folded by match.pd into
+	 (res & ~1) == 0.  */
+      orig_use_lhs = gimple_assign_lhs (use_stmt);
+      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
+	return false;
+      if (EDGE_COUNT (phi_bb->preds) != 4)
+	return false;
+      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
+	return false;
+    }
   if (gimple_code (use_stmt) == GIMPLE_COND)
     {
       cmp = gimple_cond_code (use_stmt);
@@ -1948,10 +1969,19 @@  spaceship_replacement (basic_block cond_
     default:
       return false;
     }
-  if (lhs != PHI_RESULT (phi)
+  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
       || !tree_fits_shwi_p (rhs)
       || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
     return false;
+  if (orig_use_lhs)
+    {
+      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
+	return false;
+      /* As for -ffast-math we assume the 2 return to be
+	 impossible, canonicalize (res & ~1) == 0 into
+	 res >= 0 and (res & ~1) != 0 as res < 0.  */
+      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
+    }
 
   if (!empty_block_p (middle_bb))
     return false;
@@ -2092,7 +2122,7 @@  spaceship_replacement (basic_block cond_
 				? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
     return false;
 
-  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
+  /* lhs1 one_cmp rhs1 results in phires of 1.  */
   enum tree_code one_cmp;
   if ((cmp1 == LT_EXPR)
       ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
@@ -2185,13 +2215,29 @@  spaceship_replacement (basic_block cond_
       use_operand_p use_p;
       imm_use_iterator iter;
       bool has_debug_uses = false;
-      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
+      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
 	{
 	  gimple *use_stmt = USE_STMT (use_p);
+	  if (orig_use_lhs && use_stmt == orig_use_stmt)
+	    continue;
 	  gcc_assert (is_gimple_debug (use_stmt));
 	  has_debug_uses = true;
 	  break;
 	}
+      if (orig_use_lhs)
+	{
+	  if (!has_debug_uses)
+	    FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
+	      {
+		gimple *use_stmt = USE_STMT (use_p);
+		gcc_assert (is_gimple_debug (use_stmt));
+		has_debug_uses = true;
+	      }
+	  gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+	  tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
+	  gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
+	  update_stmt (orig_use_stmt);
+	}
 
       if (has_debug_uses)
 	{
@@ -2203,7 +2249,7 @@  spaceship_replacement (basic_block cond_
 	     Ignore the value of 2, because if NaNs aren't expected,
 	     all floating point numbers should be comparable.  */
 	  gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
-	  tree type = TREE_TYPE (PHI_RESULT (phi));
+	  tree type = TREE_TYPE (phires);
 	  tree temp1 = make_node (DEBUG_EXPR_DECL);
 	  DECL_ARTIFICIAL (temp1) = 1;
 	  TREE_TYPE (temp1) = type;
@@ -2221,10 +2267,18 @@  spaceship_replacement (basic_block cond_
 	  t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
 	  g = gimple_build_debug_bind (temp2, t, phi);
 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
-	  replace_uses_by (PHI_RESULT (phi), temp2);
+	  replace_uses_by (phires, temp2);
+	  if (orig_use_lhs)
+	    replace_uses_by (orig_use_lhs, temp2);
 	}
     }
 
+  if (orig_use_lhs)
+    {
+      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+      gsi_remove (&gsi, true);
+    }
+
   gimple_stmt_iterator psi = gsi_for_stmt (phi);
   remove_phi_node (&psi, true);
 
--- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj	2021-05-06 10:15:23.712751382 +0200
+++ gcc/testsuite/g++.dg/opt/pr94589-2.C	2021-05-17 14:57:21.154800488 +0200
@@ -1,8 +1,8 @@ 
 // PR tree-optimization/94589
 // { dg-do compile { target c++20 } }
 // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
-// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
-// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
+// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
+// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
 
 #include <compare>