
[v5,1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Message ID 20240515021407.1287623-1-pan2.li@intel.com
State New
Series [v5,1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Commit Message

Li, Pan2 May 15, 2024, 2:14 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch adds the middle-end representation for saturation add, i.e. the
result of the add is clamped to the type's maximum value on overflow.  It
matches a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Taking uint8_t as an example, we will have (a short C sketch follows the list):

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.
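
A minimal C sketch of the same branchless pattern at uint8_t width is shown
below.  The helper name and the asserts are illustrative only and not part
of the patch; the explicit casts are needed because C integer promotion
would otherwise evaluate x + y in int.

#include <assert.h>
#include <stdint.h>

static uint8_t
sat_add_u8 (uint8_t x, uint8_t y)
{
  uint8_t sum = (uint8_t) (x + y);              /* wraps on overflow */
  return sum | (uint8_t) -(uint8_t) (sum < x);  /* all-ones mask clamps to 255 */
}

int
main (void)
{
  assert (sat_add_u8 (1, 254) == 255);
  assert (sat_add_u8 (1, 255) == 255);
  assert (sat_add_u8 (2, 255) == 255);
  assert (sat_add_u8 (255, 255) == 255);
  return 0;
}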

Consider the example below for the unsigned scalar integer type uint64_t:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;    succ:       EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;    succ:       EXIT
}

The tests below passed for this patch (a reduced-test sketch follows the list):
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.
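
A hypothetical reduced test in the gcc.dg testsuite style is sketched below.
The dg- directives, dump name and scan count are assumptions for
illustration only; whether .SAT_ADD materializes in the "optimized" dump
depends on the target providing the usadd optab for the mode (checked via
direct_internal_fn_supported_p).

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

#include <stdint.h>

uint64_t
sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

/* { dg-final { scan-tree-dump-times "\\.SAT_ADD" 1 "optimized" } } */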

	PR target/51492
	PR target/112600

gcc/ChangeLog:

	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
	to the return true switch case(s).
	* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
	* match.pd: Add unsigned SAT_ADD match(es).
	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
	us/ssadd.
	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
	extern func decl generated in match.pd match.
	(match_saturation_arith): New func impl to match the saturation arith.
	(math_opts_dom_walker::after_dom_children): Try match saturation
	arith when IOR expr.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/internal-fn.cc        |  1 +
 gcc/internal-fn.def       |  2 ++
 gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
 gcc/optabs.def            |  4 +--
 gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
 5 files changed, 88 insertions(+), 2 deletions(-)

Comments

Tamar Christina May 15, 2024, 9:12 a.m. UTC | #1
Hi Pan,

Thanks!

> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Wednesday, May 15, 2024 3:14 AM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> <Tamar.Christina@arm.com>; richard.guenther@gmail.com;
> hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
> 
> From: Pan Li <pan2.li@intel.com>
> 
> This patch would like to add the middle-end presentation for the
> saturation add.  Aka set the result of add to the max when overflow.
> It will take the pattern similar as below.
> 
> SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
> 
> Take uint8_t as example, we will have:
> 
> * SAT_ADD (1, 254)   => 255.
> * SAT_ADD (1, 255)   => 255.
> * SAT_ADD (2, 255)   => 255.
> * SAT_ADD (255, 255) => 255.
> 
> Given below example for the unsigned scalar integer uint64_t:
> 
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
> 
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 = REALPART_EXPR <_11>;
>   _10 = IMAGPART_EXPR <_11>;
>   _2 = _10 != 0;
>   _3 = (long unsigned int) _2;
>   _4 = -_3;
>   _7 = _1 | _4;
>   return _7;
> ;;    succ:       EXIT
> 
> }
> 
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;    succ:       EXIT
> }
> 
> The below tests are passed for this patch:
> 1. The riscv fully regression tests.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
> 
> 	PR target/51492
> 	PR target/112600
> 
> gcc/ChangeLog:
> 
> 	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> 	to the return true switch case(s).
> 	* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> 	* match.pd: Add unsigned SAT_ADD match(es).
> 	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
> 	us/ssadd.
> 	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
> 	extern func decl generated in match.pd match.
> 	(match_saturation_arith): New func impl to match the saturation arith.
> 	(math_opts_dom_walker::after_dom_children): Try match saturation
> 	arith when IOR expr.
> 

LGTM, but you'll need an OK from Richard.

Thanks for working on this!

Tamar

> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.cc        |  1 +
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
>  gcc/optabs.def            |  4 +--
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
>  5 files changed, 88 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>      case IFN_UBSAN_CHECK_MUL:
>      case IFN_ADD_OVERFLOW:
>      case IFN_MUL_OVERFLOW:
> +    case IFN_SAT_ADD:
>      case IFN_VEC_WIDEN_PLUS:
>      case IFN_VEC_WIDEN_PLUS_LO:
>      case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  			      smulhrs, umulhrs, binary)
> 
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 07e743ae464..0f9c34fa897 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         || POINTER_TYPE_P (itype))
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> 
> +/* Unsigned Saturation Add */
> +(match (usadd_left_part_1 @0 @1)
> + (plus:c @0 @1)
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_left_part_2 @0 @1)
> + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (lt (plus:c @0 @1) @0)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (gt @0 (plus:c @0 @1))))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_2 @0 @1)
> + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> +   because the sub part of left_part_2 cannot work with right_part_1.
> +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> +
> +/* Unsigned saturation add, case 1 (branchless):
> +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> +
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index ad14f9328b9..3f2cb46aff8 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
>  OPTAB_NX(add_optab, "add$Q$a3")
>  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(addv_optab, "add$F$a3")
> -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
> -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
> +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
> +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
>  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index e8c804f09b7..62da1c5ee08 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>    return 0;
>  }
> 
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to match saturation arith pattern(s).
> + *   1. SAT_ADD (unsigned)
> + *      _7 = _4 + _6;
> + *      _8 = _4 > _7;
> + *      _9 = (long unsigned int) _8;
> + *      _10 = -_9;
> + *      _12 = _7 | _10;
> + *      =>
> + *      _12 = .SAT_ADD (_4, _6);  */
> +static void
> +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  gcall *call = NULL;
> +
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> +					 OPTIMIZE_FOR_BOTH))
> +    {
> +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>  	      break;
> 
>  	    case BIT_IOR_EXPR:
> +	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +	      /* fall-through  */
>  	    case BIT_XOR_EXPR:
>  	      match_uaddc_usubc (&gsi, stmt, code);
>  	      break;
> --
> 2.34.1
Li, Pan2 May 15, 2024, 11:36 a.m. UTC | #2
> LGTM but you'll need an OK from Richard,
> Thanks for working on this!

Thanks Tamar for the help and coaching; let's wait for Richard for a while. 😊

Pan

Richard Biener May 16, 2024, 8:10 a.m. UTC | #3
On Wed, May 15, 2024 at 1:36 PM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > LGTM but you'll need an OK from Richard,
> > Thanks for working on this!
>
> Thanks Tamar for help and coaching, let's wait Richard for a while,😊!

OK.

Thanks for the patience,
Richard.

Li, Pan2 May 16, 2024, 9:34 a.m. UTC | #4
> OK.

Thanks Richard for the help and coaching. To double-confirm, are you OK with this patch only, or with the whole SAT middle-end patch series?
Thanks again for reviewing and suggestions.

Pan

Richard Biener May 16, 2024, 11:58 a.m. UTC | #5
On Thu, May 16, 2024 at 11:35 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > OK.
>
> Thanks Richard for help and coaching. To double confirm, are you OK with this patch only or for the series patch(es) of SAT middle-end?
> Thanks again for reviewing and suggestions.

For the series, the riscv specific part of course needs riscv approval.

Li, Pan2 May 16, 2024, noon UTC | #6
> For the series, the riscv specific part of course needs riscv approval.

Thanks a lot, have a nice day!

Pan

> > >      case IFN_UBSAN_CHECK_MUL:
> > >      case IFN_ADD_OVERFLOW:
> > >      case IFN_MUL_OVERFLOW:
> > > +    case IFN_SAT_ADD:
> > >      case IFN_VEC_WIDEN_PLUS:
> > >      case IFN_VEC_WIDEN_PLUS_LO:
> > >      case IFN_VEC_WIDEN_PLUS_HI:
> > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > > index 848bb9dbff3..25badbb86e5 100644
> > > --- a/gcc/internal-fn.def
> > > +++ b/gcc/internal-fn.def
> > > @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST
> > > | ECF_NOTHROW, first,
> > >  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW,
> > > first,
> > >                             smulhrs, umulhrs, binary)
> > >
> > > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd,
> > > binary)
> > > +
> > >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> > >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> > >  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 07e743ae464..0f9c34fa897 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >         || POINTER_TYPE_P (itype))
> > >        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> > >
> > > +/* Unsigned Saturation Add */
> > > +(match (usadd_left_part_1 @0 @1)
> > > + (plus:c @0 @1)
> > > + (if (INTEGRAL_TYPE_P (type)
> > > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@1)))))
> > > +
> > > +(match (usadd_left_part_2 @0 @1)
> > > + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> > > + (if (INTEGRAL_TYPE_P (type)
> > > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@1)))))
> > > +
> > > +(match (usadd_right_part_1 @0 @1)
> > > + (negate (convert (lt (plus:c @0 @1) @0)))
> > > + (if (INTEGRAL_TYPE_P (type)
> > > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@1)))))
> > > +
> > > +(match (usadd_right_part_1 @0 @1)
> > > + (negate (convert (gt @0 (plus:c @0 @1))))
> > > + (if (INTEGRAL_TYPE_P (type)
> > > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@1)))))
> > > +
> > > +(match (usadd_right_part_2 @0 @1)
> > > + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1))
> > > integer_zerop)))
> > > + (if (INTEGRAL_TYPE_P (type)
> > > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@0))
> > > +      && types_match (type, TREE_TYPE (@1)))))
> > > +
> > > +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> > > +   because the sub part of left_part_2 cannot work with right_part_1.
> > > +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> > > +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> > > +
> > > +/* Unsigned saturation add, case 1 (branchless):
> > > +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> > > +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> > > +(match (unsigned_integer_sat_add @0 @1)
> > > + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> > > +
> > > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> > > +(match (unsigned_integer_sat_add @0 @1)
> > > + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> > > +
> > >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> > >     x >  y  &&  x == XXX_MIN  -->  false . */
> > >  (for eqne (eq ne)
> > > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > > index ad14f9328b9..3f2cb46aff8 100644
> > > --- a/gcc/optabs.def
> > > +++ b/gcc/optabs.def
> > > @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
> > >  OPTAB_NX(add_optab, "add$Q$a3")
> > >  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
> > >  OPTAB_VX(addv_optab, "add$F$a3")
> > > -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3',
> > > gen_signed_fixed_libfunc)
> > > -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3',
> > > gen_unsigned_fixed_libfunc)
> > > +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3',
> > > gen_signed_fixed_libfunc)
> > > +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3',
> > > gen_unsigned_fixed_libfunc)
> > >  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
> > >  OPTAB_NX(sub_optab, "sub$F$a3")
> > >  OPTAB_NX(sub_optab, "sub$Q$a3")
> > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > > index e8c804f09b7..62da1c5ee08 100644
> > > --- a/gcc/tree-ssa-math-opts.cc
> > > +++ b/gcc/tree-ssa-math-opts.cc
> > > @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple
> > > *cast_stmt, gimple *&use_stmt,
> > >    return 0;
> > >  }
> > >
> > > +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > > +
> > > +/*
> > > + * Try to match saturation arith pattern(s).
> > > + *   1. SAT_ADD (unsigned)
> > > + *      _7 = _4 + _6;
> > > + *      _8 = _4 > _7;
> > > + *      _9 = (long unsigned int) _8;
> > > + *      _10 = -_9;
> > > + *      _12 = _7 | _10;
> > > + *      =>
> > > + *      _12 = .SAT_ADD (_4, _6);  */
> > > +static void
> > > +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > > +{
> > > +  gcall *call = NULL;
> > > +
> > > +  tree ops[2];
> > > +  tree lhs = gimple_assign_lhs (stmt);
> > > +
> > > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > > +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > > +                                      OPTIMIZE_FOR_BOTH))
> > > +    {
> > > +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > > +      gimple_call_set_lhs (call, lhs);
> > > +      gsi_replace (gsi, call, true);
> > > +    }
> > > +}
> > > +
> > >  /* Recognize for unsigned x
> > >     x = y - z;
> > >     if (x > y)
> > > @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children
> > > (basic_block bb)
> > >             break;
> > >
> > >           case BIT_IOR_EXPR:
> > > +           match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > > +           /* fall-through  */
> > >           case BIT_XOR_EXPR:
> > >             match_uaddc_usubc (&gsi, stmt, code);
> > >             break;
> > > --
> > > 2.34.1
> >
Jeff Law May 16, 2024, 1:32 p.m. UTC | #7
On 5/16/24 5:58 AM, Richard Biener wrote:
> On Thu, May 16, 2024 at 11:35 AM Li, Pan2 <pan2.li@intel.com> wrote:
>>
>>> OK.
>>
>> Thanks Richard for help and coaching. To double confirm, are you OK with this patch only or for the series patch(es) of SAT middle-end?
>> Thanks again for reviewing and suggestions.
> 
> For the series, the riscv specific part of course needs riscv approval.
Yea, we'll take a look at it.  Tons of stuff to go through, but this is 
definitely on the list.

jeff
Li, Pan2 May 17, 2024, 3:18 a.m. UTC | #8
Hi Tamar,

I am trying to add more shapes, like the branch version below, for SAT_ADD. I suspect that widening_mul may not be the best place to take care of this shape,
because after_dom_children mostly works within a single basic block, while here we need to follow the def/use chain across basic blocks.

Thus, is there any suggestion for the branch shape? Adding a new simplify to match.pd works well, but that is not recommended per the previous discussion.

Thanks a lot for the help!

Pan

-------Source code---------

#define SAT_ADD_U_1(T) \
T sat_add_u_1_##T(T x, T y) \
{ \
  return (T)(x + y) >= x ? (x + y) : -1; \
}

SAT_ADD_U_1(uint16_t)

-------Gimple---------

uint16_t sat_add_u_1_uint16_t (uint16_t x, uint16_t y)
{
  short unsigned int _1;
  uint16_t _2;

  <bb 2> [local count: 1073741824]:
  _1 = x_3(D) + y_4(D);
  if (_1 >= x_3(D))
    goto <bb 3>; [65.00%]
  else
    goto <bb 4>; [35.00%]

  <bb 3> [local count: 697932184]:

  <bb 4> [local count: 1073741824]:
  # _2 = PHI <65535(2), _1(3)>
  return _2;
}

Pan

-----Original Message-----
From: Tamar Christina <Tamar.Christina@arm.com> 
Sent: Wednesday, May 15, 2024 5:12 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Hi Pan,

Thanks!

> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Wednesday, May 15, 2024 3:14 AM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> <Tamar.Christina@arm.com>; richard.guenther@gmail.com;
> hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar
> int
> 
> From: Pan Li <pan2.li@intel.com>
> 
> This patch would like to add the middle-end presentation for the
> saturation add.  Aka set the result of add to the max when overflow.
> It will take the pattern similar as below.
> 
> SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
> 
> Take uint8_t as example, we will have:
> 
> * SAT_ADD (1, 254)   => 255.
> * SAT_ADD (1, 255)   => 255.
> * SAT_ADD (2, 255)   => 255.
> * SAT_ADD (255, 255) => 255.
> 
> Given below example for the unsigned scalar integer uint64_t:
> 
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
> 
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 = REALPART_EXPR <_11>;
>   _10 = IMAGPART_EXPR <_11>;
>   _2 = _10 != 0;
>   _3 = (long unsigned int) _2;
>   _4 = -_3;
>   _7 = _1 | _4;
>   return _7;
> ;;    succ:       EXIT
> 
> }
> 
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;    succ:       EXIT
> }
> 
> The below tests are passed for this patch:
> 1. The riscv fully regression tests.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
> 
> 	PR target/51492
> 	PR target/112600
> 
> gcc/ChangeLog:
> 
> 	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> 	to the return true switch case(s).
> 	* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> 	* match.pd: Add unsigned SAT_ADD match(es).
> 	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
> 	us/ssadd.
> 	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
> 	extern func decl generated in match.pd match.
> 	(match_saturation_arith): New func impl to match the saturation arith.
> 	(math_opts_dom_walker::after_dom_children): Try match saturation
> 	arith when IOR expr.
> 

 LGTM but you'll need an OK from Richard,

Thanks for working on this!

Tamar

> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.cc        |  1 +
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
>  gcc/optabs.def            |  4 +--
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
>  5 files changed, 88 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>      case IFN_UBSAN_CHECK_MUL:
>      case IFN_ADD_OVERFLOW:
>      case IFN_MUL_OVERFLOW:
> +    case IFN_SAT_ADD:
>      case IFN_VEC_WIDEN_PLUS:
>      case IFN_VEC_WIDEN_PLUS_LO:
>      case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST
> | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW,
> first,
>  			      smulhrs, umulhrs, binary)
> 
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd,
> binary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 07e743ae464..0f9c34fa897 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         || POINTER_TYPE_P (itype))
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> 
> +/* Unsigned Saturation Add */
> +(match (usadd_left_part_1 @0 @1)
> + (plus:c @0 @1)
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_left_part_2 @0 @1)
> + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (lt (plus:c @0 @1) @0)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (gt @0 (plus:c @0 @1))))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_2 @0 @1)
> + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1))
> integer_zerop)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> +   because the sub part of left_part_2 cannot work with right_part_1.
> +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> +
> +/* Unsigned saturation add, case 1 (branchless):
> +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> +
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index ad14f9328b9..3f2cb46aff8 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
>  OPTAB_NX(add_optab, "add$Q$a3")
>  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(addv_optab, "add$F$a3")
> -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3',
> gen_signed_fixed_libfunc)
> -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3',
> gen_unsigned_fixed_libfunc)
> +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3',
> gen_signed_fixed_libfunc)
> +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3',
> gen_unsigned_fixed_libfunc)
>  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index e8c804f09b7..62da1c5ee08 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple
> *cast_stmt, gimple *&use_stmt,
>    return 0;
>  }
> 
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to match saturation arith pattern(s).
> + *   1. SAT_ADD (unsigned)
> + *      _7 = _4 + _6;
> + *      _8 = _4 > _7;
> + *      _9 = (long unsigned int) _8;
> + *      _10 = -_9;
> + *      _12 = _7 | _10;
> + *      =>
> + *      _12 = .SAT_ADD (_4, _6);  */
> +static void
> +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  gcall *call = NULL;
> +
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> +					 OPTIMIZE_FOR_BOTH))
> +    {
> +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children
> (basic_block bb)
>  	      break;
> 
>  	    case BIT_IOR_EXPR:
> +	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +	      /* fall-through  */
>  	    case BIT_XOR_EXPR:
>  	      match_uaddc_usubc (&gsi, stmt, code);
>  	      break;
> --
> 2.34.1
Tamar Christina May 17, 2024, 2:46 p.m. UTC | #9
Hi Pan,

> 
> Hi Tamar,
> 
> I am trying to add more shape(s) like below branch version for SAT_ADD. I suspect
> that widening_mul may not be the best place to take care of this shape.
> Because after_dom_children almost works on bb but we actually need to find the
> def/use cross the bb.

It actually already does this; see for example optimize_spaceship, which optimizes
across basic blocks. However...

> 
> Thus, is there any suggestion for branch shape? Add new simplify to match.pd
> works well but it is not recommended per previous discussion.

The previous objection was to introducing the IFNs in match.pd; it doesn't
mean we can't use match.pd to rewrite the versions with branches into branchless
code so the existing patterns can deal with them as is.

...in this case something like this:

#if GIMPLE
(simplify
 (cond (ge (plus:c@3 @0 @1) @0) @3 integer_minus_onep)
  (if (direct_internal_fn_supported_p (...))
   (bit_ior @3 (negate (...)))))
#endif

Works better I think.

That is, for targets where we know we can optimize it later on, or do something with it
in the vectorizer, we canonicalize it.  The reason I have it guarded with the IFN check is
that some target maintainers objected to replacing the branch code with branchless
code, as their targets can deal with branches more optimally.
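
For illustration only, here is a minimal C sketch (assuming uint16_t; the function names are made up for this sketch, and the same holds for any unsigned integer type) of the two forms involved: the branch version from the example above and the branchless version that the existing unsigned_integer_sat_add pattern already matches.  The canonicalization would effectively turn the first into the second in GIMPLE when the target supports the IFN.

#include <stdint.h>

/* Branch form, as written in the source example:
   (x + y) >= x ? (x + y) : -1.  */
uint16_t sat_add_u16_branch (uint16_t x, uint16_t y)
{
  uint16_t sum = x + y;
  return sum >= x ? sum : (uint16_t) -1;
}

/* Branchless form, the shape already matched by
   unsigned_integer_sat_add: (x + y) | -(T)((x + y) < x).  */
uint16_t sat_add_u16_branchless (uint16_t x, uint16_t y)
{
  uint16_t sum = x + y;
  return sum | (uint16_t) -(uint16_t) (sum < x);
}

Both return the wrapped sum when no overflow occurs and 0xFFFF otherwise, so the rewrite is value-preserving for unsigned types.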

Cheers,
Tamar
> 
> Thanks a lot for help!
> 
> Pan
> 
> -------Source code---------
> 
> #define SAT_ADD_U_1(T) \
> T sat_add_u_1_##T(T x, T y) \
> { \
>   return (T)(x + y) >= x ? (x + y) : -1; \
> }
> 
> SAT_ADD_U_1(uint16_t)
> 
> -------Gimple---------
> 
> uint16_t sat_add_u_1_uint16_t (uint16_t x, uint16_t y)
> {
>   short unsigned int _1;
>   uint16_t _2;
> 
>   <bb 2> [local count: 1073741824]:
>   _1 = x_3(D) + y_4(D);
>   if (_1 >= x_3(D))
>     goto <bb 3>; [65.00%]
>   else
>     goto <bb 4>; [35.00%]
> 
>   <bb 3> [local count: 697932184]:
> 
>   <bb 4> [local count: 1073741824]:
>   # _2 = PHI <65535(2), _1(3)>
>   return _2;
> }
> 
> Pan
> 
> -----Original Message-----
> From: Tamar Christina <Tamar.Christina@arm.com>
> Sent: Wednesday, May 15, 2024 5:12 PM
> To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com;
> Liu, Hongtao <hongtao.liu@intel.com>
> Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned
> scalar int
> 
> Hi Pan,
> 
> Thanks!
> 
> > -----Original Message-----
> > From: pan2.li@intel.com <pan2.li@intel.com>
> > Sent: Wednesday, May 15, 2024 3:14 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> > <Tamar.Christina@arm.com>; richard.guenther@gmail.com;
> > hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> > Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned
> scalar
> > int
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to add the middle-end presentation for the
> > saturation add.  Aka set the result of add to the max when overflow.
> > It will take the pattern similar as below.
> >
> > SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
> >
> > Take uint8_t as example, we will have:
> >
> > * SAT_ADD (1, 254)   => 255.
> > * SAT_ADD (1, 255)   => 255.
> > * SAT_ADD (2, 255)   => 255.
> > * SAT_ADD (255, 255) => 255.
> >
> > Given below example for the unsigned scalar integer uint64_t:
> >
> > uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> > {
> >   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> > }
> >
> > Before this patch:
> > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _1;
> >   _Bool _2;
> >   long unsigned int _3;
> >   long unsigned int _4;
> >   uint64_t _7;
> >   long unsigned int _10;
> >   __complex__ long unsigned int _11;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
> >   _1 = REALPART_EXPR <_11>;
> >   _10 = IMAGPART_EXPR <_11>;
> >   _2 = _10 != 0;
> >   _3 = (long unsigned int) _2;
> >   _4 = -_3;
> >   _7 = _1 | _4;
> >   return _7;
> > ;;    succ:       EXIT
> >
> > }
> >
> > After this patch:
> > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> > {
> >   uint64_t _7;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   return _7;
> > ;;    succ:       EXIT
> > }
> >
> > The below tests are passed for this patch:
> > 1. The riscv fully regression tests.
> > 3. The x86 bootstrap tests.
> > 4. The x86 fully regression tests.
> >
> > 	PR target/51492
> > 	PR target/112600
> >
> > gcc/ChangeLog:
> >
> > 	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> > 	to the return true switch case(s).
> > 	* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> > 	* match.pd: Add unsigned SAT_ADD match(es).
> > 	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
> > 	us/ssadd.
> > 	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
> > 	extern func decl generated in match.pd match.
> > 	(match_saturation_arith): New func impl to match the saturation arith.
> > 	(math_opts_dom_walker::after_dom_children): Try match saturation
> > 	arith when IOR expr.
> >
> 
>  LGTM but you'll need an OK from Richard,
> 
> Thanks for working on this!
> 
> Tamar
> 
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/internal-fn.cc        |  1 +
> >  gcc/internal-fn.def       |  2 ++
> >  gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
> >  gcc/optabs.def            |  4 +--
> >  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
> >  5 files changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> > index 0a7053c2286..73045ca8c8c 100644
> > --- a/gcc/internal-fn.cc
> > +++ b/gcc/internal-fn.cc
> > @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
> >      case IFN_UBSAN_CHECK_MUL:
> >      case IFN_ADD_OVERFLOW:
> >      case IFN_MUL_OVERFLOW:
> > +    case IFN_SAT_ADD:
> >      case IFN_VEC_WIDEN_PLUS:
> >      case IFN_VEC_WIDEN_PLUS_LO:
> >      case IFN_VEC_WIDEN_PLUS_HI:
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 848bb9dbff3..25badbb86e5 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS,
> ECF_CONST
> > | ECF_NOTHROW, first,
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW,
> > first,
> >  			      smulhrs, umulhrs, binary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd,
> > binary)
> > +
> >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> >  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 07e743ae464..0f9c34fa897 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >         || POINTER_TYPE_P (itype))
> >        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> >
> > +/* Unsigned Saturation Add */
> > +(match (usadd_left_part_1 @0 @1)
> > + (plus:c @0 @1)
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_left_part_2 @0 @1)
> > + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_1 @0 @1)
> > + (negate (convert (lt (plus:c @0 @1) @0)))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_1 @0 @1)
> > + (negate (convert (gt @0 (plus:c @0 @1))))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_2 @0 @1)
> > + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1))
> > integer_zerop)))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> > +   because the sub part of left_part_2 cannot work with right_part_1.
> > +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> > +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> > +
> > +/* Unsigned saturation add, case 1 (branchless):
> > +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> > +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> > +
> > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index ad14f9328b9..3f2cb46aff8 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
> >  OPTAB_NX(add_optab, "add$Q$a3")
> >  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
> >  OPTAB_VX(addv_optab, "add$F$a3")
> > -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3',
> > gen_signed_fixed_libfunc)
> > -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3',
> > gen_unsigned_fixed_libfunc)
> > +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3',
> > gen_signed_fixed_libfunc)
> > +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3',
> > gen_unsigned_fixed_libfunc)
> >  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3',
> gen_int_fp_fixed_libfunc)
> >  OPTAB_NX(sub_optab, "sub$F$a3")
> >  OPTAB_NX(sub_optab, "sub$Q$a3")
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index e8c804f09b7..62da1c5ee08 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple
> > *cast_stmt, gimple *&use_stmt,
> >    return 0;
> >  }
> >
> > +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> > +/*
> > + * Try to match saturation arith pattern(s).
> > + *   1. SAT_ADD (unsigned)
> > + *      _7 = _4 + _6;
> > + *      _8 = _4 > _7;
> > + *      _9 = (long unsigned int) _8;
> > + *      _10 = -_9;
> > + *      _12 = _7 | _10;
> > + *      =>
> > + *      _12 = .SAT_ADD (_4, _6);  */
> > +static void
> > +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +{
> > +  gcall *call = NULL;
> > +
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> > +
> > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > +					 OPTIMIZE_FOR_BOTH))
> > +    {
> > +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > +      gimple_call_set_lhs (call, lhs);
> > +      gsi_replace (gsi, call, true);
> > +    }
> > +}
> > +
> >  /* Recognize for unsigned x
> >     x = y - z;
> >     if (x > y)
> > @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children
> > (basic_block bb)
> >  	      break;
> >
> >  	    case BIT_IOR_EXPR:
> > +	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +	      /* fall-through  */
> >  	    case BIT_XOR_EXPR:
> >  	      match_uaddc_usubc (&gsi, stmt, code);
> >  	      break;
> > --
> > 2.34.1
Li, Pan2 May 18, 2024, 5:24 a.m. UTC | #10
Thanks Tamar for the enlightening suggestion, I will give this ingenious idea a try!

Pan

-----Original Message-----
From: Tamar Christina <Tamar.Christina@arm.com> 
Sent: Friday, May 17, 2024 10:46 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

Hi Pan,

> 
> Hi Tamar,
> 
> I am trying to add more shape(s) like below branch version for SAT_ADD. I suspect
> that widening_mul may not be the best place to take care of this shape.
> Because after_dom_children almost works on bb but we actually need to find the
> def/use cross the bb.

It actually already does this, see for example optimize_spaceship which optimizes
across basic blocks. However...

> 
> Thus, is there any suggestion for branch shape? Add new simplify to match.pd
> works well but it is not recommended per previous discussion.

The objection previously was not to introduce the IFNs at match.pd, it doesn't
mean we can't use match.pd to force the versions with branches to banchless
code so the existing patterns can deal with them as is.

...in this case something like this:

#if GIMPLE
(simplify
 (cond (ge (plus:c@3 @0 @1) @0) @3 integer_minus_onep)
  (if (direct_internal_fn_supported_p (...))
   (bit_ior @3 (negate (...)))))
#endif

Works better I think.

That is, for targets we know we can optimize it later on, or do something with it
in the vectorizer we canonicalize it.  The reason I have it guarded with the IFN is
that some target maintainers objected to replacing the branch code with branchless
code as their targets can more optimally deal with branches.

Cheers,
Tamar
> 
> Thanks a lot for help!
> 
> Pan
> 
> -------Source code---------
> 
> #define SAT_ADD_U_1(T) \
> T sat_add_u_1_##T(T x, T y) \
> { \
>   return (T)(x + y) >= x ? (x + y) : -1; \
> }
> 
> SAT_ADD_U_1(uint16_t)
> 
> -------Gimple---------
> 
> uint16_t sat_add_u_1_uint16_t (uint16_t x, uint16_t y)
> {
>   short unsigned int _1;
>   uint16_t _2;
> 
>   <bb 2> [local count: 1073741824]:
>   _1 = x_3(D) + y_4(D);
>   if (_1 >= x_3(D))
>     goto <bb 3>; [65.00%]
>   else
>     goto <bb 4>; [35.00%]
> 
>   <bb 3> [local count: 697932184]:
> 
>   <bb 4> [local count: 1073741824]:
>   # _2 = PHI <65535(2), _1(3)>
>   return _2;
> }
> 
> Pan
> 
> -----Original Message-----
> From: Tamar Christina <Tamar.Christina@arm.com>
> Sent: Wednesday, May 15, 2024 5:12 PM
> To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com;
> Liu, Hongtao <hongtao.liu@intel.com>
> Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned
> scalar int
> 
> Hi Pan,
> 
> Thanks!
> 
> > -----Original Message-----
> > From: pan2.li@intel.com <pan2.li@intel.com>
> > Sent: Wednesday, May 15, 2024 3:14 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> > <Tamar.Christina@arm.com>; richard.guenther@gmail.com;
> > hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> > Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned
> scalar
> > int
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to add the middle-end presentation for the
> > saturation add.  Aka set the result of add to the max when overflow.
> > It will take the pattern similar as below.
> >
> > SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
> >
> > Take uint8_t as example, we will have:
> >
> > * SAT_ADD (1, 254)   => 255.
> > * SAT_ADD (1, 255)   => 255.
> > * SAT_ADD (2, 255)   => 255.
> > * SAT_ADD (255, 255) => 255.
> >
> > Given below example for the unsigned scalar integer uint64_t:
> >
> > uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> > {
> >   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> > }
> >
> > Before this patch:
> > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _1;
> >   _Bool _2;
> >   long unsigned int _3;
> >   long unsigned int _4;
> >   uint64_t _7;
> >   long unsigned int _10;
> >   __complex__ long unsigned int _11;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
> >   _1 = REALPART_EXPR <_11>;
> >   _10 = IMAGPART_EXPR <_11>;
> >   _2 = _10 != 0;
> >   _3 = (long unsigned int) _2;
> >   _4 = -_3;
> >   _7 = _1 | _4;
> >   return _7;
> > ;;    succ:       EXIT
> >
> > }
> >
> > After this patch:
> > uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> > {
> >   uint64_t _7;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   return _7;
> > ;;    succ:       EXIT
> > }
> >
> > The below tests are passed for this patch:
> > 1. The riscv fully regression tests.
> > 3. The x86 bootstrap tests.
> > 4. The x86 fully regression tests.
> >
> > 	PR target/51492
> > 	PR target/112600
> >
> > gcc/ChangeLog:
> >
> > 	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> > 	to the return true switch case(s).
> > 	* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
> > 	* match.pd: Add unsigned SAT_ADD match(es).
> > 	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
> > 	us/ssadd.
> > 	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
> > 	extern func decl generated in match.pd match.
> > 	(match_saturation_arith): New func impl to match the saturation arith.
> > 	(math_opts_dom_walker::after_dom_children): Try match saturation
> > 	arith when IOR expr.
> >
> 
>  LGTM but you'll need an OK from Richard,
> 
> Thanks for working on this!
> 
> Tamar
> 
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/internal-fn.cc        |  1 +
> >  gcc/internal-fn.def       |  2 ++
> >  gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
> >  gcc/optabs.def            |  4 +--
> >  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
> >  5 files changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> > index 0a7053c2286..73045ca8c8c 100644
> > --- a/gcc/internal-fn.cc
> > +++ b/gcc/internal-fn.cc
> > @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
> >      case IFN_UBSAN_CHECK_MUL:
> >      case IFN_ADD_OVERFLOW:
> >      case IFN_MUL_OVERFLOW:
> > +    case IFN_SAT_ADD:
> >      case IFN_VEC_WIDEN_PLUS:
> >      case IFN_VEC_WIDEN_PLUS_LO:
> >      case IFN_VEC_WIDEN_PLUS_HI:
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 848bb9dbff3..25badbb86e5 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS,
> ECF_CONST
> > | ECF_NOTHROW, first,
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW,
> > first,
> >  			      smulhrs, umulhrs, binary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd,
> > binary)
> > +
> >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> >  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 07e743ae464..0f9c34fa897 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >         || POINTER_TYPE_P (itype))
> >        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> >
> > +/* Unsigned Saturation Add */
> > +(match (usadd_left_part_1 @0 @1)
> > + (plus:c @0 @1)
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_left_part_2 @0 @1)
> > + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_1 @0 @1)
> > + (negate (convert (lt (plus:c @0 @1) @0)))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_1 @0 @1)
> > + (negate (convert (gt @0 (plus:c @0 @1))))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +(match (usadd_right_part_2 @0 @1)
> > + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1))
> > integer_zerop)))
> > + (if (INTEGRAL_TYPE_P (type)
> > +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@0))
> > +      && types_match (type, TREE_TYPE (@1)))))
> > +
> > +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> > +   because the sub part of left_part_2 cannot work with right_part_1.
> > +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> > +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> > +
> > +/* Unsigned saturation add, case 1 (branchless):
> > +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> > +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> > +
> > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index ad14f9328b9..3f2cb46aff8 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
> >  OPTAB_NX(add_optab, "add$Q$a3")
> >  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
> >  OPTAB_VX(addv_optab, "add$F$a3")
> > -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3',
> > gen_signed_fixed_libfunc)
> > -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3',
> > gen_unsigned_fixed_libfunc)
> > +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3',
> > gen_signed_fixed_libfunc)
> > +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3',
> > gen_unsigned_fixed_libfunc)
> >  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3',
> gen_int_fp_fixed_libfunc)
> >  OPTAB_NX(sub_optab, "sub$F$a3")
> >  OPTAB_NX(sub_optab, "sub$Q$a3")
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index e8c804f09b7..62da1c5ee08 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple
> > *cast_stmt, gimple *&use_stmt,
> >    return 0;
> >  }
> >
> > +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> > +/*
> > + * Try to match saturation arith pattern(s).
> > + *   1. SAT_ADD (unsigned)
> > + *      _7 = _4 + _6;
> > + *      _8 = _4 > _7;
> > + *      _9 = (long unsigned int) _8;
> > + *      _10 = -_9;
> > + *      _12 = _7 | _10;
> > + *      =>
> > + *      _12 = .SAT_ADD (_4, _6);  */
> > +static void
> > +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +{
> > +  gcall *call = NULL;
> > +
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> > +
> > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > +					 OPTIMIZE_FOR_BOTH))
> > +    {
> > +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > +      gimple_call_set_lhs (call, lhs);
> > +      gsi_replace (gsi, call, true);
> > +    }
> > +}
> > +
> >  /* Recognize for unsigned x
> >     x = y - z;
> >     if (x > y)
> > @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children
> > (basic_block bb)
> >  	      break;
> >
> >  	    case BIT_IOR_EXPR:
> > +	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +	      /* fall-through  */
> >  	    case BIT_XOR_EXPR:
> >  	      match_uaddc_usubc (&gsi, stmt, code);
> >  	      break;
> > --
> > 2.34.1
diff mbox series

Patch

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@  commutative_binary_fn_p (internal_fn fn)
     case IFN_UBSAN_CHECK_MUL:
     case IFN_ADD_OVERFLOW:
     case IFN_MUL_OVERFLOW:
+    case IFN_SAT_ADD:
     case IFN_VEC_WIDEN_PLUS:
     case IFN_VEC_WIDEN_PLUS_LO:
     case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 			      smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..0f9c34fa897 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3043,6 +3043,57 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        || POINTER_TYPE_P (itype))
       && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1))))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_2 @0 @1)
+ (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
+   because the sub part of left_part_2 cannot work with right_part_1.
+   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
+   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
+
+/* Unsigned saturation add, case 1 (branchless):
+   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
+   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
+
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ad14f9328b9..3f2cb46aff8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -111,8 +111,8 @@  OPTAB_NX(add_optab, "add$F$a3")
 OPTAB_NX(add_optab, "add$Q$a3")
 OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
 OPTAB_VX(addv_optab, "add$F$a3")
-OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
 OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index e8c804f09b7..62da1c5ee08 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4086,6 +4086,36 @@  arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
   return 0;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to match saturation arith pattern(s).
+ *   1. SAT_ADD (unsigned)
+ *      _7 = _4 + _6;
+ *      _8 = _4 > _7;
+ *      _9 = (long unsigned int) _8;
+ *      _10 = -_9;
+ *      _12 = _7 | _10;
+ *      =>
+ *      _12 = .SAT_ADD (_4, _6);  */
+static void
+match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+  gcall *call = NULL;
+
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
+					 OPTIMIZE_FOR_BOTH))
+    {
+      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, true);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6048,6 +6078,8 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case BIT_IOR_EXPR:
+	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);
 	      break;