diff mbox series

[v2] combine: Narrow comparison of memory and constant

Message ID 20230619142356.345159-1-stefansf@linux.ibm.com
State New
Headers show
Series [v2] combine: Narrow comparison of memory and constant | expand

Commit Message

Stefan Schulze Frielinghaus June 19, 2023, 2:23 p.m. UTC
Comparisons between memory and constants might be done in a smaller mode
resulting in smaller constants which might finally end up as immediates
instead of in the literal pool.

For example, on s390x a non-symmetric comparison like
  x <= 0x3fffffffffffffff
results in the constant being spilled to the literal pool and an 8 byte
memory comparison is emitted.  Ideally, an equivalent comparison
  x0 <= 0x3f
where x0 is the most significant byte of x, is emitted where the
constant is smaller and more likely to materialize as an immediate.

Similarly, comparisons of the form
  x >= 0x4000000000000000
can be shortened into x0 >= 0x40.

Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
Note, the new tests show that for the mentioned little-endian targets
the optimization does not materialize since either the costs of the new
instructions are higher or they do not match.  Still ok for mainline?

gcc/ChangeLog:

	* combine.cc (simplify_compare_const): Narrow comparison of
	memory and constant.
	(try_combine): Adapt new function signature.
	(simplify_comparison): Adapt new function signature.

gcc/testsuite/ChangeLog:

	* gcc.dg/cmp-mem-const-1.c: New test.
	* gcc.dg/cmp-mem-const-2.c: New test.
	* gcc.dg/cmp-mem-const-3.c: New test.
	* gcc.dg/cmp-mem-const-4.c: New test.
	* gcc.dg/cmp-mem-const-5.c: New test.
	* gcc.dg/cmp-mem-const-6.c: New test.
	* gcc.target/s390/cmp-mem-const-1.c: New test.
---
 gcc/combine.cc                                | 79 +++++++++++++++++--
 gcc/testsuite/gcc.dg/cmp-mem-const-1.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-2.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-3.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-4.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-5.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-6.c        | 17 ++++
 .../gcc.target/s390/cmp-mem-const-1.c         | 24 ++++++
 8 files changed, 200 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-1.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-2.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-3.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-4.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-5.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-6.c
 create mode 100644 gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c

Comments

Stefan Schulze Frielinghaus July 31, 2023, 1:26 p.m. UTC | #1
ping

On Mon, Jun 19, 2023 at 04:23:57PM +0200, Stefan Schulze Frielinghaus wrote:
> Comparisons between memory and constants might be done in a smaller mode
> resulting in smaller constants which might finally end up as immediates
> instead of in the literal pool.
> 
> For example, on s390x a non-symmetric comparison like
>   x <= 0x3fffffffffffffff
> results in the constant being spilled to the literal pool and an 8 byte
> memory comparison is emitted.  Ideally, an equivalent comparison
>   x0 <= 0x3f
> where x0 is the most significant byte of x, is emitted where the
> constant is smaller and more likely to materialize as an immediate.
> 
> Similarly, comparisons of the form
>   x >= 0x4000000000000000
> can be shortened into x0 >= 0x40.
> 
> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> Note, the new tests show that for the mentioned little-endian targets
> the optimization does not materialize since either the costs of the new
> instructions are higher or they do not match.  Still ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* combine.cc (simplify_compare_const): Narrow comparison of
> 	memory and constant.
> 	(try_combine): Adapt new function signature.
> 	(simplify_comparison): Adapt new function signature.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/cmp-mem-const-1.c: New test.
> 	* gcc.dg/cmp-mem-const-2.c: New test.
> 	* gcc.dg/cmp-mem-const-3.c: New test.
> 	* gcc.dg/cmp-mem-const-4.c: New test.
> 	* gcc.dg/cmp-mem-const-5.c: New test.
> 	* gcc.dg/cmp-mem-const-6.c: New test.
> 	* gcc.target/s390/cmp-mem-const-1.c: New test.
> ---
>  gcc/combine.cc                                | 79 +++++++++++++++++--
>  gcc/testsuite/gcc.dg/cmp-mem-const-1.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-2.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-3.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-4.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-5.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-6.c        | 17 ++++
>  .../gcc.target/s390/cmp-mem-const-1.c         | 24 ++++++
>  8 files changed, 200 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-5.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-6.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 5aa0ec5c45a..56e15a93409 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -460,7 +460,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
>  static int recog_for_combine (rtx *, rtx_insn *, rtx *);
>  static rtx gen_lowpart_for_combine (machine_mode, rtx);
>  static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
> -					     rtx, rtx *);
> +					     rtx *, rtx *);
>  static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
>  static void update_table_tick (rtx);
>  static void record_value_for_reg (rtx, rtx_insn *, rtx);
> @@ -3185,7 +3185,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	  compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
>  	  if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
>  	    compare_code = simplify_compare_const (compare_code, mode,
> -						   op0, &op1);
> +						   &op0, &op1);
>  	  target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
>  	}
>  
> @@ -11796,13 +11796,14 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
>     (CODE OP0 const0_rtx) form.
>  
>     The result is a possibly different comparison code to use.
> -   *POP1 may be updated.  */
> +   *POP0 and *POP1 may be updated.  */
>  
>  static enum rtx_code
>  simplify_compare_const (enum rtx_code code, machine_mode mode,
> -			rtx op0, rtx *pop1)
> +			rtx *pop0, rtx *pop1)
>  {
>    scalar_int_mode int_mode;
> +  rtx op0 = *pop0;
>    HOST_WIDE_INT const_op = INTVAL (*pop1);
>  
>    /* Get the constant we are comparing against and turn off all bits
> @@ -11987,6 +11988,74 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        break;
>      }
>  
> +  /* Narrow non-symmetric comparison of memory and constant as e.g.
> +     x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
> +     significant byte.  Likewise, transform x0...x7 >= 0x4000000000000000 into
> +     x0 >= 0x40.  */
> +  if ((code == LEU || code == LTU || code == GEU || code == GTU)
> +      && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
> +      && MEM_P (op0)
> +      && !MEM_VOLATILE_P (op0)
> +      /* The optimization makes only sense for constants which are big enough
> +	 so that we have a chance to chop off something at all.  */
> +      && (unsigned HOST_WIDE_INT) const_op > 0xff
> +      /* Ensure that we do not overflow during normalization.  */
> +      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> +    {
> +      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> +      enum rtx_code adjusted_code;
> +
> +      /* Normalize code to either LEU or GEU.  */
> +      if (code == LTU)
> +	{
> +	  --n;
> +	  adjusted_code = LEU;
> +	}
> +      else if (code == GTU)
> +	{
> +	  ++n;
> +	  adjusted_code = GEU;
> +	}
> +      else
> +	adjusted_code = code;
> +
> +      scalar_int_mode narrow_mode_iter;
> +      FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
> +	{
> +	  unsigned nbits = GET_MODE_PRECISION (int_mode)
> +			   - GET_MODE_PRECISION (narrow_mode_iter);
> +	  unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
> +	  unsigned HOST_WIDE_INT lower_bits = n & mask;
> +	  if ((adjusted_code == LEU && lower_bits == mask)
> +	      || (adjusted_code == GEU && lower_bits == 0))
> +	    {
> +	      n >>= nbits;
> +	      break;
> +	    }
> +	}
> +
> +      if (narrow_mode_iter < int_mode)
> +	{
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    {
> +	      fprintf (
> +		dump_file, "narrow comparison from mode %s to %s: (MEM %s "
> +		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> +		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> +		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> +		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> +		n);
> +	    }
> +	  poly_int64 offset = (BYTES_BIG_ENDIAN
> +			       ? 0
> +			       : (GET_MODE_SIZE (int_mode)
> +				  - GET_MODE_SIZE (narrow_mode_iter)));
> +	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> +	  *pop1 = GEN_INT (n);
> +	  return adjusted_code;
> +	}
> +    }
> +
>    *pop1 = GEN_INT (const_op);
>    return code;
>  }
> @@ -12179,7 +12248,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
>  
>        /* Try to simplify the compare to constant, possibly changing the
>  	 comparison op, and/or changing op1 to zero.  */
> -      code = simplify_compare_const (code, raw_mode, op0, &op1);
> +      code = simplify_compare_const (code, raw_mode, &op0, &op1);
>        const_op = INTVAL (op1);
>  
>        /* Compute some predicates to simplify code below.  */
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> new file mode 100644
> index 00000000000..263ad98af79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_1byte_a (uint64_t *x)
> +{
> +  return *x <= 0x3fffffffffffffff;
> +}
> +
> +int
> +le_1byte_b (uint64_t *x)
> +{
> +  return *x < 0x4000000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> new file mode 100644
> index 00000000000..a7cc5348295
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_1byte_a (uint64_t *x)
> +{
> +  return *x > 0x3fffffffffffffff;
> +}
> +
> +int
> +ge_1byte_b (uint64_t *x)
> +{
> +  return *x >= 0x4000000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> new file mode 100644
> index 00000000000..06f80bf72d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_2bytes_a (uint64_t *x)
> +{
> +  return *x <= 0x3ffdffffffffffff;
> +}
> +
> +int
> +le_2bytes_b (uint64_t *x)
> +{
> +  return *x < 0x3ffe000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> new file mode 100644
> index 00000000000..407999abf7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_2bytes_a (uint64_t *x)
> +{
> +  return *x > 0x400cffffffffffff;
> +}
> +
> +int
> +ge_2bytes_b (uint64_t *x)
> +{
> +  return *x >= 0x400d000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> new file mode 100644
> index 00000000000..e16773f5bcf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_4bytes_a (uint64_t *x)
> +{
> +  return *x <= 0x3ffffdffffffffff;
> +}
> +
> +int
> +le_4bytes_b (uint64_t *x)
> +{
> +  return *x < 0x3ffffe0000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> new file mode 100644
> index 00000000000..8f53b5678bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_4bytes_a (uint64_t *x)
> +{
> +  return *x > 0x4000cfffffffffff;
> +}
> +
> +int
> +ge_4bytes_b (uint64_t *x)
> +{
> +  return *x >= 0x4000d00000000000;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> new file mode 100644
> index 00000000000..309aafbec01
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-assembler-not {\tclc\t} } } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +struct s
> +{
> +  long a;
> +  unsigned b : 1;
> +  unsigned c : 1;
> +};
> +
> +int foo (struct s *x)
> +{
> +  /* Expression
> +       x->b || x->c
> +     is transformed into
> +       _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
> +       _2 = _1 > 0x3FFFFFFFFFFFFFFF;
> +     where the constant may materialize in the literal pool and an 8 byte CLC
> +     may be emitted.  Ensure this is not the case.
> +  */
> +  return x->b || x->c;
> +}
> -- 
> 2.39.2
>
Jeff Law July 31, 2023, 1:59 p.m. UTC | #2
On 6/19/23 08:23, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> Comparisons between memory and constants might be done in a smaller mode
> resulting in smaller constants which might finally end up as immediates
> instead of in the literal pool.
> 
> For example, on s390x a non-symmetric comparison like
>    x <= 0x3fffffffffffffff
> results in the constant being spilled to the literal pool and an 8 byte
> memory comparison is emitted.  Ideally, an equivalent comparison
>    x0 <= 0x3f
> where x0 is the most significant byte of x, is emitted where the
> constant is smaller and more likely to materialize as an immediate.
> 
> Similarly, comparisons of the form
>    x >= 0x4000000000000000
> can be shortened into x0 >= 0x40.
> 
> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> Note, the new tests show that for the mentioned little-endian targets
> the optimization does not materialize since either the costs of the new
> instructions are higher or they do not match.  Still ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* combine.cc (simplify_compare_const): Narrow comparison of
> 	memory and constant.
> 	(try_combine): Adapt new function signature.
> 	(simplify_comparison): Adapt new function signature.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/cmp-mem-const-1.c: New test.
> 	* gcc.dg/cmp-mem-const-2.c: New test.
> 	* gcc.dg/cmp-mem-const-3.c: New test.
> 	* gcc.dg/cmp-mem-const-4.c: New test.
> 	* gcc.dg/cmp-mem-const-5.c: New test.
> 	* gcc.dg/cmp-mem-const-6.c: New test.
> 	* gcc.target/s390/cmp-mem-const-1.c: New test.
Sorry.  I'd looked at this a while back, wanted to take another looksie 
and totally forgot about it.

OK for the trunk.  Thanks for your patience.

jeff
Prathamesh Kulkarni July 31, 2023, 9:43 p.m. UTC | #3
On Mon, 19 Jun 2023 at 19:59, Stefan Schulze Frielinghaus via
Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>
> Comparisons between memory and constants might be done in a smaller mode
> resulting in smaller constants which might finally end up as immediates
> instead of in the literal pool.
>
> For example, on s390x a non-symmetric comparison like
>   x <= 0x3fffffffffffffff
> results in the constant being spilled to the literal pool and an 8 byte
> memory comparison is emitted.  Ideally, an equivalent comparison
>   x0 <= 0x3f
> where x0 is the most significant byte of x, is emitted where the
> constant is smaller and more likely to materialize as an immediate.
>
> Similarly, comparisons of the form
>   x >= 0x4000000000000000
> can be shortened into x0 >= 0x40.
>
> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> Note, the new tests show that for the mentioned little-endian targets
> the optimization does not materialize since either the costs of the new
> instructions are higher or they do not match.  Still ok for mainline?
Hi Stefan,
Unfortunately this patch (committed in 7cdd0860949c6c3232e6cff1d7ca37bb5234074c)
caused the following ICE on armv8l-unknown-linux-gnu:
during RTL pass: combine
../../../gcc/libgcc/fixed-bit.c: In function ‘__gnu_saturate1sq’:
../../../gcc/libgcc/fixed-bit.c:210:1: internal compiler error: in
decompose, at rtl.h:2297
  210 | }
      | ^
0xaa23e3 wi::int_traits<std::pair<rtx_def*, machine_mode>
>::decompose(long long*, unsigned int, std::pair<rtx_def*,
machine_mode> const&)
        ../../gcc/gcc/rtl.h:2297
0xaf5ab3 wide_int_ref_storage<false,
true>::wide_int_ref_storage<std::pair<rtx_def*, machine_mode>
>(std::pair<rtx_def*, machine_mode> const&)
        ../../gcc/gcc/wide-int.h:1030
0xaf5023 generic_wide_int<wide_int_ref_storage<false, true>
>::generic_wide_int<std::pair<rtx_def*, machine_mode>
>(std::pair<rtx_def*, machine_mode> const&)
        ../../gcc/gcc/wide-int.h:788
0xf916f9 simplify_const_unary_operation(rtx_code, machine_mode,
rtx_def*, machine_mode)
        ../../gcc/gcc/simplify-rtx.cc:2131
0xf8bad5 simplify_context::simplify_unary_operation(rtx_code,
machine_mode, rtx_def*, machine_mode)
        ../../gcc/gcc/simplify-rtx.cc:889
0xf8a591 simplify_context::simplify_gen_unary(rtx_code, machine_mode,
rtx_def*, machine_mode)
        ../../gcc/gcc/simplify-rtx.cc:360
0x9bd1b7 simplify_gen_unary(rtx_code, machine_mode, rtx_def*, machine_mode)
        ../../gcc/gcc/rtl.h:3520
0x1bd5677 simplify_comparison
        ../../gcc/gcc/combine.cc:13125
0x1bc2b2b simplify_set
        ../../gcc/gcc/combine.cc:6848
0x1bc1647 combine_simplify_rtx
        ../../gcc/gcc/combine.cc:6353
0x1bbf97f subst
        ../../gcc/gcc/combine.cc:5609
0x1bb864b try_combine
        ../../gcc/gcc/combine.cc:3302
0x1bb30fb combine_instructions
        ../../gcc/gcc/combine.cc:1264
0x1bd8d25 rest_of_handle_combine
        ../../gcc/gcc/combine.cc:15059
0x1bd8dd5 execute
        ../../gcc/gcc/combine.cc:15103
Please submit a full bug report, with preprocessed source (by using
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

Could you please take a look ?

Thanks,
Prathamesh
>
> gcc/ChangeLog:
>
>         * combine.cc (simplify_compare_const): Narrow comparison of
>         memory and constant.
>         (try_combine): Adapt new function signature.
>         (simplify_comparison): Adapt new function signature.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/cmp-mem-const-1.c: New test.
>         * gcc.dg/cmp-mem-const-2.c: New test.
>         * gcc.dg/cmp-mem-const-3.c: New test.
>         * gcc.dg/cmp-mem-const-4.c: New test.
>         * gcc.dg/cmp-mem-const-5.c: New test.
>         * gcc.dg/cmp-mem-const-6.c: New test.
>         * gcc.target/s390/cmp-mem-const-1.c: New test.
> ---
>  gcc/combine.cc                                | 79 +++++++++++++++++--
>  gcc/testsuite/gcc.dg/cmp-mem-const-1.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-2.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-3.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-4.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-5.c        | 17 ++++
>  gcc/testsuite/gcc.dg/cmp-mem-const-6.c        | 17 ++++
>  .../gcc.target/s390/cmp-mem-const-1.c         | 24 ++++++
>  8 files changed, 200 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-5.c
>  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-6.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
>
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 5aa0ec5c45a..56e15a93409 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -460,7 +460,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
>  static int recog_for_combine (rtx *, rtx_insn *, rtx *);
>  static rtx gen_lowpart_for_combine (machine_mode, rtx);
>  static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
> -                                            rtx, rtx *);
> +                                            rtx *, rtx *);
>  static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
>  static void update_table_tick (rtx);
>  static void record_value_for_reg (rtx, rtx_insn *, rtx);
> @@ -3185,7 +3185,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>           compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
>           if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
>             compare_code = simplify_compare_const (compare_code, mode,
> -                                                  op0, &op1);
> +                                                  &op0, &op1);
>           target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
>         }
>
> @@ -11796,13 +11796,14 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
>     (CODE OP0 const0_rtx) form.
>
>     The result is a possibly different comparison code to use.
> -   *POP1 may be updated.  */
> +   *POP0 and *POP1 may be updated.  */
>
>  static enum rtx_code
>  simplify_compare_const (enum rtx_code code, machine_mode mode,
> -                       rtx op0, rtx *pop1)
> +                       rtx *pop0, rtx *pop1)
>  {
>    scalar_int_mode int_mode;
> +  rtx op0 = *pop0;
>    HOST_WIDE_INT const_op = INTVAL (*pop1);
>
>    /* Get the constant we are comparing against and turn off all bits
> @@ -11987,6 +11988,74 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        break;
>      }
>
> +  /* Narrow non-symmetric comparison of memory and constant as e.g.
> +     x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
> +     significant byte.  Likewise, transform x0...x7 >= 0x4000000000000000 into
> +     x0 >= 0x40.  */
> +  if ((code == LEU || code == LTU || code == GEU || code == GTU)
> +      && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
> +      && MEM_P (op0)
> +      && !MEM_VOLATILE_P (op0)
> +      /* The optimization makes only sense for constants which are big enough
> +        so that we have a chance to chop off something at all.  */
> +      && (unsigned HOST_WIDE_INT) const_op > 0xff
> +      /* Ensure that we do not overflow during normalization.  */
> +      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> +    {
> +      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> +      enum rtx_code adjusted_code;
> +
> +      /* Normalize code to either LEU or GEU.  */
> +      if (code == LTU)
> +       {
> +         --n;
> +         adjusted_code = LEU;
> +       }
> +      else if (code == GTU)
> +       {
> +         ++n;
> +         adjusted_code = GEU;
> +       }
> +      else
> +       adjusted_code = code;
> +
> +      scalar_int_mode narrow_mode_iter;
> +      FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
> +       {
> +         unsigned nbits = GET_MODE_PRECISION (int_mode)
> +                          - GET_MODE_PRECISION (narrow_mode_iter);
> +         unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
> +         unsigned HOST_WIDE_INT lower_bits = n & mask;
> +         if ((adjusted_code == LEU && lower_bits == mask)
> +             || (adjusted_code == GEU && lower_bits == 0))
> +           {
> +             n >>= nbits;
> +             break;
> +           }
> +       }
> +
> +      if (narrow_mode_iter < int_mode)
> +       {
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           {
> +             fprintf (
> +               dump_file, "narrow comparison from mode %s to %s: (MEM %s "
> +               HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> +               HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> +               GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> +               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> +               n);
> +           }
> +         poly_int64 offset = (BYTES_BIG_ENDIAN
> +                              ? 0
> +                              : (GET_MODE_SIZE (int_mode)
> +                                 - GET_MODE_SIZE (narrow_mode_iter)));
> +         *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> +         *pop1 = GEN_INT (n);
> +         return adjusted_code;
> +       }
> +    }
> +
>    *pop1 = GEN_INT (const_op);
>    return code;
>  }
> @@ -12179,7 +12248,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
>
>        /* Try to simplify the compare to constant, possibly changing the
>          comparison op, and/or changing op1 to zero.  */
> -      code = simplify_compare_const (code, raw_mode, op0, &op1);
> +      code = simplify_compare_const (code, raw_mode, &op0, &op1);
>        const_op = INTVAL (op1);
>
>        /* Compute some predicates to simplify code below.  */
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> new file mode 100644
> index 00000000000..263ad98af79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_1byte_a (uint64_t *x)
> +{
> +  return *x <= 0x3fffffffffffffff;
> +}
> +
> +int
> +le_1byte_b (uint64_t *x)
> +{
> +  return *x < 0x4000000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> new file mode 100644
> index 00000000000..a7cc5348295
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_1byte_a (uint64_t *x)
> +{
> +  return *x > 0x3fffffffffffffff;
> +}
> +
> +int
> +ge_1byte_b (uint64_t *x)
> +{
> +  return *x >= 0x4000000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> new file mode 100644
> index 00000000000..06f80bf72d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_2bytes_a (uint64_t *x)
> +{
> +  return *x <= 0x3ffdffffffffffff;
> +}
> +
> +int
> +le_2bytes_b (uint64_t *x)
> +{
> +  return *x < 0x3ffe000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> new file mode 100644
> index 00000000000..407999abf7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_2bytes_a (uint64_t *x)
> +{
> +  return *x > 0x400cffffffffffff;
> +}
> +
> +int
> +ge_2bytes_b (uint64_t *x)
> +{
> +  return *x >= 0x400d000000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> new file mode 100644
> index 00000000000..e16773f5bcf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +le_4bytes_a (uint64_t *x)
> +{
> +  return *x <= 0x3ffffdffffffffff;
> +}
> +
> +int
> +le_4bytes_b (uint64_t *x)
> +{
> +  return *x < 0x3ffffe0000000000;
> +}
> diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> new file mode 100644
> index 00000000000..8f53b5678bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> +
> +typedef __UINT64_TYPE__ uint64_t;
> +
> +int
> +ge_4bytes_a (uint64_t *x)
> +{
> +  return *x > 0x4000cfffffffffff;
> +}
> +
> +int
> +ge_4bytes_b (uint64_t *x)
> +{
> +  return *x >= 0x4000d00000000000;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> new file mode 100644
> index 00000000000..309aafbec01
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
> +/* { dg-final { scan-assembler-not {\tclc\t} } } */
> +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> +
> +struct s
> +{
> +  long a;
> +  unsigned b : 1;
> +  unsigned c : 1;
> +};
> +
> +int foo (struct s *x)
> +{
> +  /* Expression
> +       x->b || x->c
> +     is transformed into
> +       _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
> +       _2 = _1 > 0x3FFFFFFFFFFFFFFF;
> +     where the constant may materialize in the literal pool and an 8 byte CLC
> +     may be emitted.  Ensure this is not the case.
> +  */
> +  return x->b || x->c;
> +}
> --
> 2.39.2
>
Prathamesh Kulkarni July 31, 2023, 9:46 p.m. UTC | #4
On Tue, 1 Aug 2023 at 03:13, Prathamesh Kulkarni
<prathamesh.kulkarni@linaro.org> wrote:
>
> On Mon, 19 Jun 2023 at 19:59, Stefan Schulze Frielinghaus via
> Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >
> > Comparisons between memory and constants might be done in a smaller mode
> > resulting in smaller constants which might finally end up as immediates
> > instead of in the literal pool.
> >
> > For example, on s390x a non-symmetric comparison like
> >   x <= 0x3fffffffffffffff
> > results in the constant being spilled to the literal pool and an 8 byte
> > memory comparison is emitted.  Ideally, an equivalent comparison
> >   x0 <= 0x3f
> > where x0 is the most significant byte of x, is emitted where the
> > constant is smaller and more likely to materialize as an immediate.
> >
> > Similarly, comparisons of the form
> >   x >= 0x4000000000000000
> > can be shortened into x0 >= 0x40.
> >
> > Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> > Note, the new tests show that for the mentioned little-endian targets
> > the optimization does not materialize since either the costs of the new
> > instructions are higher or they do not match.  Still ok for mainline?
> Hi Stefan,
> Unfortunately this patch (committed in 7cdd0860949c6c3232e6cff1d7ca37bb5234074c)
> caused the following ICE on armv8l-unknown-linux-gnu:
Sorry I meant armv8l-unknown-linux-gnueabihf.
> during RTL pass: combine
> ../../../gcc/libgcc/fixed-bit.c: In function ‘__gnu_saturate1sq’:
> ../../../gcc/libgcc/fixed-bit.c:210:1: internal compiler error: in
> decompose, at rtl.h:2297
>   210 | }
>       | ^
> 0xaa23e3 wi::int_traits<std::pair<rtx_def*, machine_mode>
> >::decompose(long long*, unsigned int, std::pair<rtx_def*,
> machine_mode> const&)
>         ../../gcc/gcc/rtl.h:2297
> 0xaf5ab3 wide_int_ref_storage<false,
> true>::wide_int_ref_storage<std::pair<rtx_def*, machine_mode>
> >(std::pair<rtx_def*, machine_mode> const&)
>         ../../gcc/gcc/wide-int.h:1030
> 0xaf5023 generic_wide_int<wide_int_ref_storage<false, true>
> >::generic_wide_int<std::pair<rtx_def*, machine_mode>
> >(std::pair<rtx_def*, machine_mode> const&)
>         ../../gcc/gcc/wide-int.h:788
> 0xf916f9 simplify_const_unary_operation(rtx_code, machine_mode,
> rtx_def*, machine_mode)
>         ../../gcc/gcc/simplify-rtx.cc:2131
> 0xf8bad5 simplify_context::simplify_unary_operation(rtx_code,
> machine_mode, rtx_def*, machine_mode)
>         ../../gcc/gcc/simplify-rtx.cc:889
> 0xf8a591 simplify_context::simplify_gen_unary(rtx_code, machine_mode,
> rtx_def*, machine_mode)
>         ../../gcc/gcc/simplify-rtx.cc:360
> 0x9bd1b7 simplify_gen_unary(rtx_code, machine_mode, rtx_def*, machine_mode)
>         ../../gcc/gcc/rtl.h:3520
> 0x1bd5677 simplify_comparison
>         ../../gcc/gcc/combine.cc:13125
> 0x1bc2b2b simplify_set
>         ../../gcc/gcc/combine.cc:6848
> 0x1bc1647 combine_simplify_rtx
>         ../../gcc/gcc/combine.cc:6353
> 0x1bbf97f subst
>         ../../gcc/gcc/combine.cc:5609
> 0x1bb864b try_combine
>         ../../gcc/gcc/combine.cc:3302
> 0x1bb30fb combine_instructions
>         ../../gcc/gcc/combine.cc:1264
> 0x1bd8d25 rest_of_handle_combine
>         ../../gcc/gcc/combine.cc:15059
> 0x1bd8dd5 execute
>         ../../gcc/gcc/combine.cc:15103
> Please submit a full bug report, with preprocessed source (by using
> -freport-bug).
> Please include the complete backtrace with any bug report.
> See <https://gcc.gnu.org/bugs/> for instructions.
>
> Could you please take a look ?
>
> Thanks,
> Prathamesh
> >
> > gcc/ChangeLog:
> >
> >         * combine.cc (simplify_compare_const): Narrow comparison of
> >         memory and constant.
> >         (try_combine): Adapt new function signature.
> >         (simplify_comparison): Adapt new function signature.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/cmp-mem-const-1.c: New test.
> >         * gcc.dg/cmp-mem-const-2.c: New test.
> >         * gcc.dg/cmp-mem-const-3.c: New test.
> >         * gcc.dg/cmp-mem-const-4.c: New test.
> >         * gcc.dg/cmp-mem-const-5.c: New test.
> >         * gcc.dg/cmp-mem-const-6.c: New test.
> >         * gcc.target/s390/cmp-mem-const-1.c: New test.
> > ---
> >  gcc/combine.cc                                | 79 +++++++++++++++++--
> >  gcc/testsuite/gcc.dg/cmp-mem-const-1.c        | 17 ++++
> >  gcc/testsuite/gcc.dg/cmp-mem-const-2.c        | 17 ++++
> >  gcc/testsuite/gcc.dg/cmp-mem-const-3.c        | 17 ++++
> >  gcc/testsuite/gcc.dg/cmp-mem-const-4.c        | 17 ++++
> >  gcc/testsuite/gcc.dg/cmp-mem-const-5.c        | 17 ++++
> >  gcc/testsuite/gcc.dg/cmp-mem-const-6.c        | 17 ++++
> >  .../gcc.target/s390/cmp-mem-const-1.c         | 24 ++++++
> >  8 files changed, 200 insertions(+), 5 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> >  create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> >  create mode 100644 gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> >
> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index 5aa0ec5c45a..56e15a93409 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -460,7 +460,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
> >  static int recog_for_combine (rtx *, rtx_insn *, rtx *);
> >  static rtx gen_lowpart_for_combine (machine_mode, rtx);
> >  static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
> > -                                            rtx, rtx *);
> > +                                            rtx *, rtx *);
> >  static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
> >  static void update_table_tick (rtx);
> >  static void record_value_for_reg (rtx, rtx_insn *, rtx);
> > @@ -3185,7 +3185,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> >           compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
> >           if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
> >             compare_code = simplify_compare_const (compare_code, mode,
> > -                                                  op0, &op1);
> > +                                                  &op0, &op1);
> >           target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
> >         }
> >
> > @@ -11796,13 +11796,14 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
> >     (CODE OP0 const0_rtx) form.
> >
> >     The result is a possibly different comparison code to use.
> > -   *POP1 may be updated.  */
> > +   *POP0 and *POP1 may be updated.  */
> >
> >  static enum rtx_code
> >  simplify_compare_const (enum rtx_code code, machine_mode mode,
> > -                       rtx op0, rtx *pop1)
> > +                       rtx *pop0, rtx *pop1)
> >  {
> >    scalar_int_mode int_mode;
> > +  rtx op0 = *pop0;
> >    HOST_WIDE_INT const_op = INTVAL (*pop1);
> >
> >    /* Get the constant we are comparing against and turn off all bits
> > @@ -11987,6 +11988,74 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        break;
> >      }
> >
> > +  /* Narrow non-symmetric comparison of memory and constant as e.g.
> > +     x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
> > +     significant byte.  Likewise, transform x0...x7 >= 0x4000000000000000 into
> > +     x0 >= 0x40.  */
> > +  if ((code == LEU || code == LTU || code == GEU || code == GTU)
> > +      && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
> > +      && MEM_P (op0)
> > +      && !MEM_VOLATILE_P (op0)
> > +      /* The optimization makes only sense for constants which are big enough
> > +        so that we have a chance to chop off something at all.  */
> > +      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > +      /* Ensure that we do not overflow during normalization.  */
> > +      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > +    {
> > +      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > +      enum rtx_code adjusted_code;
> > +
> > +      /* Normalize code to either LEU or GEU.  */
> > +      if (code == LTU)
> > +       {
> > +         --n;
> > +         adjusted_code = LEU;
> > +       }
> > +      else if (code == GTU)
> > +       {
> > +         ++n;
> > +         adjusted_code = GEU;
> > +       }
> > +      else
> > +       adjusted_code = code;
> > +
> > +      scalar_int_mode narrow_mode_iter;
> > +      FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
> > +       {
> > +         unsigned nbits = GET_MODE_PRECISION (int_mode)
> > +                          - GET_MODE_PRECISION (narrow_mode_iter);
> > +         unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
> > +         unsigned HOST_WIDE_INT lower_bits = n & mask;
> > +         if ((adjusted_code == LEU && lower_bits == mask)
> > +             || (adjusted_code == GEU && lower_bits == 0))
> > +           {
> > +             n >>= nbits;
> > +             break;
> > +           }
> > +       }
> > +
> > +      if (narrow_mode_iter < int_mode)
> > +       {
> > +         if (dump_file && (dump_flags & TDF_DETAILS))
> > +           {
> > +             fprintf (
> > +               dump_file, "narrow comparison from mode %s to %s: (MEM %s "
> > +               HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> > +               HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> > +               GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > +               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > +               n);
> > +           }
> > +         poly_int64 offset = (BYTES_BIG_ENDIAN
> > +                              ? 0
> > +                              : (GET_MODE_SIZE (int_mode)
> > +                                 - GET_MODE_SIZE (narrow_mode_iter)));
> > +         *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > +         *pop1 = GEN_INT (n);
> > +         return adjusted_code;
> > +       }
> > +    }
> > +
> >    *pop1 = GEN_INT (const_op);
> >    return code;
> >  }
> > @@ -12179,7 +12248,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
> >
> >        /* Try to simplify the compare to constant, possibly changing the
> >          comparison op, and/or changing op1 to zero.  */
> > -      code = simplify_compare_const (code, raw_mode, op0, &op1);
> > +      code = simplify_compare_const (code, raw_mode, &op0, &op1);
> >        const_op = INTVAL (op1);
> >
> >        /* Compute some predicates to simplify code below.  */
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> > new file mode 100644
> > index 00000000000..263ad98af79
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +le_1byte_a (uint64_t *x)
> > +{
> > +  return *x <= 0x3fffffffffffffff;
> > +}
> > +
> > +int
> > +le_1byte_b (uint64_t *x)
> > +{
> > +  return *x < 0x4000000000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> > new file mode 100644
> > index 00000000000..a7cc5348295
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +ge_1byte_a (uint64_t *x)
> > +{
> > +  return *x > 0x3fffffffffffffff;
> > +}
> > +
> > +int
> > +ge_1byte_b (uint64_t *x)
> > +{
> > +  return *x >= 0x4000000000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> > new file mode 100644
> > index 00000000000..06f80bf72d8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +le_2bytes_a (uint64_t *x)
> > +{
> > +  return *x <= 0x3ffdffffffffffff;
> > +}
> > +
> > +int
> > +le_2bytes_b (uint64_t *x)
> > +{
> > +  return *x < 0x3ffe000000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> > new file mode 100644
> > index 00000000000..407999abf7e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +ge_2bytes_a (uint64_t *x)
> > +{
> > +  return *x > 0x400cffffffffffff;
> > +}
> > +
> > +int
> > +ge_2bytes_b (uint64_t *x)
> > +{
> > +  return *x >= 0x400d000000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> > new file mode 100644
> > index 00000000000..e16773f5bcf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +le_4bytes_a (uint64_t *x)
> > +{
> > +  return *x <= 0x3ffffdffffffffff;
> > +}
> > +
> > +int
> > +le_4bytes_b (uint64_t *x)
> > +{
> > +  return *x < 0x3ffffe0000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> > new file mode 100644
> > index 00000000000..8f53b5678bd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
> > +
> > +typedef __UINT64_TYPE__ uint64_t;
> > +
> > +int
> > +ge_4bytes_a (uint64_t *x)
> > +{
> > +  return *x > 0x4000cfffffffffff;
> > +}
> > +
> > +int
> > +ge_4bytes_b (uint64_t *x)
> > +{
> > +  return *x >= 0x4000d00000000000;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> > new file mode 100644
> > index 00000000000..309aafbec01
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
> > +/* { dg-final { scan-assembler-not {\tclc\t} } } */
> > +/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
> > +
> > +struct s
> > +{
> > +  long a;
> > +  unsigned b : 1;
> > +  unsigned c : 1;
> > +};
> > +
> > +int foo (struct s *x)
> > +{
> > +  /* Expression
> > +       x->b || x->c
> > +     is transformed into
> > +       _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
> > +       _2 = _1 > 0x3FFFFFFFFFFFFFFF;
> > +     where the constant may materialize in the literal pool and an 8 byte CLC
> > +     may be emitted.  Ensure this is not the case.
> > +  */
> > +  return x->b || x->c;
> > +}
> > --
> > 2.39.2
> >
Jeff Law July 31, 2023, 11:50 p.m. UTC | #5
On 7/31/23 15:43, Prathamesh Kulkarni via Gcc-patches wrote:
> On Mon, 19 Jun 2023 at 19:59, Stefan Schulze Frielinghaus via
> Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>>
>> Comparisons between memory and constants might be done in a smaller mode
>> resulting in smaller constants which might finally end up as immediates
>> instead of in the literal pool.
>>
>> For example, on s390x a non-symmetric comparison like
>>    x <= 0x3fffffffffffffff
>> results in the constant being spilled to the literal pool and an 8 byte
>> memory comparison is emitted.  Ideally, an equivalent comparison
>>    x0 <= 0x3f
>> where x0 is the most significant byte of x, is emitted where the
>> constant is smaller and more likely to materialize as an immediate.
>>
>> Similarly, comparisons of the form
>>    x >= 0x4000000000000000
>> can be shortened into x0 >= 0x40.
>>
>> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
>> Note, the new tests show that for the mentioned little-endian targets
>> the optimization does not materialize since either the costs of the new
>> instructions are higher or they do not match.  Still ok for mainline?
> Hi Stefan,
> Unfortunately this patch (committed in 7cdd0860949c6c3232e6cff1d7ca37bb5234074c)
> caused the following ICE on armv8l-unknown-linux-gnu:
> during RTL pass: combine
> ../../../gcc/libgcc/fixed-bit.c: In function ‘__gnu_saturate1sq’:
> ../../../gcc/libgcc/fixed-bit.c:210:1: internal compiler error: in
> decompose, at rtl.h:2297
>    210 | }
>        | ^
> 0xaa23e3 wi::int_traits<std::pair<rtx_def*, machine_mode>
>> ::decompose(long long*, unsigned int, std::pair<rtx_def*,
> machine_mode> const&)
>          ../../gcc/gcc/rtl.h:2297
[ ... ]
Yea, we're seeing something very similar on nios2-linux-gnu building the 
kernel.

Prathamesh, can you extract the .i file for fixed-bit on armv8 and open 
a bug for this issue, attaching the .i file as well as the right command 
line options necessary to reproduce the failure.  THat way Stefan can 
tackle it with a cross compiler.

Thanks,
jeff
Prathamesh Kulkarni Aug. 1, 2023, 8:22 a.m. UTC | #6
On Tue, 1 Aug 2023 at 05:20, Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 7/31/23 15:43, Prathamesh Kulkarni via Gcc-patches wrote:
> > On Mon, 19 Jun 2023 at 19:59, Stefan Schulze Frielinghaus via
> > Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >>
> >> Comparisons between memory and constants might be done in a smaller mode
> >> resulting in smaller constants which might finally end up as immediates
> >> instead of in the literal pool.
> >>
> >> For example, on s390x a non-symmetric comparison like
> >>    x <= 0x3fffffffffffffff
> >> results in the constant being spilled to the literal pool and an 8 byte
> >> memory comparison is emitted.  Ideally, an equivalent comparison
> >>    x0 <= 0x3f
> >> where x0 is the most significant byte of x, is emitted where the
> >> constant is smaller and more likely to materialize as an immediate.
> >>
> >> Similarly, comparisons of the form
> >>    x >= 0x4000000000000000
> >> can be shortened into x0 >= 0x40.
> >>
> >> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> >> Note, the new tests show that for the mentioned little-endian targets
> >> the optimization does not materialize since either the costs of the new
> >> instructions are higher or they do not match.  Still ok for mainline?
> > Hi Stefan,
> > Unfortunately this patch (committed in 7cdd0860949c6c3232e6cff1d7ca37bb5234074c)
> > caused the following ICE on armv8l-unknown-linux-gnu:
> > during RTL pass: combine
> > ../../../gcc/libgcc/fixed-bit.c: In function ‘__gnu_saturate1sq’:
> > ../../../gcc/libgcc/fixed-bit.c:210:1: internal compiler error: in
> > decompose, at rtl.h:2297
> >    210 | }
> >        | ^
> > 0xaa23e3 wi::int_traits<std::pair<rtx_def*, machine_mode>
> >> ::decompose(long long*, unsigned int, std::pair<rtx_def*,
> > machine_mode> const&)
> >          ../../gcc/gcc/rtl.h:2297
> [ ... ]
> Yea, we're seeing something very similar on nios2-linux-gnu building the
> kernel.
>
> Prathamesh, can you extract the .i file for fixed-bit on armv8 and open
> a bug for this issue, attaching the .i file as well as the right command
> line options necessary to reproduce the failure.  THat way Stefan can
> tackle it with a cross compiler.
Hi Jeff,
Filed the issue in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110867

Thanks,
Prathamesh
>
> Thanks,
> jeff
Stefan Schulze Frielinghaus Aug. 1, 2023, 9:36 a.m. UTC | #7
On Tue, Aug 01, 2023 at 01:52:16PM +0530, Prathamesh Kulkarni wrote:
> On Tue, 1 Aug 2023 at 05:20, Jeff Law <jeffreyalaw@gmail.com> wrote:
> >
> >
> >
> > On 7/31/23 15:43, Prathamesh Kulkarni via Gcc-patches wrote:
> > > On Mon, 19 Jun 2023 at 19:59, Stefan Schulze Frielinghaus via
> > > Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> > >>
> > >> Comparisons between memory and constants might be done in a smaller mode
> > >> resulting in smaller constants which might finally end up as immediates
> > >> instead of in the literal pool.
> > >>
> > >> For example, on s390x a non-symmetric comparison like
> > >>    x <= 0x3fffffffffffffff
> > >> results in the constant being spilled to the literal pool and an 8 byte
> > >> memory comparison is emitted.  Ideally, an equivalent comparison
> > >>    x0 <= 0x3f
> > >> where x0 is the most significant byte of x, is emitted where the
> > >> constant is smaller and more likely to materialize as an immediate.
> > >>
> > >> Similarly, comparisons of the form
> > >>    x >= 0x4000000000000000
> > >> can be shortened into x0 >= 0x40.
> > >>
> > >> Bootstrapped and regtested on s390x, x64, aarch64, and powerpc64le.
> > >> Note, the new tests show that for the mentioned little-endian targets
> > >> the optimization does not materialize since either the costs of the new
> > >> instructions are higher or they do not match.  Still ok for mainline?
> > > Hi Stefan,
> > > Unfortunately this patch (committed in 7cdd0860949c6c3232e6cff1d7ca37bb5234074c)
> > > caused the following ICE on armv8l-unknown-linux-gnu:
> > > during RTL pass: combine
> > > ../../../gcc/libgcc/fixed-bit.c: In function ‘__gnu_saturate1sq’:
> > > ../../../gcc/libgcc/fixed-bit.c:210:1: internal compiler error: in
> > > decompose, at rtl.h:2297
> > >    210 | }
> > >        | ^
> > > 0xaa23e3 wi::int_traits<std::pair<rtx_def*, machine_mode>
> > >> ::decompose(long long*, unsigned int, std::pair<rtx_def*,
> > > machine_mode> const&)
> > >          ../../gcc/gcc/rtl.h:2297
> > [ ... ]
> > Yea, we're seeing something very similar on nios2-linux-gnu building the
> > kernel.
> >
> > Prathamesh, can you extract the .i file for fixed-bit on armv8 and open
> > a bug for this issue, attaching the .i file as well as the right command
> > line options necessary to reproduce the failure.  THat way Stefan can
> > tackle it with a cross compiler.
> Hi Jeff,
> Filed the issue in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110867

Hi Prathamesh,

Sorry for the inconvenience.  I will have a look at this and thanks for
the small reproducer.  I already started to come up with a cross
compiler.

Thanks,
Stefan

> 
> Thanks,
> Prathamesh
> >
> > Thanks,
> > jeff
diff mbox series

Patch

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..56e15a93409 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -460,7 +460,7 @@  static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
 static int recog_for_combine (rtx *, rtx_insn *, rtx *);
 static rtx gen_lowpart_for_combine (machine_mode, rtx);
 static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
-					     rtx, rtx *);
+					     rtx *, rtx *);
 static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
 static void update_table_tick (rtx);
 static void record_value_for_reg (rtx, rtx_insn *, rtx);
@@ -3185,7 +3185,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
 	  if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
 	    compare_code = simplify_compare_const (compare_code, mode,
-						   op0, &op1);
+						   &op0, &op1);
 	  target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
 	}
 
@@ -11796,13 +11796,14 @@  gen_lowpart_for_combine (machine_mode omode, rtx x)
    (CODE OP0 const0_rtx) form.
 
    The result is a possibly different comparison code to use.
-   *POP1 may be updated.  */
+   *POP0 and *POP1 may be updated.  */
 
 static enum rtx_code
 simplify_compare_const (enum rtx_code code, machine_mode mode,
-			rtx op0, rtx *pop1)
+			rtx *pop0, rtx *pop1)
 {
   scalar_int_mode int_mode;
+  rtx op0 = *pop0;
   HOST_WIDE_INT const_op = INTVAL (*pop1);
 
   /* Get the constant we are comparing against and turn off all bits
@@ -11987,6 +11988,74 @@  simplify_compare_const (enum rtx_code code, machine_mode mode,
       break;
     }
 
+  /* Narrow non-symmetric comparison of memory and constant as e.g.
+     x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
+     significant byte.  Likewise, transform x0...x7 >= 0x4000000000000000 into
+     x0 >= 0x40.  */
+  if ((code == LEU || code == LTU || code == GEU || code == GTU)
+      && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
+      && MEM_P (op0)
+      && !MEM_VOLATILE_P (op0)
+      /* The optimization makes only sense for constants which are big enough
+	 so that we have a chance to chop off something at all.  */
+      && (unsigned HOST_WIDE_INT) const_op > 0xff
+      /* Ensure that we do not overflow during normalization.  */
+      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
+    {
+      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
+      enum rtx_code adjusted_code;
+
+      /* Normalize code to either LEU or GEU.  */
+      if (code == LTU)
+	{
+	  --n;
+	  adjusted_code = LEU;
+	}
+      else if (code == GTU)
+	{
+	  ++n;
+	  adjusted_code = GEU;
+	}
+      else
+	adjusted_code = code;
+
+      scalar_int_mode narrow_mode_iter;
+      FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
+	{
+	  unsigned nbits = GET_MODE_PRECISION (int_mode)
+			   - GET_MODE_PRECISION (narrow_mode_iter);
+	  unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
+	  unsigned HOST_WIDE_INT lower_bits = n & mask;
+	  if ((adjusted_code == LEU && lower_bits == mask)
+	      || (adjusted_code == GEU && lower_bits == 0))
+	    {
+	      n >>= nbits;
+	      break;
+	    }
+	}
+
+      if (narrow_mode_iter < int_mode)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (
+		dump_file, "narrow comparison from mode %s to %s: (MEM %s "
+		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
+		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
+		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
+		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
+		n);
+	    }
+	  poly_int64 offset = (BYTES_BIG_ENDIAN
+			       ? 0
+			       : (GET_MODE_SIZE (int_mode)
+				  - GET_MODE_SIZE (narrow_mode_iter)));
+	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
+	  *pop1 = GEN_INT (n);
+	  return adjusted_code;
+	}
+    }
+
   *pop1 = GEN_INT (const_op);
   return code;
 }
@@ -12179,7 +12248,7 @@  simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
 
       /* Try to simplify the compare to constant, possibly changing the
 	 comparison op, and/or changing op1 to zero.  */
-      code = simplify_compare_const (code, raw_mode, op0, &op1);
+      code = simplify_compare_const (code, raw_mode, &op0, &op1);
       const_op = INTVAL (op1);
 
       /* Compute some predicates to simplify code below.  */
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
new file mode 100644
index 00000000000..263ad98af79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_1byte_a (uint64_t *x)
+{
+  return *x <= 0x3fffffffffffffff;
+}
+
+int
+le_1byte_b (uint64_t *x)
+{
+  return *x < 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
new file mode 100644
index 00000000000..a7cc5348295
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_1byte_a (uint64_t *x)
+{
+  return *x > 0x3fffffffffffffff;
+}
+
+int
+ge_1byte_b (uint64_t *x)
+{
+  return *x >= 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
new file mode 100644
index 00000000000..06f80bf72d8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_2bytes_a (uint64_t *x)
+{
+  return *x <= 0x3ffdffffffffffff;
+}
+
+int
+le_2bytes_b (uint64_t *x)
+{
+  return *x < 0x3ffe000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
new file mode 100644
index 00000000000..407999abf7e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_2bytes_a (uint64_t *x)
+{
+  return *x > 0x400cffffffffffff;
+}
+
+int
+ge_2bytes_b (uint64_t *x)
+{
+  return *x >= 0x400d000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
new file mode 100644
index 00000000000..e16773f5bcf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_4bytes_a (uint64_t *x)
+{
+  return *x <= 0x3ffffdffffffffff;
+}
+
+int
+le_4bytes_b (uint64_t *x)
+{
+  return *x < 0x3ffffe0000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
new file mode 100644
index 00000000000..8f53b5678bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_4bytes_a (uint64_t *x)
+{
+  return *x > 0x4000cfffffffffff;
+}
+
+int
+ge_4bytes_b (uint64_t *x)
+{
+  return *x >= 0x4000d00000000000;
+}
diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
new file mode 100644
index 00000000000..309aafbec01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
+/* { dg-final { scan-assembler-not {\tclc\t} } } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+struct s
+{
+  long a;
+  unsigned b : 1;
+  unsigned c : 1;
+};
+
+int foo (struct s *x)
+{
+  /* Expression
+       x->b || x->c
+     is transformed into
+       _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
+       _2 = _1 > 0x3FFFFFFFFFFFFFFF;
+     where the constant may materialize in the literal pool and an 8 byte CLC
+     may be emitted.  Ensure this is not the case.
+  */
+  return x->b || x->c;
+}