diff mbox series

rtl-optimization/110939 Really fix narrow comparison of memory and constant

Message ID 20230810130402.752335-2-stefansf@linux.ibm.com
State New
Headers show
Series rtl-optimization/110939 Really fix narrow comparison of memory and constant | expand

Commit Message

Stefan Schulze Frielinghaus Aug. 10, 2023, 1:04 p.m. UTC
In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
completely missed the fact that the normal form of a generated constant for a
mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
actual constant.  This even holds true for unsigned constants.

Fixed by masking out the upper bits for the incoming constant and sign
extending the resulting unsigned constant.

Bootstrapped and regtested on x64 and s390x.  Ok for mainline?

While reading existing optimizations in combine I stumbled across two
optimizations where either my intuition about the representation of
unsigned integers via a const_int rtx is wrong, which then in turn would
probably also mean that this patch is wrong, or that the optimizations
are missed sometimes.  In other words in the following I would assume
that the upper bits are masked out:

Comments

Xi Ruoyao Aug. 12, 2023, 1:04 a.m. UTC | #1
On Thu, 2023-08-10 at 15:04 +0200, Stefan Schulze Frielinghaus via Gcc-
patches wrote:
> In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> completely missed the fact that the normal form of a generated constant for a
> mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> actual constant.  This even holds true for unsigned constants.
> 
> Fixed by masking out the upper bits for the incoming constant and sign
> extending the resulting unsigned constant.
> 
> Bootstrapped and regtested on x64 and s390x.  Ok for mainline?

The patch fails to apply:

patching file gcc/combine.cc
Hunk #1 FAILED at 11923.
Hunk #2 FAILED at 11962.

It looks like some indents are tabs in the source file, but white spaces
in the patch.

> While reading existing optimizations in combine I stumbled across two
> optimizations where either my intuition about the representation of
> unsigned integers via a const_int rtx is wrong, which then in turn would
> probably also mean that this patch is wrong, or that the optimizations
> are missed sometimes.  In other words in the following I would assume
> that the upper bits are masked out:
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 468b7fde911..80c4ff0fbaf 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
>        else if (is_a <scalar_int_mode> (mode, &int_mode)
>                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> -              && ((unsigned HOST_WIDE_INT) const_op
> +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
>                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
>         {
>           const_op = 0;
> @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
>        else if (is_a <scalar_int_mode> (mode, &int_mode)
>                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> -              && ((unsigned HOST_WIDE_INT) const_op
> +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
>                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
>         {
>           const_op = 0;
> 
> For example, while bootstrapping on x64 the optimization is missed since
> a LTU comparison in QImode is done and the constant equals
> 0xffffffffffffff80.
> 
> Sorry for inlining another patch, but I would really like to make sure
> that my understanding is correct, now, before I come up with another
> patch.  Thus it would be great if someone could shed some light on this.
> 
> gcc/ChangeLog:
> 
>         * combine.cc (simplify_compare_const): Properly handle unsigned
>         constants while narrowing comparison of memory and constants.
> ---
>  gcc/combine.cc | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index e46d202d0a7..468b7fde911 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        && !MEM_VOLATILE_P (op0)
>        /* The optimization makes only sense for constants which are big enough
>          so that we have a chance to chop off something at all.  */
> -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> -      /* Bail out, if the constant does not fit into INT_MODE.  */
> -      && (unsigned HOST_WIDE_INT) const_op
> -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
>        /* Ensure that we do not overflow during normalization.  */
> -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> +      && (code != GTU
> +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> +            < HOST_WIDE_INT_M1U)
> +      && trunc_int_for_mode (const_op, int_mode) == const_op)
>      {
> -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> +      unsigned HOST_WIDE_INT n
> +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
>        enum rtx_code adjusted_code;
>  
>        /* Normalize code to either LEU or GEU.  */
> @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
>                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
>                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> -               n);
> +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> +               GET_RTX_NAME (adjusted_code), n);
>             }
>           poly_int64 offset = (BYTES_BIG_ENDIAN
>                                ? 0
>                                : (GET_MODE_SIZE (int_mode)
>                                   - GET_MODE_SIZE (narrow_mode_iter)));
>           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> -         *pop1 = GEN_INT (n);
> +         *pop1 = gen_int_mode (n, narrow_mode_iter);
>           return adjusted_code;
>         }
>      }
Stefan Schulze Frielinghaus Aug. 14, 2023, 6:39 a.m. UTC | #2
On Sat, Aug 12, 2023 at 09:04:19AM +0800, Xi Ruoyao wrote:
> On Thu, 2023-08-10 at 15:04 +0200, Stefan Schulze Frielinghaus via Gcc-
> patches wrote:
> > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > completely missed the fact that the normal form of a generated constant for a
> > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > actual constant.  This even holds true for unsigned constants.
> > 
> > Fixed by masking out the upper bits for the incoming constant and sign
> > extending the resulting unsigned constant.
> > 
> > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> 
> The patch fails to apply:
> 
> patching file gcc/combine.cc
> Hunk #1 FAILED at 11923.
> Hunk #2 FAILED at 11962.
> 
> It looks like some indents are tabs in the source file, but white spaces
> in the patch.

The patch itself applies cleanly.  This is due to my inlined diff in
order to raise some discussion, i.e., just remove the following from
the email and the patch applies:

> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index 468b7fde911..80c4ff0fbaf 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
> >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -              && ((unsigned HOST_WIDE_INT) const_op
> > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> >         {
> >           const_op = 0;
> > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
> >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -              && ((unsigned HOST_WIDE_INT) const_op
> > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> >         {
> >           const_op = 0;

Looks like git am/apply is confused by that.

Cheers,
Stefan

> > 
> > For example, while bootstrapping on x64 the optimization is missed since
> > a LTU comparison in QImode is done and the constant equals
> > 0xffffffffffffff80.
> > 
> > Sorry for inlining another patch, but I would really like to make sure
> > that my understanding is correct, now, before I come up with another
> > patch.  Thus it would be great if someone could shed some light on this.
> > 
> > gcc/ChangeLog:
> > 
> >         * combine.cc (simplify_compare_const): Properly handle unsigned
> >         constants while narrowing comparison of memory and constants.
> > ---
> >  gcc/combine.cc | 19 ++++++++++---------
> >  1 file changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index e46d202d0a7..468b7fde911 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        && !MEM_VOLATILE_P (op0)
> >        /* The optimization makes only sense for constants which are big enough
> >          so that we have a chance to chop off something at all.  */
> > -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > -      /* Bail out, if the constant does not fit into INT_MODE.  */
> > -      && (unsigned HOST_WIDE_INT) const_op
> > -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> > +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
> >        /* Ensure that we do not overflow during normalization.  */
> > -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > +      && (code != GTU
> > +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > +            < HOST_WIDE_INT_M1U)
> > +      && trunc_int_for_mode (const_op, int_mode) == const_op)
> >      {
> > -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > +      unsigned HOST_WIDE_INT n
> > +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> >        enum rtx_code adjusted_code;
> >  
> >        /* Normalize code to either LEU or GEU.  */
> > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> >                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> >                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > -               n);
> > +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > +               GET_RTX_NAME (adjusted_code), n);
> >             }
> >           poly_int64 offset = (BYTES_BIG_ENDIAN
> >                                ? 0
> >                                : (GET_MODE_SIZE (int_mode)
> >                                   - GET_MODE_SIZE (narrow_mode_iter)));
> >           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > -         *pop1 = GEN_INT (n);
> > +         *pop1 = gen_int_mode (n, narrow_mode_iter);
> >           return adjusted_code;
> >         }
> >      }
> 
> -- 
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
Stefan Schulze Frielinghaus Aug. 18, 2023, 11:04 a.m. UTC | #3
Ping.  Since this fixes the bootstrap problem PR110939 for LoongArch I'm
pinging this one earlier.

On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus wrote:
> In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> completely missed the fact that the normal form of a generated constant for a
> mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> actual constant.  This even holds true for unsigned constants.
> 
> Fixed by masking out the upper bits for the incoming constant and sign
> extending the resulting unsigned constant.
> 
> Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> 
> While reading existing optimizations in combine I stumbled across two
> optimizations where either my intuition about the representation of
> unsigned integers via a const_int rtx is wrong, which then in turn would
> probably also mean that this patch is wrong, or that the optimizations
> are missed sometimes.  In other words in the following I would assume
> that the upper bits are masked out:
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 468b7fde911..80c4ff0fbaf 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
>        else if (is_a <scalar_int_mode> (mode, &int_mode)
>                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> -              && ((unsigned HOST_WIDE_INT) const_op
> +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
>                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
>         {
>           const_op = 0;
> @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
>        else if (is_a <scalar_int_mode> (mode, &int_mode)
>                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> -              && ((unsigned HOST_WIDE_INT) const_op
> +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
>                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
>         {
>           const_op = 0;
> 
> For example, while bootstrapping on x64 the optimization is missed since
> a LTU comparison in QImode is done and the constant equals
> 0xffffffffffffff80.
> 
> Sorry for inlining another patch, but I would really like to make sure
> that my understanding is correct, now, before I come up with another
> patch.  Thus it would be great if someone could shed some light on this.
> 
> gcc/ChangeLog:
> 
> 	* combine.cc (simplify_compare_const): Properly handle unsigned
> 	constants while narrowing comparison of memory and constants.
> ---
>  gcc/combine.cc | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index e46d202d0a7..468b7fde911 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        && !MEM_VOLATILE_P (op0)
>        /* The optimization makes only sense for constants which are big enough
>  	 so that we have a chance to chop off something at all.  */
> -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> -      /* Bail out, if the constant does not fit into INT_MODE.  */
> -      && (unsigned HOST_WIDE_INT) const_op
> -	 < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
>        /* Ensure that we do not overflow during normalization.  */
> -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> +      && (code != GTU
> +	  || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> +	     < HOST_WIDE_INT_M1U)
> +      && trunc_int_for_mode (const_op, int_mode) == const_op)
>      {
> -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> +      unsigned HOST_WIDE_INT n
> +	= (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
>        enum rtx_code adjusted_code;
>  
>        /* Normalize code to either LEU or GEU.  */
> @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>  		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
>  		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
>  		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> -		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> -		n);
> +		(unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> +		GET_RTX_NAME (adjusted_code), n);
>  	    }
>  	  poly_int64 offset = (BYTES_BIG_ENDIAN
>  			       ? 0
>  			       : (GET_MODE_SIZE (int_mode)
>  				  - GET_MODE_SIZE (narrow_mode_iter)));
>  	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> -	  *pop1 = GEN_INT (n);
> +	  *pop1 = gen_int_mode (n, narrow_mode_iter);
>  	  return adjusted_code;
>  	}
>      }
> -- 
> 2.41.0
>
Xi Ruoyao Aug. 24, 2023, 3:31 a.m. UTC | #4
Ping again.

On Fri, 2023-08-18 at 13:04 +0200, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> Ping.  Since this fixes bootstrap problem PR110939 for Loongarch I'm
> pingen this one earlier.
> 
> On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus wrote:
> > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > completely missed the fact that the normal form of a generated constant for a
> > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > actual constant.  This even holds true for unsigned constants.
> > 
> > Fixed by masking out the upper bits for the incoming constant and sign
> > extending the resulting unsigned constant.
> > 
> > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > 
> > While reading existing optimizations in combine I stumbled across two
> > optimizations where either my intuition about the representation of
> > unsigned integers via a const_int rtx is wrong, which then in turn would
> > probably also mean that this patch is wrong, or that the optimizations
> > are missed sometimes.  In other words in the following I would assume
> > that the upper bits are masked out:
> > 
> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index 468b7fde911..80c4ff0fbaf 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
> >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -              && ((unsigned HOST_WIDE_INT) const_op
> > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> >         {
> >           const_op = 0;
> > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
> >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -              && ((unsigned HOST_WIDE_INT) const_op
> > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> >         {
> >           const_op = 0;
> > 
> > For example, while bootstrapping on x64 the optimization is missed since
> > a LTU comparison in QImode is done and the constant equals
> > 0xffffffffffffff80.
> > 
> > Sorry for inlining another patch, but I would really like to make sure
> > that my understanding is correct, now, before I come up with another
> > patch.  Thus it would be great if someone could shed some light on this.
> > 
> > gcc/ChangeLog:
> > 
> >         * combine.cc (simplify_compare_const): Properly handle unsigned
> >         constants while narrowing comparison of memory and constants.
> > ---
> >  gcc/combine.cc | 19 ++++++++++---------
> >  1 file changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index e46d202d0a7..468b7fde911 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >        && !MEM_VOLATILE_P (op0)
> >        /* The optimization makes only sense for constants which are big enough
> >          so that we have a chance to chop off something at all.  */
> > -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > -      /* Bail out, if the constant does not fit into INT_MODE.  */
> > -      && (unsigned HOST_WIDE_INT) const_op
> > -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> > +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
> >        /* Ensure that we do not overflow during normalization.  */
> > -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > +      && (code != GTU
> > +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > +            < HOST_WIDE_INT_M1U)
> > +      && trunc_int_for_mode (const_op, int_mode) == const_op)
> >      {
> > -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > +      unsigned HOST_WIDE_INT n
> > +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> >        enum rtx_code adjusted_code;
> >  
> >        /* Normalize code to either LEU or GEU.  */
> > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> >                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> >                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > -               n);
> > +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > +               GET_RTX_NAME (adjusted_code), n);
> >             }
> >           poly_int64 offset = (BYTES_BIG_ENDIAN
> >                                ? 0
> >                                : (GET_MODE_SIZE (int_mode)
> >                                   - GET_MODE_SIZE (narrow_mode_iter)));
> >           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > -         *pop1 = GEN_INT (n);
> > +         *pop1 = gen_int_mode (n, narrow_mode_iter);
> >           return adjusted_code;
> >         }
> >      }
> > -- 
> > 2.41.0
> >
Xi Ruoyao Aug. 29, 2023, 10:24 a.m. UTC | #5
Hi Jeff,

Can you take a look at the patch?  It fixes a bootstrap failure on
LoongArch, and this month three related Bugzilla tickets have been
created (110939, 111124, 111171).

On Thu, 2023-08-10 at 15:04 +0200, Stefan Schulze Frielinghaus via Gcc-
patches wrote:
> In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> completely missed the fact that the normal form of a generated constant for a
> mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> actual constant.  This even holds true for unsigned constants.
> 
> Fixed by masking out the upper bits for the incoming constant and sign
> extending the resulting unsigned constant.
> 
> Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> 
> While reading existing optimizations in combine I stumbled across two
> optimizations where either my intuition about the representation of
> unsigned integers via a const_int rtx is wrong, which then in turn would
> probably also mean that this patch is wrong, or that the optimizations
> are missed sometimes.  In other words in the following I would assume
> that the upper bits are masked out:

/* removed the inlined patch to avoid confusion */

> For example, while bootstrapping on x64 the optimization is missed since
> a LTU comparison in QImode is done and the constant equals
> 0xffffffffffffff80.
> 
> Sorry for inlining another patch, but I would really like to make sure
> that my understanding is correct, now, before I come up with another
> patch.  Thus it would be great if someone could shed some light on this.
> 
> gcc/ChangeLog:
> 
>         * combine.cc (simplify_compare_const): Properly handle unsigned
>         constants while narrowing comparison of memory and constants.
> ---
>  gcc/combine.cc | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index e46d202d0a7..468b7fde911 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>        && !MEM_VOLATILE_P (op0)
>        /* The optimization makes only sense for constants which are big enough
>          so that we have a chance to chop off something at all.  */
> -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> -      /* Bail out, if the constant does not fit into INT_MODE.  */
> -      && (unsigned HOST_WIDE_INT) const_op
> -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
>        /* Ensure that we do not overflow during normalization.  */
> -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> +      && (code != GTU
> +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> +            < HOST_WIDE_INT_M1U)
> +      && trunc_int_for_mode (const_op, int_mode) == const_op)
>      {
> -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> +      unsigned HOST_WIDE_INT n
> +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
>        enum rtx_code adjusted_code;
>  
>        /* Normalize code to either LEU or GEU.  */
> @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
>                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
>                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> -               n);
> +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> +               GET_RTX_NAME (adjusted_code), n);
>             }
>           poly_int64 offset = (BYTES_BIG_ENDIAN
>                                ? 0
>                                : (GET_MODE_SIZE (int_mode)
>                                   - GET_MODE_SIZE (narrow_mode_iter)));
>           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> -         *pop1 = GEN_INT (n);
> +         *pop1 = gen_int_mode (n, narrow_mode_iter);
>           return adjusted_code;
>         }
>      }
Stefan Schulze Frielinghaus Sept. 4, 2023, 5:55 a.m. UTC | #6
Ping.

On Thu, Aug 24, 2023 at 11:31:32AM +0800, Xi Ruoyao wrote:
> Ping again.
> 
> On Fri, 2023-08-18 at 13:04 +0200, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> > Ping.  Since this fixes bootstrap problem PR110939 for Loongarch I'm
> > pingen this one earlier.
> > 
> > On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus wrote:
> > > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > > completely missed the fact that the normal form of a generated constant for a
> > > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > > actual constant.  This even holds true for unsigned constants.
> > > 
> > > Fixed by masking out the upper bits for the incoming constant and sign
> > > extending the resulting unsigned constant.
> > > 
> > > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > > 
> > > While reading existing optimizations in combine I stumbled across two
> > > optimizations where either my intuition about the representation of
> > > unsigned integers via a const_int rtx is wrong, which then in turn would
> > > probably also mean that this patch is wrong, or that the optimizations
> > > are missed sometimes.  In other words in the following I would assume
> > > that the upper bits are masked out:
> > > 
> > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > index 468b7fde911..80c4ff0fbaf 100644
> > > --- a/gcc/combine.cc
> > > +++ b/gcc/combine.cc
> > > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > >        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
> > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > >         {
> > >           const_op = 0;
> > > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > >        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
> > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > >         {
> > >           const_op = 0;
> > > 
> > > For example, while bootstrapping on x64 the optimization is missed since
> > > a LTU comparison in QImode is done and the constant equals
> > > 0xffffffffffffff80.
> > > 
> > > Sorry for inlining another patch, but I would really like to make sure
> > > that my understanding is correct, now, before I come up with another
> > > patch.  Thus it would be great if someone could shed some light on this.
> > > 
> > > gcc/ChangeLog:
> > > 
> > >         * combine.cc (simplify_compare_const): Properly handle unsigned
> > >         constants while narrowing comparison of memory and constants.
> > > ---
> > >  gcc/combine.cc | 19 ++++++++++---------
> > >  1 file changed, 10 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > index e46d202d0a7..468b7fde911 100644
> > > --- a/gcc/combine.cc
> > > +++ b/gcc/combine.cc
> > > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > >        && !MEM_VOLATILE_P (op0)
> > >        /* The optimization makes only sense for constants which are big enough
> > >          so that we have a chance to chop off something at all.  */
> > > -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > > -      /* Bail out, if the constant does not fit into INT_MODE.  */
> > > -      && (unsigned HOST_WIDE_INT) const_op
> > > -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> > > +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
> > >        /* Ensure that we do not overflow during normalization.  */
> > > -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > > +      && (code != GTU
> > > +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > +            < HOST_WIDE_INT_M1U)
> > > +      && trunc_int_for_mode (const_op, int_mode) == const_op)
> > >      {
> > > -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > > +      unsigned HOST_WIDE_INT n
> > > +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> > >        enum rtx_code adjusted_code;
> > >  
> > >        /* Normalize code to either LEU or GEU.  */
> > > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > >                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> > >                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> > >                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > > -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > > -               n);
> > > +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > > +               GET_RTX_NAME (adjusted_code), n);
> > >             }
> > >           poly_int64 offset = (BYTES_BIG_ENDIAN
> > >                                ? 0
> > >                                : (GET_MODE_SIZE (int_mode)
> > >                                   - GET_MODE_SIZE (narrow_mode_iter)));
> > >           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > > -         *pop1 = GEN_INT (n);
> > > +         *pop1 = gen_int_mode (n, narrow_mode_iter);
> > >           return adjusted_code;
> > >         }
> > >      }
> > > -- 
> > > 2.41.0
> > > 
> 
> -- 
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
Xi Ruoyao Sept. 10, 2023, 4:56 p.m. UTC | #7
Ping.

> > > On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus wrote:
> > > > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > > > completely missed the fact that the normal form of a generated constant for a
> > > > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > > > actual constant.  This even holds true for unsigned constants.
> > > > 
> > > > Fixed by masking out the upper bits for the incoming constant and sign
> > > > extending the resulting unsigned constant.
> > > > 
> > > > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > > > 
> > > > While reading existing optimizations in combine I stumbled across two
> > > > optimizations where either my intuition about the representation of
> > > > unsigned integers via a const_int rtx is wrong, which then in turn would
> > > > probably also mean that this patch is wrong, or that the optimizations
> > > > are missed sometimes.  In other words in the following I would assume
> > > > that the upper bits are masked out:
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index 468b7fde911..80c4ff0fbaf 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
> > > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > > >         {
> > > >           const_op = 0;
> > > > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
> > > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > > >         {
> > > >           const_op = 0;
> > > > 
> > > > For example, while bootstrapping on x64 the optimization is missed since
> > > > a LTU comparison in QImode is done and the constant equals
> > > > 0xffffffffffffff80.
> > > > 
> > > > Sorry for inlining another patch, but I would really like to make sure
> > > > that my understanding is correct, now, before I come up with another
> > > > patch.  Thus it would be great if someone could shed some light on this.
> > > > 
> > > > gcc/ChangeLog:
> > > > 
> > > >         * combine.cc (simplify_compare_const): Properly handle unsigned
> > > >         constants while narrowing comparison of memory and constants.
> > > > ---
> > > >  gcc/combine.cc | 19 ++++++++++---------
> > > >  1 file changed, 10 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index e46d202d0a7..468b7fde911 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        && !MEM_VOLATILE_P (op0)
> > > >        /* The optimization makes only sense for constants which are big enough
> > > >          so that we have a chance to chop off something at all.  */
> > > > -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > > > -      /* Bail out, if the constant does not fit into INT_MODE.  */
> > > > -      && (unsigned HOST_WIDE_INT) const_op
> > > > -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> > > > +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
> > > >        /* Ensure that we do not overflow during normalization.  */
> > > > -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > > > +      && (code != GTU
> > > > +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > > +            < HOST_WIDE_INT_M1U)
> > > > +      && trunc_int_for_mode (const_op, int_mode) == const_op)
> > > >      {
> > > > -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > > > +      unsigned HOST_WIDE_INT n
> > > > +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> > > >        enum rtx_code adjusted_code;
> > > >  
> > > >        /* Normalize code to either LEU or GEU.  */
> > > > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> > > >                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> > > >                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > > > -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > > > -               n);
> > > > +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > > > +               GET_RTX_NAME (adjusted_code), n);
> > > >             }
> > > >           poly_int64 offset = (BYTES_BIG_ENDIAN
> > > >                                ? 0
> > > >                                : (GET_MODE_SIZE (int_mode)
> > > >                                   - GET_MODE_SIZE (narrow_mode_iter)));
> > > >           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > > > -         *pop1 = GEN_INT (n);
> > > > +         *pop1 = gen_int_mode (n, narrow_mode_iter);
> > > >           return adjusted_code;
> > > >         }
> > > >      }
> > > > -- 
> > > > 2.41.0
> > > > 
> > 
> > -- 
> > Xi Ruoyao <xry111@xry111.site>
> > School of Aerospace Science and Technology, Xidian University
Xi Ruoyao Sept. 19, 2023, 7:31 a.m. UTC | #8
Ping^5.

> > > On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus wrote:
> > > > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > > > completely missed the fact that the normal form of a generated constant for a
> > > > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > > > actual constant.  This even holds true for unsigned constants.
> > > > 
> > > > Fixed by masking out the upper bits for the incoming constant and sign
> > > > extending the resulting unsigned constant.
> > > > 
> > > > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > > > 
> > > > While reading existing optimizations in combine I stumbled across two
> > > > optimizations where either my intuition about the representation of
> > > > unsigned integers via a const_int rtx is wrong, which then in turn would
> > > > probably also mean that this patch is wrong, or that the optimizations
> > > > are missed sometimes.  In other words in the following I would assume
> > > > that the upper bits are masked out:
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index 468b7fde911..80c4ff0fbaf 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
> > > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > > >         {
> > > >           const_op = 0;
> > > > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
> > > >        else if (is_a <scalar_int_mode> (mode, &int_mode)
> > > >                && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > > > -              && ((unsigned HOST_WIDE_INT) const_op
> > > > +              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > >                    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
> > > >         {
> > > >           const_op = 0;
> > > > 
> > > > For example, while bootstrapping on x64 the optimization is missed since
> > > > a LTU comparison in QImode is done and the constant equals
> > > > 0xffffffffffffff80.
> > > > 
> > > > Sorry for inlining another patch, but I would really like to make sure
> > > > that my understanding is correct, now, before I come up with another
> > > > patch.  Thus it would be great if someone could shed some light on this.
> > > > 
> > > > gcc/ChangeLog:
> > > > 
> > > >         * combine.cc (simplify_compare_const): Properly handle unsigned
> > > >         constants while narrowing comparison of memory and constants.
> > > > ---
> > > >  gcc/combine.cc | 19 ++++++++++---------
> > > >  1 file changed, 10 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index e46d202d0a7..468b7fde911 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >        && !MEM_VOLATILE_P (op0)
> > > >        /* The optimization makes only sense for constants which are big enough
> > > >          so that we have a chance to chop off something at all.  */
> > > > -      && (unsigned HOST_WIDE_INT) const_op > 0xff
> > > > -      /* Bail out, if the constant does not fit into INT_MODE.  */
> > > > -      && (unsigned HOST_WIDE_INT) const_op
> > > > -        < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
> > > > +      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
> > > >        /* Ensure that we do not overflow during normalization.  */
> > > > -      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
> > > > +      && (code != GTU
> > > > +         || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > > > +            < HOST_WIDE_INT_M1U)
> > > > +      && trunc_int_for_mode (const_op, int_mode) == const_op)
> > > >      {
> > > > -      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > > > +      unsigned HOST_WIDE_INT n
> > > > +       = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> > > >        enum rtx_code adjusted_code;
> > > >  
> > > >        /* Normalize code to either LEU or GEU.  */
> > > > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> > > >                 HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> > > >                 HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> > > >                 GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > > > -               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > > > -               n);
> > > > +               (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > > > +               GET_RTX_NAME (adjusted_code), n);
> > > >             }
> > > >           poly_int64 offset = (BYTES_BIG_ENDIAN
> > > >                                ? 0
> > > >                                : (GET_MODE_SIZE (int_mode)
> > > >                                   - GET_MODE_SIZE (narrow_mode_iter)));
> > > >           *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > > > -         *pop1 = GEN_INT (n);
> > > > +         *pop1 = gen_int_mode (n, narrow_mode_iter);
> > > >           return adjusted_code;
> > > >         }
> > > >      }
> > > > -- 
> > > > 2.41.0
> > > > 
> > 
> > -- 
> > Xi Ruoyao <xry111@xry111.site>
> > School of Aerospace Science and Technology, Xidian University
Stefan Schulze Frielinghaus Sept. 19, 2023, 4:06 p.m. UTC | #9
Since this patch has been sitting in the queue for quite some time and (more
importantly?) solves a bootstrap problem, let me reiterate:

While writing the initial commit 7cdd0860949c6c3232e6cff1d7ca37bb5234074c
and the subsequent (potential) fix 41ef5a34161356817807be3a2e51fbdbe575ae85
I was not aware of the fact that the normal form of a CONST_INT,
representing an unsigned integer with fewer bits than in HOST_WIDE_INT,
is a sign-extended version of the unsigned integer.  This invariant is
checked in rtl.h where we have at line 2297:

    case CONST_INT:
      if (precision < HOST_BITS_PER_WIDE_INT)
        /* Nonzero BImodes are stored as STORE_FLAG_VALUE, which on many
           targets is 1 rather than -1.  */
        gcc_checking_assert (INTVAL (x.first)
                             == sext_hwi (INTVAL (x.first), precision)
                             || (x.second == BImode && INTVAL (x.first) == 1));

This was pretty surprising and, frankly speaking, unintuitive to me, which
is why I skimmed further over the existing code, where I found this
in combine.cc:

      /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
      else if (is_a <scalar_int_mode> (mode, &int_mode)
               && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
               && ((unsigned HOST_WIDE_INT) const_op
                   == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
        {

The expression of the if-statement is a contradiction rendering the then-part
unreachable unless you mask out the upper bits of the sign-extended
unsigned integer const_op (as proposed in the inlined patch):

      ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))

This is why I got a bit uncertain and hoped to get some feedback on whether
my intuition is correct or not.  Meanwhile I also found a comment in
the internals book at "14.7 Constant Expression Types" where we have:

   "Constants generated for modes with fewer bits than in HOST_WIDE_INT
    must be sign extended to full width (e.g., with gen_int_mode).
    [...]
    Note however that values are neither inherently signed nor
    inherently unsigned; where necessary, signedness is determined by
    the rtl operation instead."

At least this and the assert statement document that the normal form of
a CONST_INT is kind of special w.r.t. unsigned integers.  Is there
anyone who can shed some light on _why_ such a normal form was chosen?

Independent of why such a normal form was chosen, this patch restores
the normal form and solves the bootstrap problem for Loongarch.

Cheers,
Stefan
Eric Botcazou Sept. 25, 2023, 10:52 a.m. UTC | #10
> This is why I got a bit uncertain and hoped to get some feedback whether
> my intuition is correct or not.  Meanwhile I also found a comment in
> the internals book at "14.7 Constant Expression Types" where we have:
> 
>    "Constants generated for modes with fewer bits than in HOST_WIDE_INT
>     must be sign extended to full width (e.g., with gen_int_mode).
>     [...]
>     Note however that values are neither inherently signed nor
>     inherently unsigned; where necessary, signedness is determined by
>     the rtl operation instead."
> 
> At least this and the assert statement document that the normal form of
> a CONST_INT is kind of special w.r.t. unsigned integers.  Is there
> anyone who can shed some light on _why_ such a normal form was chosen?

In RTL integral values have no sign, they just represent a given pattern of 
bits so, in order to have a 1-to-1 mapping, you need to choose a canonical 
form.  The signed form is probably more natural and, since CONST_INTs have no 
mode, the same objects are used for e.g. QImode and HImode, which means that 
you need to sign-extend the bit pattern.
Jeff Law Sept. 29, 2023, 6:51 p.m. UTC | #11
On 9/19/23 10:06, Stefan Schulze Frielinghaus wrote:
> Since this patch is sitting in the queue for quite some time and (more
> importantly?) solves a bootstrap problem let me reiterate:
> 
> While writing the initial commit 7cdd0860949c6c3232e6cff1d7ca37bb5234074c
> and the subsequent (potential) fix 41ef5a34161356817807be3a2e51fbdbe575ae85
> I was not aware of the fact that the normal form of a CONST_INT,
> representing an unsigned integer with fewer bits than in HOST_WIDE_INT,
> is a sign-extended version of the unsigned integer.  This invariant is
> checked in rtl.h where we have at line 2297:
> 
>      case CONST_INT:
>        if (precision < HOST_BITS_PER_WIDE_INT)
>          /* Nonzero BImodes are stored as STORE_FLAG_VALUE, which on many
>             targets is 1 rather than -1.  */
>          gcc_checking_assert (INTVAL (x.first)
>                               == sext_hwi (INTVAL (x.first), precision)
>                               || (x.second == BImode && INTVAL (x.first) == 1));
> 
> This was pretty surprising and frankly speaking unintuitive to me which

At the heart of the matter (as I think Eric recently mentioned) is that 
CONST_INT objects do not have a mode.  So consider something like 
(const_int 32768) on a 32bit host.

In modes wider than HImode it does exactly what you'd expect.  But if 
you tried to use it in a HImode context, then it's actually invalid as 
it needs to be sign extended resulting in (const_int -32768).  That's 
what gen_int_mode handles for you -- you provide a mode for the context 
in which the constant will be used and you get back suitable RTL.

This actually mimicks what some targets do in hardware for 32 bit 
objects being held in 64bit registers.

Anyway, that's the background.  It is highly likely there are bugs where 
we fail to use gen_int_mode when we should or in some tests for when an 
optimization may or may not be applicable.  So I wouldn't be surprised 
at all if you were to grub around and find cases that aren't properly 
handled -- particularly of the missed-optimization variety.


>
> 
> Independent of why such a normal form was chosen, this patch restores
> the normal form and solves the bootstrap problem for Loongarch.
Digging it out and looking at it momentarily.  It just keeps falling off 
the end of the TODO list day-to-day.  Sorry for that, particularly since 
you're probably getting bugged by folks looking to restore builds/test 
results.

jeff
Jeff Law Sept. 29, 2023, 7:01 p.m. UTC | #12
On 8/10/23 07:04, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> completely missed the fact that the normal form of a generated constant for a
> mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> actual constant.  This even holds true for unsigned constants.
> 
> Fixed by masking out the upper bits for the incoming constant and sign
> extending the resulting unsigned constant.
> 
> Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> 
[ snip]

> 
> gcc/ChangeLog:
> 
> 	* combine.cc (simplify_compare_const): Properly handle unsigned
> 	constants while narrowing comparison of memory and constants.
OK.


> ---
> @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
>   		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
>   		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
>   		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> -		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> -		n);
> +		(unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> +		GET_RTX_NAME (adjusted_code), n);
>   	    }
>   	  poly_int64 offset = (BYTES_BIG_ENDIAN
>   			       ? 0
>   			       : (GET_MODE_SIZE (int_mode)
>   				  - GET_MODE_SIZE (narrow_mode_iter)));
>   	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> -	  *pop1 = GEN_INT (n);
> +	  *pop1 = gen_int_mode (n, narrow_mode_iter);
>   	  return adjusted_code;
FWIW, I should definitely have caught this hunk earlier -- we've gone 
the rounds in this same space (GEN_INT vs gen_int_mode) elsewhere.

Again, sorry for the long wait.

jeff
Stefan Schulze Frielinghaus Oct. 1, 2023, 2:26 p.m. UTC | #13
On Fri, Sep 29, 2023 at 01:01:57PM -0600, Jeff Law wrote:
> 
> 
> On 8/10/23 07:04, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > completely missed the fact that the normal form of a generated constant for a
> > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > actual constant.  This even holds true for unsigned constants.
> > 
> > Fixed by masking out the upper bits for the incoming constant and sign
> > extending the resulting unsigned constant.
> > 
> > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > 
> [ snip]
> 
> > 
> > gcc/ChangeLog:
> > 
> > 	* combine.cc (simplify_compare_const): Properly handle unsigned
> > 	constants while narrowing comparison of memory and constants.
> OK.
> 
> 
> > ---
> > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
> >   		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> >   		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> >   		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > -		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
> > -		n);
> > +		(unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
> > +		GET_RTX_NAME (adjusted_code), n);
> >   	    }
> >   	  poly_int64 offset = (BYTES_BIG_ENDIAN
> >   			       ? 0
> >   			       : (GET_MODE_SIZE (int_mode)
> >   				  - GET_MODE_SIZE (narrow_mode_iter)));
> >   	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
> > -	  *pop1 = GEN_INT (n);
> > +	  *pop1 = gen_int_mode (n, narrow_mode_iter);
> >   	  return adjusted_code;
> FWIW, I should definitely have caught this hunk earlier -- we've gone the
> rounds in this same space (GEN_INT vs gen_int_mode) elsewhere.
> 
> Again, sorry for the long wait.
> 
> jeff

No worries at all.  At least I have learned something new :)

Thanks Jeff and Eric for clarification.  This matches with my intuition,
now, so I've pushed the patch.

Cheers,
Stefan
Jeff Law Oct. 1, 2023, 2:36 p.m. UTC | #14
On 10/1/23 08:26, Stefan Schulze Frielinghaus wrote:

>> FWIW, I should definitely have caught this hunk earlier -- we've gone the
>> rounds in this same space (GEN_INT vs gen_int_mode) elsewhere.
>>
>> Again, sorry for the long wait.
>>
>> jeff
> 
> No worries at all.  At least I have learned something new :)
> 
> Thanks Jeff and Eric for clarification.  This matches with my intuition,
> now, so I've pushed the patch.
BTW, in a completely different context I need to do some testing on the 
alpha port, which hasn't bootstrapped in a couple months.  I'd been 
assuming the failure was completely due to ongoing changes I'm making in 
the infrastructure I used to bootstrap QEMU emulated systems.

As it turns out, the infrastructure changes and this minor combine issue 
were both playing a role!

Jeff
diff mbox series

Patch

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 468b7fde911..80c4ff0fbaf 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -11923,7 +11923,7 @@  simplify_compare_const (enum rtx_code code, machine_mode mode,
       /* (unsigned) < 0x80000000 is equivalent to >= 0.  */
       else if (is_a <scalar_int_mode> (mode, &int_mode)
               && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
-              && ((unsigned HOST_WIDE_INT) const_op
+              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
                   == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
        {
          const_op = 0;
@@ -11962,7 +11962,7 @@  simplify_compare_const (enum rtx_code code, machine_mode mode,
       /* (unsigned) >= 0x80000000 is equivalent to < 0.  */
       else if (is_a <scalar_int_mode> (mode, &int_mode)
               && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
-              && ((unsigned HOST_WIDE_INT) const_op
+              && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
                   == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1)))
        {
          const_op = 0;

For example, while bootstrapping on x64 the optimization is missed since
a LTU comparison in QImode is done and the constant equals
0xffffffffffffff80.

Sorry for inlining another patch, but I would really like to make sure
that my understanding is correct, now, before I come up with another
patch.  Thus it would be great if someone could shed some light on this.

gcc/ChangeLog:

	* combine.cc (simplify_compare_const): Properly handle unsigned
	constants while narrowing comparison of memory and constants.
---
 gcc/combine.cc | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index e46d202d0a7..468b7fde911 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -12003,14 +12003,15 @@  simplify_compare_const (enum rtx_code code, machine_mode mode,
       && !MEM_VOLATILE_P (op0)
       /* The optimization makes only sense for constants which are big enough
 	 so that we have a chance to chop off something at all.  */
-      && (unsigned HOST_WIDE_INT) const_op > 0xff
-      /* Bail out, if the constant does not fit into INT_MODE.  */
-      && (unsigned HOST_WIDE_INT) const_op
-	 < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) - 1)
+      && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 0xff
       /* Ensure that we do not overflow during normalization.  */
-      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
+      && (code != GTU
+	  || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
+	     < HOST_WIDE_INT_M1U)
+      && trunc_int_for_mode (const_op, int_mode) == const_op)
     {
-      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
+      unsigned HOST_WIDE_INT n
+	= (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
       enum rtx_code adjusted_code;
 
       /* Normalize code to either LEU or GEU.  */
@@ -12051,15 +12052,15 @@  simplify_compare_const (enum rtx_code code, machine_mode mode,
 		HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
 		HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
 		GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
-		(unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
-		n);
+		(unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode),
+		GET_RTX_NAME (adjusted_code), n);
 	    }
 	  poly_int64 offset = (BYTES_BIG_ENDIAN
 			       ? 0
 			       : (GET_MODE_SIZE (int_mode)
 				  - GET_MODE_SIZE (narrow_mode_iter)));
 	  *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
-	  *pop1 = GEN_INT (n);
+	  *pop1 = gen_int_mode (n, narrow_mode_iter);
 	  return adjusted_code;
 	}
     }