diff mbox series

[v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Message ID 20240226142235.3215553-1-pan2.li@intel.com
State New
Headers show
Series [v2] DSE: Bugfix ICE after allow vector type in get_stored_val | expand

Commit Message

Li, Pan2 Feb. 26, 2024, 2:22 p.m. UTC
From: Pan Li <pan2.li@intel.com>

We allowed vector type for get_stored_val when read is less than or
equal to store in previous.  Unfortunately, we missed to adjust the
validate_subreg part accordingly.  When the vector type's size is
less than vector register, it will be considered as invalid in the
validate_subreg.

Consider the validate_subreg is kind of a can with worms and we are
in stage 4.  We will fix the issue from the DES side, and make sure
the subreg is valid for both the read_mode and store_mode before
perform the real gen_lowpart.

The below test are passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv regression test.
* The aarch64 regression test.

gcc/ChangeLog:

	* dse.cc (get_stored_val): Add validate_subreg check before
	perform the gen_lowpart for rtl.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
	the ICE.
	* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/dse.cc                                    |  4 +++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
 .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

Comments

Richard Biener Feb. 27, 2024, 9:47 a.m. UTC | #1
On Mon, Feb 26, 2024 at 3:22 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  When the vector type's size is
> less than vector register, it will be considered as invalid in the
> validate_subreg.
>
> Consider the validate_subreg is kind of a can with worms and we are
> in stage 4.  We will fix the issue from the DES side, and make sure
> the subreg is valid for both the read_mode and store_mode before
> perform the real gen_lowpart.
>
> The below test are passed for this patch:
>
> * The x86 bootstrap test.
> * The x86 regression test.
> * The riscv regression test.
> * The aarch64 regression test.
>
> gcc/ChangeLog:
>
>         * dse.cc (get_stored_val): Add validate_subreg check before
>         perform the gen_lowpart for rtl.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
>         the ICE.
>         * gcc.target/riscv/rvv/base/bug-6.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/dse.cc                                    |  4 +++-
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>  3 files changed, 26 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
>
> diff --git a/gcc/dse.cc b/gcc/dse.cc
> index edc7a1dfecf..1596da91da0 100644
> --- a/gcc/dse.cc
> +++ b/gcc/dse.cc
> @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
>                                  copy_rtx (store_info->const_rhs));
>    else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
>      && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
> -    && targetm.modes_tieable_p (read_mode, store_mode))
> +    && targetm.modes_tieable_p (read_mode, store_mode)
> +    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
> +                       subreg_lowpart_offset (read_mode, store_mode)))
>      read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));

Thanks for the 2nd try.  I'll note the above uses gen_lowpart but
validate_subreg
which is sort-of a mismatch?  But I'll leave this for review to people with more
knowledge in this area.  Jeff?  Richard?

Thanks,
Richard.

>    else
>      read_reg = extract_low_bits (read_mode, store_mode,
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>
>  struct A { float x, y; };
>  struct B { struct A u; };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> new file mode 100644
> index 00000000000..5bb00b8f587
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> @@ -0,0 +1,22 @@
> +/* Test that we do not have ice when compile */
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
> +
> +struct A { float x, y; };
> +struct B { struct A u; };
> +
> +extern void bar (struct A *);
> +
> +float
> +f3 (struct B *x, int y)
> +{
> +  struct A p = {1.0f, 2.0f};
> +  struct A *q = &x[y].u;
> +
> +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> +
> +  bar (&p);
> +
> +  return x[y].u.x + x[y].u.y;
> +}
> --
> 2.34.1
>
Jeff Law Feb. 27, 2024, 3:02 p.m. UTC | #2
On 2/26/24 07:22, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  When the vector type's size is
> less than vector register, it will be considered as invalid in the
> validate_subreg.
> 
> Consider the validate_subreg is kind of a can with worms and we are
> in stage 4.  We will fix the issue from the DES side, and make sure
> the subreg is valid for both the read_mode and store_mode before
> perform the real gen_lowpart.
> 
> The below test are passed for this patch:
> 
> * The x86 bootstrap test.
> * The x86 regression test.
> * The riscv regression test.
> * The aarch64 regression test.
> 
> gcc/ChangeLog:
> 
> 	* dse.cc (get_stored_val): Add validate_subreg check before
> 	perform the gen_lowpart for rtl.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
> 	the ICE.
> 	* gcc.target/riscv/rvv/base/bug-6.c: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>   gcc/dse.cc                                    |  4 +++-
>   gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>   .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>   3 files changed, 26 insertions(+), 2 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> 
> diff --git a/gcc/dse.cc b/gcc/dse.cc
> index edc7a1dfecf..1596da91da0 100644
> --- a/gcc/dse.cc
> +++ b/gcc/dse.cc
> @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
>   				 copy_rtx (store_info->const_rhs));
>     else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
>       && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
> -    && targetm.modes_tieable_p (read_mode, store_mode))
> +    && targetm.modes_tieable_p (read_mode, store_mode)
> +    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
> +			subreg_lowpart_offset (read_mode, store_mode)))
>       read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
>     else
>       read_reg = extract_low_bits (read_mode, store_mode,

So we're just changing whether or not we call gen_lowpart directly or go 
through extract_low_bits, which may in turn generate subreg, call 
gen_lowpart itself and a few other things.

I'm guessing that extract_low_bits is going to return NULL in this case 
via this code (specifically the second test).

>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;


Pan, can you confirm what path we take through extract_low_bits?

One might argue that we should just call into extract_low_bits 
unconditionally since it'll ultimately call gen_lowpart when it safely 
can.  The downside is that's a bigger change than I'd like at this stage 
in our development cycle.

I wouldn't be surprised if other direct uses of gen_lowpart have similar 
problems.





> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>   /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>   
>   struct A { float x, y; };
>   struct B { struct A u; };
So this may compromise the original intent of this test.  What I would 
suggest instead is to create a new test with the dg-do & dg-options you 
want with a #include "ssa-fre-44.c".

So to move forward.  Let's confirm the path we take through 
extract_low_bits matches expectations and fixup the testsuite change.

Jeff
Li, Pan2 Feb. 28, 2024, 1:40 a.m. UTC | #3
> Pan, can you confirm what path we take through extract_low_bits?

Thanks Jeff for comments, will have a try soon and keep you posted.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Tuesday, February 27, 2024 11:03 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/26/24 07:22, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  When the vector type's size is
> less than vector register, it will be considered as invalid in the
> validate_subreg.
> 
> Consider the validate_subreg is kind of a can with worms and we are
> in stage 4.  We will fix the issue from the DES side, and make sure
> the subreg is valid for both the read_mode and store_mode before
> perform the real gen_lowpart.
> 
> The below test are passed for this patch:
> 
> * The x86 bootstrap test.
> * The x86 regression test.
> * The riscv regression test.
> * The aarch64 regression test.
> 
> gcc/ChangeLog:
> 
> 	* dse.cc (get_stored_val): Add validate_subreg check before
> 	perform the gen_lowpart for rtl.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
> 	the ICE.
> 	* gcc.target/riscv/rvv/base/bug-6.c: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>   gcc/dse.cc                                    |  4 +++-
>   gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>   .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>   3 files changed, 26 insertions(+), 2 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> 
> diff --git a/gcc/dse.cc b/gcc/dse.cc
> index edc7a1dfecf..1596da91da0 100644
> --- a/gcc/dse.cc
> +++ b/gcc/dse.cc
> @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
>   				 copy_rtx (store_info->const_rhs));
>     else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
>       && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
> -    && targetm.modes_tieable_p (read_mode, store_mode))
> +    && targetm.modes_tieable_p (read_mode, store_mode)
> +    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
> +			subreg_lowpart_offset (read_mode, store_mode)))
>       read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
>     else
>       read_reg = extract_low_bits (read_mode, store_mode,

So we're just changing whether or not we call gen_lowpart directly or go 
through extract_low_bits, which may in turn generate subreg, call 
gen_lowpart itself and a few other things.

I'm guessing that extract_low_bits is going to return NULL in this case 
via this code (specifically the second test).

>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;


Pan, can you confirm what path we take through extract_low_bits?

One might argue that we should just call into extract_low_bits 
unconditionally since it'll ultimately call gen_lowpart when it safely 
can.  The downside is that's a bigger change than I'd like at this stage 
in our development cycle.

I wouldn't be surprised if other direct uses of gen_lowpart have similar 
problems.





> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>   /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>   
>   struct A { float x, y; };
>   struct B { struct A u; };
So this may compromise the original intent of this test.  What I would 
suggest instead is to create a new test with the dg-do & dg-options you 
want with a #include "ssa-fre-44.c".

So to move forward.  Let's confirm the path we take through 
extract_low_bits matches expectations and fixup the testsuite change.

Jeff
Li, Pan2 Feb. 28, 2024, 4:51 a.m. UTC | #4
>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;

Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is 
E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode 
to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart.

Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes 
up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Wednesday, February 28, 2024 9:41 AM
To: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <Hongtao.Liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

> Pan, can you confirm what path we take through extract_low_bits?

Thanks Jeff for comments, will have a try soon and keep you posted.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Tuesday, February 27, 2024 11:03 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/26/24 07:22, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> We allowed vector type for get_stored_val when read is less than or
> equal to store in previous.  Unfortunately, we missed to adjust the
> validate_subreg part accordingly.  When the vector type's size is
> less than vector register, it will be considered as invalid in the
> validate_subreg.
> 
> Consider the validate_subreg is kind of a can with worms and we are
> in stage 4.  We will fix the issue from the DES side, and make sure
> the subreg is valid for both the read_mode and store_mode before
> perform the real gen_lowpart.
> 
> The below test are passed for this patch:
> 
> * The x86 bootstrap test.
> * The x86 regression test.
> * The riscv regression test.
> * The aarch64 regression test.
> 
> gcc/ChangeLog:
> 
> 	* dse.cc (get_stored_val): Add validate_subreg check before
> 	perform the gen_lowpart for rtl.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
> 	the ICE.
> 	* gcc.target/riscv/rvv/base/bug-6.c: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>   gcc/dse.cc                                    |  4 +++-
>   gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c    |  2 +-
>   .../gcc.target/riscv/rvv/base/bug-6.c         | 22 +++++++++++++++++++
>   3 files changed, 26 insertions(+), 2 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
> 
> diff --git a/gcc/dse.cc b/gcc/dse.cc
> index edc7a1dfecf..1596da91da0 100644
> --- a/gcc/dse.cc
> +++ b/gcc/dse.cc
> @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
>   				 copy_rtx (store_info->const_rhs));
>     else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
>       && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
> -    && targetm.modes_tieable_p (read_mode, store_mode))
> +    && targetm.modes_tieable_p (read_mode, store_mode)
> +    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
> +			subreg_lowpart_offset (read_mode, store_mode)))
>       read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
>     else
>       read_reg = extract_low_bits (read_mode, store_mode,

So we're just changing whether or not we call gen_lowpart directly or go 
through extract_low_bits, which may in turn generate subreg, call 
gen_lowpart itself and a few other things.

I'm guessing that extract_low_bits is going to return NULL in this case 
via this code (specifically the second test).

>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;


Pan, can you confirm what path we take through extract_low_bits?

One might argue that we should just call into extract_low_bits 
unconditionally since it'll ultimately call gen_lowpart when it safely 
can.  The downside is that's a bigger change than I'd like at this stage 
in our development cycle.

I wouldn't be surprised if other direct uses of gen_lowpart have similar 
problems.





> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> index f79b4c142ae..624a00a4f32 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> @@ -1,5 +1,5 @@
>   /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-fre1" } */
> +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
>   
>   struct A { float x, y; };
>   struct B { struct A u; };
So this may compromise the original intent of this test.  What I would 
suggest instead is to create a new test with the dg-do & dg-options you 
want with a #include "ssa-fre-44.c".

So to move forward.  Let's confirm the path we take through 
extract_low_bits matches expectations and fixup the testsuite change.

Jeff
Jeff Law Feb. 28, 2024, 5:33 p.m. UTC | #5
On 2/27/24 21:51, Li, Pan2 wrote:
>>    if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>>      return NULL_RTX;
>>    if (!targetm.modes_tieable_p (int_mode, mode))
>>      return NULL_RTX;
> 
> Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is
> E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode
> to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart.
> 
> Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes
> up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX.
Well, the code tries to turn the vector mode into a suitable integer 
mode via int_mode_for_mode.  That takes a mode, including vector modes 
and tries to find an integer mode of the exact same size.

So it's going to check if V2SF can be tied to DI and V4QI with SI.  I 
suspect those are going to fail for RISC-V as those aren't tieable.

Jeff
Li, Pan2 Feb. 29, 2024, 1:38 a.m. UTC | #6
> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I 
> suspect those are going to fail for RISC-V as those aren't tieable.

Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.

static bool
riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* We don't allow different REG_CLASS modes tieable since it
     will cause ICE in register allocation (RA).
     E.g. V2SI and DI are not tieable.  */
  if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
    return false;
  return (mode1 == mode2
          || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
               && GET_MODE_CLASS (mode2) == MODE_FLOAT));
}

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Thursday, February 29, 2024 1:33 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/27/24 21:51, Li, Pan2 wrote:
>>    if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>>      return NULL_RTX;
>>    if (!targetm.modes_tieable_p (int_mode, mode))
>>      return NULL_RTX;
> 
> Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is
> E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode
> to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart.
> 
> Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes
> up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX.
Well, the code tries to turn the vector mode into a suitable integer 
mode via int_mode_for_mode.  That takes a mode, including vector modes 
and tries to find an integer mode of the exact same size.

So it's going to check if V2SF can be tied to DI and V4QI with SI.  I 
suspect those are going to fail for RISC-V as those aren't tieable.

Jeff
Robin Dapp Feb. 29, 2024, 1:28 p.m. UTC | #7
On 2/29/24 02:38, Li, Pan2 wrote:
>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I 
>> suspect those are going to fail for RISC-V as those aren't tieable.
> 
> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
> 
> static bool
> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
> {
>   /* We don't allow different REG_CLASS modes tieable since it
>      will cause ICE in register allocation (RA).
>      E.g. V2SI and DI are not tieable.  */
>   if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>     return false;
>   return (mode1 == mode2
>           || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>                && GET_MODE_CLASS (mode2) == MODE_FLOAT));
> }

Yes, but what we set tieable is e.g. V4QI and V2SF.

I suggested a target band-aid before:

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 799d7919a4a..982ca1a4250 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8208,6 +8208,11 @@ riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
      E.g. V2SI and DI are not tieable.  */
   if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
     return false;
+  if (GET_MODE_CLASS (GET_MODE_INNER (mode1)) == MODE_INT
+      && GET_MODE_CLASS (GET_MODE_INNER (mode2)) == MODE_FLOAT
+      && GET_MODE_SIZE (GET_MODE_INNER (mode1))
+                       != GET_MODE_SIZE (GET_MODE_INNER (mode2)))
+    return false;
   return (mode1 == mode2
          || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
               && GET_MODE_CLASS (mode2) == MODE_FLOAT));

but I don't like that as it just works around something
that I didn't even understand fully...

Regards
 Robin
Li, Pan2 March 2, 2024, 1:04 a.m. UTC | #8
Yeah, talking about this with robin offline for this fix.

> Yes, but what we set tieable is e.g. V4QI and V2SF.

That comes from different code lines.
Jeff would like to learn more about extract_low_bits, it will first convert to int_mode and then call the tieable_p.
And I bet the V4QI and V2SF comes from the if condition for gen_lowpart.

--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
 				 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
     && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-    && targetm.modes_tieable_p (read_mode, store_mode))  // <= V4QI and V2SF here.
+    && targetm.modes_tieable_p (read_mode, store_mode)
+    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+			subreg_lowpart_offset (read_mode, store_mode)))
     read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,

Pan

-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com> 
Sent: Thursday, February 29, 2024 9:29 PM
To: Li, Pan2 <pan2.li@intel.com>; Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org
Cc: rdapp.gcc@gmail.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

On 2/29/24 02:38, Li, Pan2 wrote:
>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I 
>> suspect those are going to fail for RISC-V as those aren't tieable.
> 
> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
> 
> static bool
> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
> {
>   /* We don't allow different REG_CLASS modes tieable since it
>      will cause ICE in register allocation (RA).
>      E.g. V2SI and DI are not tieable.  */
>   if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>     return false;
>   return (mode1 == mode2
>           || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>                && GET_MODE_CLASS (mode2) == MODE_FLOAT));
> }

Yes, but what we set tieable is e.g. V4QI and V2SF.

I suggested a target band-aid before:

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 799d7919a4a..982ca1a4250 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8208,6 +8208,11 @@ riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
      E.g. V2SI and DI are not tieable.  */
   if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
     return false;
+  if (GET_MODE_CLASS (GET_MODE_INNER (mode1)) == MODE_INT
+      && GET_MODE_CLASS (GET_MODE_INNER (mode2)) == MODE_FLOAT
+      && GET_MODE_SIZE (GET_MODE_INNER (mode1))
+                       != GET_MODE_SIZE (GET_MODE_INNER (mode2)))
+    return false;
   return (mode1 == mode2
          || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
               && GET_MODE_CLASS (mode2) == MODE_FLOAT));

but I don't like that as it just works around something
that I didn't even understand fully...

Regards
 Robin
Jeff Law March 3, 2024, 10:46 p.m. UTC | #9
On 2/29/24 06:28, Robin Dapp wrote:
> On 2/29/24 02:38, Li, Pan2 wrote:
>>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I
>>> suspect those are going to fail for RISC-V as those aren't tieable.
>>
>> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
>>
>> static bool
>> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
>> {
>>    /* We don't allow different REG_CLASS modes tieable since it
>>       will cause ICE in register allocation (RA).
>>       E.g. V2SI and DI are not tieable.  */
>>    if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>>      return false;
>>    return (mode1 == mode2
>>            || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>>                 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
>> }
> 
> Yes, but what we set tieable is e.g. V4QI and V2SF.
But in the case of a vector modes, we can usually reinterpret the 
underlying bits in whatever mode we want and do any of the usual 
operations on those bits.

In my mind that's fundamentally different than the int vs fp case.  If 
we have an integer value in an FP register, we can't really operate on 
the value in any sensible way without first copying it over to the 
integer register file and vice-versa.

Jeff
Li, Pan2 March 5, 2024, 6:22 a.m. UTC | #10
Thanks Jeff for comments.

> But in the case of a vector modes, we can usually reinterpret the 
> underlying bits in whatever mode we want and do any of the usual 
> operations on those bits.

Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
And then the different modes will return by gen_low_part. Unfortunately, there are some modes
 (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, 
and return NULL_RTX result in the final ICE.

Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid
modes before goes to gen_low_part.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Monday, March 4, 2024 6:47 AM
To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/29/24 06:28, Robin Dapp wrote:
> On 2/29/24 02:38, Li, Pan2 wrote:
>>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I
>>> suspect those are going to fail for RISC-V as those aren't tieable.
>>
>> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
>>
>> static bool
>> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
>> {
>>    /* We don't allow different REG_CLASS modes tieable since it
>>       will cause ICE in register allocation (RA).
>>       E.g. V2SI and DI are not tieable.  */
>>    if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>>      return false;
>>    return (mode1 == mode2
>>            || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>>                 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
>> }
> 
> Yes, but what we set tieable is e.g. V4QI and V2SF.
But in the case of a vector modes, we can usually reinterpret the 
underlying bits in whatever mode we want and do any of the usual 
operations on those bits.

In my mind that's fundamentally different than the int vs fp case.  If 
we have an integer value in an FP register, we can't really operate on 
the value in any sensible way without first copying it over to the 
integer register file and vice-versa.

Jeff
Li, Pan2 March 12, 2024, 2:08 a.m. UTC | #11
Hi Jeff,

Is there any suggestion(s) for how to fix this ICE in the reasonable approach? Thanks a lot.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Tuesday, March 5, 2024 2:23 PM
To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Thanks Jeff for comments.

> But in the case of a vector modes, we can usually reinterpret the 
> underlying bits in whatever mode we want and do any of the usual 
> operations on those bits.

Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
And then the different modes will return by gen_low_part. Unfortunately, there are some modes
 (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, 
and return NULL_RTX result in the final ICE.

Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid
modes before goes to gen_low_part.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Monday, March 4, 2024 6:47 AM
To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/29/24 06:28, Robin Dapp wrote:
> On 2/29/24 02:38, Li, Pan2 wrote:
>>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I
>>> suspect those are going to fail for RISC-V as those aren't tieable.
>>
>> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
>>
>> static bool
>> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
>> {
>>    /* We don't allow different REG_CLASS modes tieable since it
>>       will cause ICE in register allocation (RA).
>>       E.g. V2SI and DI are not tieable.  */
>>    if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>>      return false;
>>    return (mode1 == mode2
>>            || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>>                 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
>> }
> 
> Yes, but what we set tieable is e.g. V4QI and V2SF.
But in the case of a vector modes, we can usually reinterpret the 
underlying bits in whatever mode we want and do any of the usual 
operations on those bits.

In my mind that's fundamentally different than the int vs fp case.  If 
we have an integer value in an FP register, we can't really operate on 
the value in any sensible way without first copying it over to the 
integer register file and vice-versa.

Jeff
Li, Pan2 March 22, 2024, 1:15 a.m. UTC | #12
Sorry for disturbing, kindly ping for this ICE.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Tuesday, March 12, 2024 10:09 AM
To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Hi Jeff,

Is there any suggestion(s) for how to fix this ICE in the reasonable approach? Thanks a lot.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Tuesday, March 5, 2024 2:23 PM
To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Thanks Jeff for comments.

> But in the case of a vector modes, we can usually reinterpret the 
> underlying bits in whatever mode we want and do any of the usual 
> operations on those bits.

Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
And then the different modes will return by gen_low_part. Unfortunately, there are some modes
 (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, 
and return NULL_RTX result in the final ICE.

Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid
modes before goes to gen_low_part.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Monday, March 4, 2024 6:47 AM
To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 2/29/24 06:28, Robin Dapp wrote:
> On 2/29/24 02:38, Li, Pan2 wrote:
>>> So it's going to check if V2SF can be tied to DI and V4QI with SI.  I
>>> suspect those are going to fail for RISC-V as those aren't tieable.
>>
>> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V.
>>
>> static bool
>> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
>> {
>>    /* We don't allow different REG_CLASS modes tieable since it
>>       will cause ICE in register allocation (RA).
>>       E.g. V2SI and DI are not tieable.  */
>>    if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
>>      return false;
>>    return (mode1 == mode2
>>            || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
>>                 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
>> }
> 
> Yes, but what we set tieable is e.g. V4QI and V2SF.
But in the case of a vector modes, we can usually reinterpret the 
underlying bits in whatever mode we want and do any of the usual 
operations on those bits.

In my mind that's fundamentally different than the int vs fp case.  If 
we have an integer value in an FP register, we can't really operate on 
the value in any sensible way without first copying it over to the 
integer register file and vice-versa.

Jeff
Jeff Law March 22, 2024, 6:53 p.m. UTC | #13
On 3/4/24 11:22 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> But in the case of a vector modes, we can usually reinterpret the
>> underlying bits in whatever mode we want and do any of the usual
>> operations on those bits.
> 
> Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
> And then the different modes will return by gen_low_part. Unfortunately, there are some modes
>   (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg,
> and return NULL_RTX result in the final ICE.
That doesn't make a lot of sense to me.  Even for vlen=128 I would have 
expected that we can still use a subreg to access low bits.  After all 
we might have had a V16QI vector and done a reduction of some sort 
storing the result in the first element and we have to be able to 
extract that result and move it around.

I'm not real keen on a target workaround.  While extremely safe, I 
wouldn't be surprised if other ports could trigger the ICE and we'd end 
up patching up multiple targets for what is, IMHO, a more generic issue.

As Richi noted using validate_subreg here isn't great.  Does it work to 
factor out this code from extract_low_bits:


>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>       || !int_mode_for_mode (mode).exists (&int_mode))
>     return NULL_RTX;
> 
>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;

And use that in the condition (and in extract_low_bits rather than 
duplicating the code)?

jeff

ps.  No need to apologize for the pings.  This completely fell off my radar.
Li, Pan2 March 23, 2024, 5:45 a.m. UTC | #14
Thanks Jeff for comments.

> As Richi noted using validate_subreg here isn't great.  Does it work to 
> factor out this code from extract_low_bits
>
>>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>>       || !int_mode_for_mode (mode).exists (&int_mode))
>>     return NULL_RTX;
>> 
>>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>>     return NULL_RTX;
>>   if (!targetm.modes_tieable_p (int_mode, mode))
>>     return NULL_RTX;

> And use that in the condition (and in extract_low_bits rather than 
> duplicating the code)?

It can solve the ICE but will forbid all vector modes goes gen_lowpart.
Actually only the vector mode size is less than reg nature size will trigger the ICE.
Thus, how about just add one more condition before goes to gen_lowpart as below?

Feel free to correct me if any misunderstandings. 😉!

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..258d2ccc299 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
                                 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
     && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-    && targetm.modes_tieable_p (read_mode, store_mode))
+    && targetm.modes_tieable_p (read_mode, store_mode)
+    /* It's invalid in validate_subreg if read_mode size is < reg natural.  */
+    && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode)))
     read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Saturday, March 23, 2024 2:54 AM
To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 3/4/24 11:22 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> But in the case of a vector modes, we can usually reinterpret the
>> underlying bits in whatever mode we want and do any of the usual
>> operations on those bits.
> 
> Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
> And then the different modes will return by gen_low_part. Unfortunately, there are some modes
>   (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg,
> and return NULL_RTX result in the final ICE.
That doesn't make a lot of sense to me.  Even for vlen=128 I would have 
expected that we can still use a subreg to access low bits.  After all 
we might have had a V16QI vector and done a reduction of some sort 
storing the result in the first element and we have to be able to 
extract that result and move it around.

I'm not real keen on a target workaround.  While extremely safe, I 
wouldn't be surprised if other ports could trigger the ICE and we'd end 
up patching up multiple targets for what is, IMHO, a more generic issue.

As Richi noted using validate_subreg here isn't great.  Does it work to 
factor out this code from extract_low_bits:


>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>       || !int_mode_for_mode (mode).exists (&int_mode))
>     return NULL_RTX;
> 
>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;

And use that in the condition (and in extract_low_bits rather than 
duplicating the code)?

jeff

ps.  No need to apologize for the pings.  This completely fell off my radar.
Li, Pan2 April 6, 2024, 12:02 p.m. UTC | #15
Kindly ping for this ice.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Saturday, March 23, 2024 1:45 PM
To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Thanks Jeff for comments.

> As Richi noted using validate_subreg here isn't great.  Does it work to 
> factor out this code from extract_low_bits
>
>>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>>       || !int_mode_for_mode (mode).exists (&int_mode))
>>     return NULL_RTX;
>> 
>>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>>     return NULL_RTX;
>>   if (!targetm.modes_tieable_p (int_mode, mode))
>>     return NULL_RTX;

> And use that in the condition (and in extract_low_bits rather than 
> duplicating the code)?

It can solve the ICE but will forbid all vector modes goes gen_lowpart.
Actually only the vector mode size is less than reg nature size will trigger the ICE.
Thus, how about just add one more condition before goes to gen_lowpart as below?

Feel free to correct me if any misunderstandings. 😉!

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..258d2ccc299 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
                                 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
     && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-    && targetm.modes_tieable_p (read_mode, store_mode))
+    && targetm.modes_tieable_p (read_mode, store_mode)
+    /* It's invalid in validate_subreg if read_mode size is < reg natural.  */
+    && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode)))
     read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Saturday, March 23, 2024 2:54 AM
To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 3/4/24 11:22 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> But in the case of a vector modes, we can usually reinterpret the
>> underlying bits in whatever mode we want and do any of the usual
>> operations on those bits.
> 
> Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
> And then the different modes will return by gen_low_part. Unfortunately, there are some modes
>   (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg,
> and return NULL_RTX result in the final ICE.
That doesn't make a lot of sense to me.  Even for vlen=128 I would have 
expected that we can still use a subreg to access low bits.  After all 
we might have had a V16QI vector and done a reduction of some sort 
storing the result in the first element and we have to be able to 
extract that result and move it around.

I'm not real keen on a target workaround.  While extremely safe, I 
wouldn't be surprised if other ports could trigger the ICE and we'd end 
up patching up multiple targets for what is, IMHO, a more generic issue.

As Richi noted using validate_subreg here isn't great.  Does it work to 
factor out this code from extract_low_bits:


>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>       || !int_mode_for_mode (mode).exists (&int_mode))
>     return NULL_RTX;
> 
>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;

And use that in the condition (and in extract_low_bits rather than 
duplicating the code)?

jeff

ps.  No need to apologize for the pings.  This completely fell off my radar.
Li, Pan2 April 18, 2024, 1:46 a.m. UTC | #16
Kindly ping^ for this ice fix.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Saturday, April 6, 2024 8:02 PM
To: Li, Pan2 <pan2.li@intel.com>; Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Kindly ping for this ice.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Saturday, March 23, 2024 1:45 PM
To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

Thanks Jeff for comments.

> As Richi noted using validate_subreg here isn't great.  Does it work to 
> factor out this code from extract_low_bits
>
>>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>>       || !int_mode_for_mode (mode).exists (&int_mode))
>>     return NULL_RTX;
>> 
>>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>>     return NULL_RTX;
>>   if (!targetm.modes_tieable_p (int_mode, mode))
>>     return NULL_RTX;

> And use that in the condition (and in extract_low_bits rather than 
> duplicating the code)?

It can solve the ICE but will forbid all vector modes goes gen_lowpart.
Actually only the vector mode size is less than reg nature size will trigger the ICE.
Thus, how about just add one more condition before goes to gen_lowpart as below?

Feel free to correct me if any misunderstandings. 😉!

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..258d2ccc299 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
                                 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
     && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-    && targetm.modes_tieable_p (read_mode, store_mode))
+    && targetm.modes_tieable_p (read_mode, store_mode)
+    /* It's invalid in validate_subreg if read_mode size is < reg natural.  */
+    && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode)))
     read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Saturday, March 23, 2024 2:54 AM
To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val



On 3/4/24 11:22 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> But in the case of a vector modes, we can usually reinterpret the
>> underlying bits in whatever mode we want and do any of the usual
>> operations on those bits.
> 
> Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct.
> And then the different modes will return by gen_low_part. Unfortunately, there are some modes
>   (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg,
> and return NULL_RTX result in the final ICE.
That doesn't make a lot of sense to me.  Even for vlen=128 I would have 
expected that we can still use a subreg to access low bits.  After all 
we might have had a V16QI vector and done a reduction of some sort 
storing the result in the first element and we have to be able to 
extract that result and move it around.

I'm not real keen on a target workaround.  While extremely safe, I 
wouldn't be surprised if other ports could trigger the ICE and we'd end 
up patching up multiple targets for what is, IMHO, a more generic issue.

As Richi noted using validate_subreg here isn't great.  Does it work to 
factor out this code from extract_low_bits:


>   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
>       || !int_mode_for_mode (mode).exists (&int_mode))
>     return NULL_RTX;
> 
>   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>     return NULL_RTX;
>   if (!targetm.modes_tieable_p (int_mode, mode))
>     return NULL_RTX;

And use that in the condition (and in extract_low_bits rather than 
duplicating the code)?

jeff

ps.  No need to apologize for the pings.  This completely fell off my radar.
diff mbox series

Patch

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..1596da91da0 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@  get_stored_val (store_info *store_info, machine_mode read_mode,
 				 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
     && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-    && targetm.modes_tieable_p (read_mode, store_mode))
+    && targetm.modes_tieable_p (read_mode, store_mode)
+    && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+			subreg_lowpart_offset (read_mode, store_mode)))
     read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
index f79b4c142ae..624a00a4f32 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
 
 struct A { float x, y; };
 struct B { struct A u; };
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 00000000000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@ 
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}