diff mbox series

[v2] DSE: Allow vector type for get_stored_val when read < store

Message ID 20231109060858.3067686-1-pan2.li@intel.com
State New
Headers show
Series [v2] DSE: Allow vector type for get_stored_val when read < store | expand

Commit Message

Li, Pan2 Nov. 9, 2023, 6:08 a.m. UTC
From: Pan Li <pan2.li@intel.com>

Update in v2:
* Move vector type support to get_stored_val.

Original log:

This patch allows vector modes in get_stored_val in DSE.
Doing so is valid for the read rtx if and only if the
read bitsize is less than the stored bitsize.

Given the example code below, compiled with
--param=riscv-autovec-preference=fixed-vlmax.

vuint8m1_t test () {
  uint8_t arr[32] = {
    1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
    1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
  };

  return __riscv_vle8_v_u8m1(arr, 32);
}

Before this patch:
test:
  lui     a5,%hi(.LANCHOR0)
  addi    sp,sp,-32
  addi    a5,a5,%lo(.LANCHOR0)
  li      a3,32
  vl2re64.v       v2,0(a5)
  vsetvli zero,a3,e8,m1,ta,ma
  vs2r.v  v2,0(sp)             <== Unnecessary store to stack
  vle8.v  v1,0(sp)             <== Ditto
  vs1r.v  v1,0(a0)
  addi    sp,sp,32
  jr      ra

After this patch:
test:
  lui     a5,%hi(.LANCHOR0)
  addi    a5,a5,%lo(.LANCHOR0)
  li      a4,32
  addi    sp,sp,-32
  vsetvli zero,a4,e8,m1,ta,ma
  vle8.v  v1,0(a5)
  vs1r.v  v1,0(a0)
  addi    sp,sp,32
  jr      ra

The following tests pass with this patch:

* The x86 bootstrap and regression test.
* The aarch64 regression test.
* The risc-v regression test.

	PR target/111720

gcc/ChangeLog:

	* dse.cc (get_stored_val): Allow vector mode if the read
	bitsize is less than stored bitsize.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr111720-0.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-1.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-10.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-2.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-3.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-4.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-5.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-6.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-7.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-8.c: New test.
	* gcc.target/riscv/rvv/base/pr111720-9.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/dse.cc                                    |  4 ++++
 .../gcc.target/riscv/rvv/base/pr111720-0.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-1.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-10.c   | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-2.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-3.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-4.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-5.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-6.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-7.c    | 21 +++++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-8.c    | 18 ++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr111720-9.c    | 15 +++++++++++++
 12 files changed, 202 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c

Comments

Jeff Law Nov. 9, 2023, 4:16 p.m. UTC | #1
On 11/8/23 23:08, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> Update in v2:
> * Move vector type support to get_stored_val.
> 
> Original log:
> 
> This patch would like to allow the vector mode in the
> get_stored_val in the DSE. It is valid for the read
> rtx if and only if the read bitsize is less than the
> stored bitsize.
> 
> Given below example code with
> --param=riscv-autovec-preference=fixed-vlmax.
> 
> vuint8m1_t test () {
>    uint8_t arr[32] = {
>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>    };
> 
>    return __riscv_vle8_v_u8m1(arr, 32);
> }
> 
> Before this patch:
> test:
>    lui     a5,%hi(.LANCHOR0)
>    addi    sp,sp,-32
>    addi    a5,a5,%lo(.LANCHOR0)
>    li      a3,32
>    vl2re64.v       v2,0(a5)
>    vsetvli zero,a3,e8,m1,ta,ma
>    vs2r.v  v2,0(sp)             <== Unnecessary store to stack
>    vle8.v  v1,0(sp)             <== Ditto
>    vs1r.v  v1,0(a0)
>    addi    sp,sp,32
>    jr      ra
> 
> After this patch:
> test:
>    lui     a5,%hi(.LANCHOR0)
>    addi    a5,a5,%lo(.LANCHOR0)
>    li      a4,32
>    addi    sp,sp,-32
>    vsetvli zero,a4,e8,m1,ta,ma
>    vle8.v  v1,0(a5)
>    vs1r.v  v1,0(a0)
>    addi    sp,sp,32
>    jr      ra
> 
> Below tests are passed within this patch:
> 
> * The x86 bootstrap and regression test.
> * The aarch64 regression test.
> * The risc-v regression test.
> 
> 	PR target/111720
> 
> gcc/ChangeLog:
> 
> 	* dse.cc (get_stored_val): Allow vector mode if the read
> 	bitsize is less than stored bitsize.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/pr111720-0.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-1.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-10.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-2.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-3.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-4.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-5.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-6.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-7.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-8.c: New test.
> 	* gcc.target/riscv/rvv/base/pr111720-9.c: New test.
We're always getting the lowpart here AFAICT and it appears that the
right thing should happen if gen_lowpart_common fails (it returns NULL,
which bubbles up and is the right return value from get_stored_val if it
can't be optimized).

Did you want to use known_le so that you'd pick up the case when the two
modes are the same size?  Or was known_lt the test you really wanted
(and if so, why)?


OK using known_lt, or known_le.  If you decide to change to known_le, 
you'll need to bootstrap & regression test again on x86.



jeff
Richard Sandiford Nov. 11, 2023, 3:23 p.m. UTC | #2
Jeff Law <jeffreyalaw@gmail.com> writes:
> On 11/8/23 23:08, pan2.li@intel.com wrote:
>> From: Pan Li <pan2.li@intel.com>
>> 
>> Update in v2:
>> * Move vector type support to get_stored_val.
>> 
>> Original log:
>> 
>> This patch would like to allow the vector mode in the
>> get_stored_val in the DSE. It is valid for the read
>> rtx if and only if the read bitsize is less than the
>> stored bitsize.
>> 
>> Given below example code with
>> --param=riscv-autovec-preference=fixed-vlmax.
>> 
>> vuint8m1_t test () {
>>    uint8_t arr[32] = {
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>    };
>> 
>>    return __riscv_vle8_v_u8m1(arr, 32);
>> }
>> 
>> Before this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    sp,sp,-32
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a3,32
>>    vl2re64.v       v2,0(a5)
>>    vsetvli zero,a3,e8,m1,ta,ma
>>    vs2r.v  v2,0(sp)             <== Unnecessary store to stack
>>    vle8.v  v1,0(sp)             <== Ditto
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> After this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a4,32
>>    addi    sp,sp,-32
>>    vsetvli zero,a4,e8,m1,ta,ma
>>    vle8.v  v1,0(a5)
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> Below tests are passed within this patch:
>> 
>> * The x86 bootstrap and regression test.
>> * The aarch64 regression test.
>> * The risc-v regression test.
>> 
>> 	PR target/111720
>> 
>> gcc/ChangeLog:
>> 
>> 	* dse.cc (get_stored_val): Allow vector mode if the read
>> 	bitsize is less than stored bitsize.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/riscv/rvv/base/pr111720-0.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-1.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-10.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-2.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-3.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-4.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-5.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-6.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-7.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-8.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-9.c: New test.
> We're always getting the lowpart here AFAICT and it appears that all the 
> right thing should happen if gen_lowpart_common fails (it returns NULL, 
> which bubbles up and is the right return value from get_stored_val if it 
> can't be optimized).

Yeah, we should always be operating on the lowpart, but it looks
like there's a latent bug.  This check:

  if (gap.is_constant () && maybe_ne (gap, 0))
    {
      ...
    }
  else ...

means that we ignore the gap if it's a nonzero runtime value.
I guess it should be:

  if (maybe_ne (gap, 0))
    {
      if (!gap.is_constant ())
        return NULL_RTX;
      ...
    }

instead.  Alternatively, we could remove the is_constant condition
and fix PR87815 in a different way, e.g. by protecting the
smallest_int_mode_for_size with a tighter condition.  That might
allow a similar DSE optimisation to this patch for nonzero offsets,
thanks to:

      if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
	  && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
	{
	  /* Try to implement the shift using a subreg.  */
          ...

> Did you want to use known_le so that you'd pick up the case when the two 
> modes are the same size?  Or was known_lt the test you really wanted 
> (and if so, why).

Agree it should be known_le FWIW.

Thanks,
Richard
Li, Pan2 Nov. 12, 2023, 2:30 a.m. UTC | #3
Thanks Richard S and Jeff for comments.

> Did you want to use known_le so that you'd pick up the case when the two 
> modes are the same size?  Or was known_lt the test you really wanted 
> (and if so, why).

I used known_lt in v2 so that the equal-size case would keep going through the original code path.
I have just tried known_le and got a variety of ICEs during testing; I suspect they are related to the
latent bug that Richard S mentioned.

> instead.  Alternatively, we could remove the is_constant condition
> and fix PR87815 in a different way, e.g. by protecting the
> smallest_int_mode_for_size with a tighter condition.  That might
> allow a similar DSE optimisation to this patch for nonzero offsets,
> thanks to:

Thus, it looks like we should first fix PR87815 in the way suggested by Richard S, before
we switch to known_le for vectors here.

I will give it a try soon and keep you posted.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Saturday, November 11, 2023 11:23 PM
To: Jeff Law <jeffreyalaw@gmail.com>
Cc: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; richard.guenther@gmail.com
Subject: Re: [PATCH v2] DSE: Allow vector type for get_stored_val when read < store

Jeff Law <jeffreyalaw@gmail.com> writes:
> On 11/8/23 23:08, pan2.li@intel.com wrote:
>> From: Pan Li <pan2.li@intel.com>
>> 
>> Update in v2:
>> * Move vector type support to get_stored_val.
>> 
>> Original log:
>> 
>> This patch would like to allow the vector mode in the
>> get_stored_val in the DSE. It is valid for the read
>> rtx if and only if the read bitsize is less than the
>> stored bitsize.
>> 
>> Given below example code with
>> --param=riscv-autovec-preference=fixed-vlmax.
>> 
>> vuint8m1_t test () {
>>    uint8_t arr[32] = {
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>    };
>> 
>>    return __riscv_vle8_v_u8m1(arr, 32);
>> }
>> 
>> Before this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    sp,sp,-32
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a3,32
>>    vl2re64.v       v2,0(a5)
>>    vsetvli zero,a3,e8,m1,ta,ma
>>    vs2r.v  v2,0(sp)             <== Unnecessary store to stack
>>    vle8.v  v1,0(sp)             <== Ditto
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> After this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a4,32
>>    addi    sp,sp,-32
>>    vsetvli zero,a4,e8,m1,ta,ma
>>    vle8.v  v1,0(a5)
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> Below tests are passed within this patch:
>> 
>> * The x86 bootstrap and regression test.
>> * The aarch64 regression test.
>> * The risc-v regression test.
>> 
>> 	PR target/111720
>> 
>> gcc/ChangeLog:
>> 
>> 	* dse.cc (get_stored_val): Allow vector mode if the read
>> 	bitsize is less than stored bitsize.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/riscv/rvv/base/pr111720-0.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-1.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-10.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-2.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-3.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-4.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-5.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-6.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-7.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-8.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-9.c: New test.
> We're always getting the lowpart here AFAICT and it appears that all the 
> right thing should happen if gen_lowpart_common fails (it returns NULL, 
> which bubbles up and is the right return value from get_stored_val if it 
> can't be optimized).

Yeah, we should always be operating on the lowpart, but it looks
like there's a latent bug.  This check:

  if (gap.is_constant () && maybe_ne (gap, 0))
    {
      ...
    }
  else ...

means that we ignore the gap if it's a nonzero runtime value.
I guess it should be:

  if (maybe_ne (gap, 0))
    {
      if (!gap.is_constant ())
        return NULL_RTX;
      ...
    }

instead.  Alternatively, we could remove the is_constant condition
and fix PR87815 in a different way, e.g. by protecting the
smallest_int_mode_for_size with a tighter condition.  That might
allow a similar DSE optimisation to this patch for nonzero offsets,
thanks to:

      if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
	  && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
	{
	  /* Try to implement the shift using a subreg.  */
          ...

> Did you want to use known_le so that you'd pick up the case when the two 
> modes are the same size?  Or was known_lt the test you really wanted 
> (and if so, why).

Agree it should be known_le FWIW.

Thanks,
Richard
Li, Pan2 Nov. 13, 2023, 3:25 a.m. UTC | #4
v4 has been posted at the link below; please ignore v3.

https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636216.html

Sorry for the inconvenience.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Sunday, November 12, 2023 10:31 AM
To: Richard Sandiford <richard.sandiford@arm.com>; Jeff Law <jeffreyalaw@gmail.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; richard.guenther@gmail.com
Subject: RE: [PATCH v2] DSE: Allow vector type for get_stored_val when read < store

Thanks Richard S and Jeff for comments.

> Did you want to use known_le so that you'd pick up the case when the two 
> modes are the same size?  Or was known_lt the test you really wanted 
> (and if so, why).

Take known_lt in v2 due to consideration that leave the equal go to original code path.
Just have a try for known_le and got sorts of ICE when test, I bet it may be related to the
latent bug as Richard S mentioned.

> instead.  Alternatively, we could remove the is_constant condition
> and fix PR87815 in a different way, e.g. by protecting the
> smallest_int_mode_for_size with a tighter condition.  That might
> allow a similar DSE optimisation to this patch for nonzero offsets,
> thanks to:

Thus, looks like we should fix the PR87815 from the way suggested by Richard S, before
we take known_le for vector here.

I will have a try soon and keep you posted.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Saturday, November 11, 2023 11:23 PM
To: Jeff Law <jeffreyalaw@gmail.com>
Cc: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng@gmail.com; richard.guenther@gmail.com
Subject: Re: [PATCH v2] DSE: Allow vector type for get_stored_val when read < store

Jeff Law <jeffreyalaw@gmail.com> writes:
> On 11/8/23 23:08, pan2.li@intel.com wrote:
>> From: Pan Li <pan2.li@intel.com>
>> 
>> Update in v2:
>> * Move vector type support to get_stored_val.
>> 
>> Original log:
>> 
>> This patch would like to allow the vector mode in the
>> get_stored_val in the DSE. It is valid for the read
>> rtx if and only if the read bitsize is less than the
>> stored bitsize.
>> 
>> Given below example code with
>> --param=riscv-autovec-preference=fixed-vlmax.
>> 
>> vuint8m1_t test () {
>>    uint8_t arr[32] = {
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>      1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
>>    };
>> 
>>    return __riscv_vle8_v_u8m1(arr, 32);
>> }
>> 
>> Before this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    sp,sp,-32
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a3,32
>>    vl2re64.v       v2,0(a5)
>>    vsetvli zero,a3,e8,m1,ta,ma
>>    vs2r.v  v2,0(sp)             <== Unnecessary store to stack
>>    vle8.v  v1,0(sp)             <== Ditto
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> After this patch:
>> test:
>>    lui     a5,%hi(.LANCHOR0)
>>    addi    a5,a5,%lo(.LANCHOR0)
>>    li      a4,32
>>    addi    sp,sp,-32
>>    vsetvli zero,a4,e8,m1,ta,ma
>>    vle8.v  v1,0(a5)
>>    vs1r.v  v1,0(a0)
>>    addi    sp,sp,32
>>    jr      ra
>> 
>> Below tests are passed within this patch:
>> 
>> * The x86 bootstrap and regression test.
>> * The aarch64 regression test.
>> * The risc-v regression test.
>> 
>> 	PR target/111720
>> 
>> gcc/ChangeLog:
>> 
>> 	* dse.cc (get_stored_val): Allow vector mode if the read
>> 	bitsize is less than stored bitsize.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/riscv/rvv/base/pr111720-0.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-1.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-10.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-2.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-3.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-4.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-5.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-6.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-7.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-8.c: New test.
>> 	* gcc.target/riscv/rvv/base/pr111720-9.c: New test.
> We're always getting the lowpart here AFAICT and it appears that all the 
> right thing should happen if gen_lowpart_common fails (it returns NULL, 
> which bubbles up and is the right return value from get_stored_val if it 
> can't be optimized).

Yeah, we should always be operating on the lowpart, but it looks
like there's a latent bug.  This check:

  if (gap.is_constant () && maybe_ne (gap, 0))
    {
      ...
    }
  else ...

means that we ignore the gap if it's a nonzero runtime value.
I guess it should be:

  if (maybe_ne (gap, 0))
    {
      if (!gap.is_constant ())
        return NULL_RTX;
      ...
    }

instead.  Alternatively, we could remove the is_constant condition
and fix PR87815 in a different way, e.g. by protecting the
smallest_int_mode_for_size with a tighter condition.  That might
allow a similar DSE optimisation to this patch for nonzero offsets,
thanks to:

      if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
	  && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
	{
	  /* Try to implement the shift using a subreg.  */
          ...

> Did you want to use known_le so that you'd pick up the case when the two 
> modes are the same size?  Or was known_lt the test you really wanted 
> (and if so, why).

Agree it should be known_le FWIW.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/dse.cc b/gcc/dse.cc
index 1a85dae1f8c..21004becd4a 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1940,6 +1940,10 @@  get_stored_val (store_info *store_info, machine_mode read_mode,
 	       || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
     read_reg = extract_low_bits (read_mode, store_mode,
 				 copy_rtx (store_info->const_rhs));
+  else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
+    && known_lt (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
+    && targetm.modes_tieable_p (read_mode, store_mode))
+    read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
     read_reg = extract_low_bits (read_mode, store_mode,
 				 copy_rtx (store_info->rhs));
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
new file mode 100644
index 00000000000..a61e94a6d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m1_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m1(arr, 32);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
new file mode 100644
index 00000000000..46efd7379ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m2_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m2(arr, 32);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
new file mode 100644
index 00000000000..8bebac219a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vbool4_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vlm_v_b4(arr, 32);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
new file mode 100644
index 00000000000..47e4243e02e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m1_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m1(arr, 16);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
new file mode 100644
index 00000000000..5331e547ed3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m2_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m2(arr, 8);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
new file mode 100644
index 00000000000..0c728f93514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8mf2_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8mf2(arr, 32);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
new file mode 100644
index 00000000000..ccfc40cd382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m2_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m2(arr, 4);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
new file mode 100644
index 00000000000..ce7ddbb99b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint8m8_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  return __riscv_vle8_v_u8m8(arr, 32);
+}
+
+/* { dg-final { scan-assembler-times {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
+/* { dg-final { scan-assembler-times {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
new file mode 100644
index 00000000000..ac0100a1211
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vbool8_t test () {
+  uint8_t arr[32] = {
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+    1, 2, 7, 1, 3, 4, 5, 3,
+    1, 0, 1, 2, 4, 4, 9, 9,
+  };
+
+  vuint8m1_t varr = __riscv_vle8_v_u8m1(arr, 32);
+  vuint8m1_t vand_m = __riscv_vand_vx_u8m1(varr, 1, 32);
+
+  return __riscv_vreinterpret_v_u8m1_b8(vand_m);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
new file mode 100644
index 00000000000..b7ebef80954
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vfloat32m1_t test () {
+  float arr[32] = {
+    1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
+    1.0, 0.2, 1.8, 2.2, 4.3, 4.7, 9.5, 9.3,
+    1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
+    1.0, 0.2, 1.8, 2.2, 4.3, 4.7, 9.5, 9.3,
+  };
+
+  return __riscv_vle32_v_f32m1(arr, 32);
+}
+
+/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
+/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
new file mode 100644
index 00000000000..21fed06d201
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vfloat64m8_t test () {
+  double arr[8] = {
+    1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
+  };
+
+  return __riscv_vle64_v_f64m8(arr, 4);
+}
+
+/* { dg-final { scan-assembler-times {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
+/* { dg-final { scan-assembler-times {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */