diff mbox series

[V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack

Message ID 20230104134557.196235-1-guojiufu@linux.ibm.com
State New
Headers show
Series [V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack | expand

Commit Message

Jiufu Guo Jan. 4, 2023, 1:45 p.m. UTC
Hi,

This patch is fixing an issue about parameter accessing if the
parameter is struct type and passed through integer registers, and
there is floating member is accessed. Like below code:

typedef struct DF {double a[4]; long l; } DF;
double foo_df (DF arg){return arg.a[3];}

On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
generated.  While instruction "mtvsrd 1, 6" would be enough for
this case.

This patch updates the behavior when loading floating members of a
parameter: if that floating member is stored via integer register,
then loading it as integer mode first, and converting it to floating
mode.

Compare with previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html
Previous version supports converion from DImode to DF/SF, this
version also supports conversion from DImode to SI/HI/QI modes.

I also tried to enhance CSE/DSE for this issue.  But because the
limitations (e.g. CSE does not like new pseudo, DSE is not good
at cross-blocks), some cases (as this patch) can not be handled.

Bootstrap and regtest passes on ppc64{,le}.
Is this ok for trunk?  Thanks for comments!


BR,
Jeff (Jiufu)


	PR target/108073

gcc/ChangeLog:

	* expr.cc (extract_subreg_from_loading_word): New function.
	(expand_expr_real_1): Call extract_subreg_from_loading_word.

gcc/testsuite/ChangeLog:

	* g++.target/powerpc/pr102024.C: Updated.
	* gcc.target/powerpc/pr108073.c: New test.

---
 gcc/expr.cc                                 | 76 +++++++++++++++++++++
 gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 ++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c

Comments

Jiufu Guo March 2, 2023, 9:33 a.m. UTC | #1
Hi,

Gentle ping:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html

Thanks for comments and suggestions!

I'm thinking that we may use these patches to fix some of the issues
on parm and returns.

Sorry for the late ping for this patch to ask if this is acceptable.


BR,
Jeff (Jiufu)

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> This patch is fixing an issue about parameter accessing if the
> parameter is struct type and passed through integer registers, and
> there is floating member is accessed. Like below code:
>
> typedef struct DF {double a[4]; long l; } DF;
> double foo_df (DF arg){return arg.a[3];}
>
> On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
> generated.  While instruction "mtvsrd 1, 6" would be enough for
> this case.
>
> This patch updates the behavior when loading floating members of a
> parameter: if that floating member is stored via integer register,
> then loading it as integer mode first, and converting it to floating
> mode.
>
> Compare with previous patch:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html
> Previous version supports converion from DImode to DF/SF, this
> version also supports conversion from DImode to SI/HI/QI modes.
>
> I also tried to enhance CSE/DSE for this issue.  But because the
> limitations (e.g. CSE does not like new pseudo, DSE is not good
> at cross-blocks), some cases (as this patch) can not be handled.
>
> Bootstrap and regtest passes on ppc64{,le}.
> Is this ok for trunk?  Thanks for comments!
>
>
> BR,
> Jeff (Jiufu)
>
>
> 	PR target/108073
>
> gcc/ChangeLog:
>
> 	* expr.cc (extract_subreg_from_loading_word): New function.
> 	(expand_expr_real_1): Call extract_subreg_from_loading_word.
>
> gcc/testsuite/ChangeLog:
>
> 	* g++.target/powerpc/pr102024.C: Updated.
> 	* gcc.target/powerpc/pr108073.c: New test.
>
> ---
>  gcc/expr.cc                                 | 76 +++++++++++++++++++++
>  gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 ++++++++
>  3 files changed, 107 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d9407432ea5..6de4a985c8b 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -10631,6 +10631,69 @@ stmt_is_replaceable_p (gimple *stmt)
>    return false;
>  }
>  
> +/* Return the content of the memory slot SOURCE as MODE.
> +   SOURCE is based on BASE. BASE is a memory block that is stored via words.
> +
> +   To get the content from SOURCE:
> +   first load the word from the memory which covers the SOURCE slot first;
> +   next return the word's subreg which offsets to SOURCE slot;
> +   then convert to MODE as necessary.  */
> +
> +static rtx
> +extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base)
> +{
> +  rtx src_base = XEXP (source, 0);
> +  poly_uint64 offset = MEM_OFFSET (source);
> +
> +  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
> +    {
> +      offset += INTVAL (XEXP (src_base, 1));
> +      src_base = XEXP (src_base, 0);
> +    }
> +
> +  if (!rtx_equal_p (XEXP (base, 0), src_base))
> +    return NULL_RTX;
> +
> +  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
> +  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
> +  poly_uint64 mode_size = GET_MODE_SIZE (mode);
> +  poly_uint64 byte_off;
> +  unsigned int start;
> +  machine_mode int_mode;
> +  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
> +      && int_mode_for_mode (mode).exists (&int_mode)
> +      && can_div_trunc_p (offset, word_size, &start, &byte_off)
> +      && multiple_p (byte_off, mode_size))
> +    {
> +      rtx word_mem = copy_rtx (source);
> +      PUT_MODE (word_mem, word_mode);
> +      word_mem = adjust_address (word_mem, word_mode, -byte_off);
> +
> +      rtx word_reg = gen_reg_rtx (word_mode);
> +      emit_move_insn (word_reg, word_mem);
> +
> +      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
> +      if (!known_eq (byte_off, low_off))
> +	{
> +	  poly_uint64 shift_bytes = known_gt (byte_off, low_off)
> +				      ? byte_off - low_off
> +				      : low_off - byte_off;
> +	  word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
> +				   shift_bytes * BITS_PER_UNIT, word_reg, 0);
> +	}
> +
> +      rtx int_subreg = gen_lowpart (int_mode, word_reg);
> +      if (mode == int_mode)
> +	return int_subreg;
> +
> +      rtx int_mode_reg = gen_reg_rtx (int_mode);
> +      emit_move_insn (int_mode_reg, int_subreg);
> +      return gen_lowpart (mode, int_mode_reg);
> +    }
> +
> +  return NULL_RTX;
> +}
> +
>  rtx
>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>  		    enum expand_modifier modifier, rtx *alt_rtl,
> @@ -11812,6 +11875,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>  	    && modifier != EXPAND_WRITE)
>  	  op0 = flip_storage_order (mode1, op0);
>  
> +	/* Accessing sub-field of struct parameter which passed via integer
> +	   registers.  */
> +	if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
> +	    && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
> +	    && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
> +	    && MEM_OFFSET_KNOWN_P (op0))
> +	  {
> +	    rtx subreg
> +	      = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem));
> +	    if (subreg)
> +	      op0 = subreg;
> +	  }
> +
>  	if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
>  	    || modifier == EXPAND_CONST_ADDRESS
>  	    || modifier == EXPAND_INITIALIZER)
> diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C
> index 769585052b5..c8995cae707 100644
> --- a/gcc/testsuite/g++.target/powerpc/pr102024.C
> +++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
> @@ -5,7 +5,7 @@
>  // Test that a zero-width bit field in an otherwise homogeneous aggregate
>  // generates a psabi warning and passes arguments in GPRs.
>  
> -// { dg-final { scan-assembler-times {\mstd\M} 4 } }
> +// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
>  
>  struct a_thing
>  {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c
> new file mode 100644
> index 00000000000..91c13d896b7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
> @@ -0,0 +1,30 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -save-temps" } */
> +
> +typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF;
> +typedef struct SF {float a[4]; int i1; int i2; } SF;
> +
> +/* Each of below function contains one mtvsrd.  */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
> +short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;}
> +int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;}
> +double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;}
> +float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;}
> +float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;}
> +
> +DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4};
> +SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2};
> +
> +int main()
> +{
> +  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
> +	&& foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
> +    __builtin_abort ();
> +  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
> +	&& foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
> +    __builtin_abort ();
> +  return 0;
> +}
> +
Jiufu Guo May 11, 2023, 1:20 a.m. UTC | #2
Hi,

I would like to ping:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html

We know there are a few issues related to aggregate parameter and
returns.  I'm thinking if it is ok for trunk to use this patch to
resolve part of those issues.


BR,
Jeff (Jiufu)


Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Hi,
>
> Gentle ping:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
>
> Thanks for comments and suggestions!
>
> I'm thinking that we may use these patches to fix some of the issues
> on parm and returns.
>
> Sorry for the late ping for this patch to ask if this is acceptable.
>
>
> BR,
> Jeff (Jiufu)
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> This patch is fixing an issue about parameter accessing if the
>> parameter is struct type and passed through integer registers, and
>> there is floating member is accessed. Like below code:
>>
>> typedef struct DF {double a[4]; long l; } DF;
>> double foo_df (DF arg){return arg.a[3];}
>>
>> On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
>> generated.  While instruction "mtvsrd 1, 6" would be enough for
>> this case.
>>
>> This patch updates the behavior when loading floating members of a
>> parameter: if that floating member is stored via integer register,
>> then loading it as integer mode first, and converting it to floating
>> mode.
>>
>> Compare with previous patch:
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html
>> Previous version supports converion from DImode to DF/SF, this
>> version also supports conversion from DImode to SI/HI/QI modes.
>>
>> I also tried to enhance CSE/DSE for this issue.  But because the
>> limitations (e.g. CSE does not like new pseudo, DSE is not good
>> at cross-blocks), some cases (as this patch) can not be handled.
>>
>> Bootstrap and regtest passes on ppc64{,le}.
>> Is this ok for trunk?  Thanks for comments!
>>
>>
>> BR,
>> Jeff (Jiufu)
>>
>>
>> 	PR target/108073
>>
>> gcc/ChangeLog:
>>
>> 	* expr.cc (extract_subreg_from_loading_word): New function.
>> 	(expand_expr_real_1): Call extract_subreg_from_loading_word.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* g++.target/powerpc/pr102024.C: Updated.
>> 	* gcc.target/powerpc/pr108073.c: New test.
>>
>> ---
>>  gcc/expr.cc                                 | 76 +++++++++++++++++++++
>>  gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
>>  gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 ++++++++
>>  3 files changed, 107 insertions(+), 1 deletion(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
>>
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index d9407432ea5..6de4a985c8b 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -10631,6 +10631,69 @@ stmt_is_replaceable_p (gimple *stmt)
>>    return false;
>>  }
>>  
>> +/* Return the content of the memory slot SOURCE as MODE.
>> +   SOURCE is based on BASE. BASE is a memory block that is stored via words.
>> +
>> +   To get the content from SOURCE:
>> +   first load the word from the memory which covers the SOURCE slot first;
>> +   next return the word's subreg which offsets to SOURCE slot;
>> +   then convert to MODE as necessary.  */
>> +
>> +static rtx
>> +extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base)
>> +{
>> +  rtx src_base = XEXP (source, 0);
>> +  poly_uint64 offset = MEM_OFFSET (source);
>> +
>> +  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
>> +    {
>> +      offset += INTVAL (XEXP (src_base, 1));
>> +      src_base = XEXP (src_base, 0);
>> +    }
>> +
>> +  if (!rtx_equal_p (XEXP (base, 0), src_base))
>> +    return NULL_RTX;
>> +
>> +  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
>> +  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
>> +  poly_uint64 mode_size = GET_MODE_SIZE (mode);
>> +  poly_uint64 byte_off;
>> +  unsigned int start;
>> +  machine_mode int_mode;
>> +  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
>> +      && int_mode_for_mode (mode).exists (&int_mode)
>> +      && can_div_trunc_p (offset, word_size, &start, &byte_off)
>> +      && multiple_p (byte_off, mode_size))
>> +    {
>> +      rtx word_mem = copy_rtx (source);
>> +      PUT_MODE (word_mem, word_mode);
>> +      word_mem = adjust_address (word_mem, word_mode, -byte_off);
>> +
>> +      rtx word_reg = gen_reg_rtx (word_mode);
>> +      emit_move_insn (word_reg, word_mem);
>> +
>> +      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
>> +      if (!known_eq (byte_off, low_off))
>> +	{
>> +	  poly_uint64 shift_bytes = known_gt (byte_off, low_off)
>> +				      ? byte_off - low_off
>> +				      : low_off - byte_off;
>> +	  word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
>> +				   shift_bytes * BITS_PER_UNIT, word_reg, 0);
>> +	}
>> +
>> +      rtx int_subreg = gen_lowpart (int_mode, word_reg);
>> +      if (mode == int_mode)
>> +	return int_subreg;
>> +
>> +      rtx int_mode_reg = gen_reg_rtx (int_mode);
>> +      emit_move_insn (int_mode_reg, int_subreg);
>> +      return gen_lowpart (mode, int_mode_reg);
>> +    }
>> +
>> +  return NULL_RTX;
>> +}
>> +
>>  rtx
>>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>>  		    enum expand_modifier modifier, rtx *alt_rtl,
>> @@ -11812,6 +11875,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>>  	    && modifier != EXPAND_WRITE)
>>  	  op0 = flip_storage_order (mode1, op0);
>>  
>> +	/* Accessing sub-field of struct parameter which passed via integer
>> +	   registers.  */
>> +	if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
>> +	    && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
>> +	    && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
>> +	    && MEM_OFFSET_KNOWN_P (op0))
>> +	  {
>> +	    rtx subreg
>> +	      = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem));
>> +	    if (subreg)
>> +	      op0 = subreg;
>> +	  }
>> +
>>  	if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
>>  	    || modifier == EXPAND_CONST_ADDRESS
>>  	    || modifier == EXPAND_INITIALIZER)
>> diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C
>> index 769585052b5..c8995cae707 100644
>> --- a/gcc/testsuite/g++.target/powerpc/pr102024.C
>> +++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
>> @@ -5,7 +5,7 @@
>>  // Test that a zero-width bit field in an otherwise homogeneous aggregate
>>  // generates a psabi warning and passes arguments in GPRs.
>>  
>> -// { dg-final { scan-assembler-times {\mstd\M} 4 } }
>> +// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
>>  
>>  struct a_thing
>>  {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c
>> new file mode 100644
>> index 00000000000..91c13d896b7
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
>> @@ -0,0 +1,30 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -save-temps" } */
>> +
>> +typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF;
>> +typedef struct SF {float a[4]; int i1; int i2; } SF;
>> +
>> +/* Each of below function contains one mtvsrd.  */
>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
>> +/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
>> +/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
>> +short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;}
>> +int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;}
>> +double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;}
>> +float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;}
>> +float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;}
>> +
>> +DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4};
>> +SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2};
>> +
>> +int main()
>> +{
>> +  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
>> +	&& foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
>> +    __builtin_abort ();
>> +  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
>> +	&& foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> +
Jeff Law June 10, 2023, 5:40 p.m. UTC | #3
On 5/10/23 19:20, Jiufu Guo wrote:
> 
> Hi,
> 
> I would like to ping:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
> 
> We know there are a few issues related to aggregate parameter and
> returns.  I'm thinking if it is ok for trunk to use this patch to
> resolve part of those issues.
It looks like the patch is focused on emitting a load of the object from 
memory into a GPR, then copying the GPR into pseudo (which hopefully 
gets allocated into an FPR).  That would seem to indicate the value got 
flushed to memory at some point.  Presumably because the type of the 
object it not one that we would typically allow in registers, except for 
some special cases for parameter passing and return values?

If that's the case, then is there any value in finding the flush to the 
stack and just emitting a copy from the GPR into the destination pseudo 
at that point?

Or is it just easier to construct a load from the flushback area and let 
CSE/DCE/DSE clean things up?

Jeff

I
Jiufu Guo June 13, 2023, 2:58 a.m. UTC | #4
Hi,

Jeff Law <jeffreyalaw@gmail.com> writes:

> On 5/10/23 19:20, Jiufu Guo wrote:
>>
>> Hi,
>>
>> I would like to ping:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
>>
>> We know there are a few issues related to aggregate parameter and
>> returns.  I'm thinking if it is ok for trunk to use this patch to
>> resolve part of those issues.
> It looks like the patch is focused on emitting a load of the object
> from memory into a GPR, then copying the GPR into pseudo (which
> hopefully gets allocated into an FPR).  That would seem to indicate
> the value got flushed to memory at some point.  Presumably because the
> type of the object it not one that we would typically allow in
> registers, except for some special cases for parameter passing and
> return values?

Yes.  The the object would be in a different mode as the nature mode
of the register of the parameter/returns.

>
> If that's the case, then is there any value in finding the flush to
> the stack and just emitting a copy from the GPR into the destination
> pseudo at that point?
>
> Or is it just easier to construct a load from the flushback area and
> let CSE/DCE/DSE clean things up?
Right! The idea of this patch is to construct a load that can expose the
opportunity for CSE/DCE/DSE passes to optimize the code.

And I'm also trying to use another solution to handle this issue: like
light-sra in expander for struct parameter.  Sorry for missing a recall
of this patch.

Thank you very much for your comments!!


BR,
Jeff (Jiufu)

>
> Jeff
>
> I
diff mbox series

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..6de4a985c8b 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10631,6 +10631,69 @@  stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* Return the content of the memory slot SOURCE as MODE.
+   SOURCE is based on BASE. BASE is a memory block that is stored via words.
+
+   To get the content from SOURCE:
+   first load the word from the memory which covers the SOURCE slot first;
+   next return the word's subreg which offsets to SOURCE slot;
+   then convert to MODE as necessary.  */
+
+static rtx
+extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base)
+{
+  rtx src_base = XEXP (source, 0);
+  poly_uint64 offset = MEM_OFFSET (source);
+
+  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
+    {
+      offset += INTVAL (XEXP (src_base, 1));
+      src_base = XEXP (src_base, 0);
+    }
+
+  if (!rtx_equal_p (XEXP (base, 0), src_base))
+    return NULL_RTX;
+
+  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
+  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
+  poly_uint64 mode_size = GET_MODE_SIZE (mode);
+  poly_uint64 byte_off;
+  unsigned int start;
+  machine_mode int_mode;
+  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
+      && int_mode_for_mode (mode).exists (&int_mode)
+      && can_div_trunc_p (offset, word_size, &start, &byte_off)
+      && multiple_p (byte_off, mode_size))
+    {
+      rtx word_mem = copy_rtx (source);
+      PUT_MODE (word_mem, word_mode);
+      word_mem = adjust_address (word_mem, word_mode, -byte_off);
+
+      rtx word_reg = gen_reg_rtx (word_mode);
+      emit_move_insn (word_reg, word_mem);
+
+      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
+      if (!known_eq (byte_off, low_off))
+	{
+	  poly_uint64 shift_bytes = known_gt (byte_off, low_off)
+				      ? byte_off - low_off
+				      : low_off - byte_off;
+	  word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
+				   shift_bytes * BITS_PER_UNIT, word_reg, 0);
+	}
+
+      rtx int_subreg = gen_lowpart (int_mode, word_reg);
+      if (mode == int_mode)
+	return int_subreg;
+
+      rtx int_mode_reg = gen_reg_rtx (int_mode);
+      emit_move_insn (int_mode_reg, int_subreg);
+      return gen_lowpart (mode, int_mode_reg);
+    }
+
+  return NULL_RTX;
+}
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier, rtx *alt_rtl,
@@ -11812,6 +11875,19 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	    && modifier != EXPAND_WRITE)
 	  op0 = flip_storage_order (mode1, op0);
 
+	/* Accessing sub-field of struct parameter which passed via integer
+	   registers.  */
+	if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
+	    && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
+	    && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
+	    && MEM_OFFSET_KNOWN_P (op0))
+	  {
+	    rtx subreg
+	      = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem));
+	    if (subreg)
+	      op0 = subreg;
+	  }
+
 	if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
 	    || modifier == EXPAND_CONST_ADDRESS
 	    || modifier == EXPAND_INITIALIZER)
diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C
index 769585052b5..c8995cae707 100644
--- a/gcc/testsuite/g++.target/powerpc/pr102024.C
+++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
@@ -5,7 +5,7 @@ 
 // Test that a zero-width bit field in an otherwise homogeneous aggregate
 // generates a psabi warning and passes arguments in GPRs.
 
-// { dg-final { scan-assembler-times {\mstd\M} 4 } }
+// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
 
 struct a_thing
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c
new file mode 100644
index 00000000000..91c13d896b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF;
+typedef struct SF {float a[4]; int i1; int i2; } SF;
+
+/* Each of below function contains one mtvsrd.  */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;}
+int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;}
+double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;}
+float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;}
+float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;}
+
+DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4};
+SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2};
+
+int main()
+{
+  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
+	&& foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
+    __builtin_abort ();
+  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
+	&& foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
+    __builtin_abort ();
+  return 0;
+}
+