diff mbox series

[2/4,GCC11] Add target hook stride_dform_valid_p

Message ID c58c8a2d-7b02-7946-d9ce-1c8f60342cad@linux.ibm.com
State New
Headers show
Series IVOPTs consider step cost for different forms when unrolling | expand

Commit Message

Kewen.Lin Jan. 16, 2020, 9:40 a.m. UTC
gcc/ChangeLog

2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>

	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
	(rs6000_stride_dform_valid_p): New function.
	* doc/tm.texi: Regenerate.
	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
	* target.def (stride_dform_valid_p): New hook.
gcc/config/rs6000/rs6000.c | 40 ++++++++++++++++++++++++++++++++++++++++
 gcc/doc/tm.texi            |  8 ++++++++
 gcc/doc/tm.texi.in         |  2 ++
 gcc/target.def             | 13 ++++++++++++-
 4 files changed, 62 insertions(+), 1 deletion(-)

Comments

Richard Sandiford Jan. 20, 2020, 10:42 a.m. UTC | #1
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> gcc/ChangeLog
>
> 2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>
>
> 	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
> 	(rs6000_stride_dform_valid_p): New function.
> 	* doc/tm.texi: Regenerate.
> 	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
> 	* target.def (stride_dform_valid_p): New hook.

It looks like we should able to derive this information from the normal
legitimate_address_p hook.

Also, "D-form" vs. "X-form" is AFAIK a PowerPC-specific classification.
It would be good to use a more generic term in target-independent code.

Thanks,
Richard



>
>  gcc/config/rs6000/rs6000.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  gcc/doc/tm.texi            |  8 ++++++++
>  gcc/doc/tm.texi.in         |  2 ++
>  gcc/target.def             | 13 ++++++++++++-
>  4 files changed, 62 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 0dabaa6..1e41fcf 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1657,6 +1657,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
>  #undef TARGET_PREDICT_DOLOOP_P
>  #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
>  
> +#undef TARGET_STRIDE_DFORM_VALID_P
> +#define TARGET_STRIDE_DFORM_VALID_P rs6000_stride_dform_valid_p
> +
>  #undef TARGET_HAVE_COUNT_REG_DECR_P
>  #define TARGET_HAVE_COUNT_REG_DECR_P true
>  
> @@ -26272,6 +26275,43 @@ rs6000_predict_doloop_p (struct loop *loop)
>    return true;
>  }
>  
> +/* Return true if the memory access with mode MODE, signedness SIGNED_P and
> +   store STORE_P with offset from 0 to (NUNROLL-1) * STRIDE are valid with
> +   D-form instructions.  */
> +
> +static bool
> +rs6000_stride_dform_valid_p (machine_mode mode, signed HOST_WIDE_INT stride,
> +			  bool signed_p, bool store_p, unsigned nunroll)
> +{
> +  static const HOST_WIDE_INT max_bound = 0x7fff;
> +  static const HOST_WIDE_INT min_bound = -0x8000;
> +
> +  if (!IN_RANGE ((nunroll - 1) * stride, min_bound, max_bound))
> +    return false;
> +
> +  /* Check DQ-form for vector mode or float128 mode.  */
> +  if (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
> +    {
> +      if (mode_supports_dq_form (mode) && !(stride & 0xF))
> +	return true;
> +      else
> +	return false;
> +    }
> +
> +  /* Simply consider non VSX instructions.  */
> +  if (mode == QImode || mode == HImode || mode == SFmode || mode == DFmode)
> +    return true;
> +
> +  /* lwz/stw is D-form, but lwa is DS-form.  */
> +  if (mode == SImode && (!signed_p || store_p || !(stride & 0x03)))
> +    return true;
> +
> +  if (mode == DImode && !(stride & 0x03))
> +    return true;
> +
> +  return false;
> +}
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>  
>  #include "gt-rs6000.h"
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 86ad278..0b8bc7c 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11669,6 +11669,14 @@ function version at run-time for a given set of function versions.
>  body must be generated.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} bool TARGET_STRIDE_DFORM_VALID_P (machine_mode @var{mode}, signed HOST_WIDE_INT @var{stride}, bool @var{signed_p}, bool @var{store_p}, unsigned @var{nunroll})
> +For a given memory access, check whether it is valid to put 0, @var{stride}
> +, 2 * @var{stride}, ... , (@var{nunroll} - 1) to the instruction D-form
> +displacement, with mode @var{mode}, signedness @var{signed_p} and store
> +@var{store_p}.  Return true if valid.
> +The default version of this hook returns false.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} bool TARGET_PREDICT_DOLOOP_P (class loop *@var{loop})
>  Return true if we can predict it is possible to use a low-overhead loop
>  for a particular loop.  The parameter @var{loop} is a pointer to the loop.
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index fd9769e..e90d020 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7953,6 +7953,8 @@ to by @var{ce_info}.
>  
>  @hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
>  
> +@hook TARGET_STRIDE_DFORM_VALID_P
> +
>  @hook TARGET_PREDICT_DOLOOP_P
>  
>  @hook TARGET_HAVE_COUNT_REG_DECR_P
> diff --git a/gcc/target.def b/gcc/target.def
> index f61c831..ee19a8d 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4300,7 +4300,18 @@ DEFHOOK
>   emits a @code{speculation_barrier} instruction if that is defined.",
>  rtx, (machine_mode mode, rtx result, rtx val, rtx failval),
>   default_speculation_safe_value)
> - 
> +
> +DEFHOOK
> +(stride_dform_valid_p,
> + "For a given memory access, check whether it is valid to put 0, @var{stride}\n\
> +, 2 * @var{stride}, ... , (@var{nunroll} - 1) to the instruction D-form\n\
> +displacement, with mode @var{mode}, signedness @var{signed_p} and store\n\
> +@var{store_p}.  Return true if valid.\n\
> +The default version of this hook returns false.",
> + bool, (machine_mode mode, signed HOST_WIDE_INT stride, bool signed_p,
> + bool store_p, unsigned nunroll),
> + NULL)
> +
>  DEFHOOK
>  (predict_doloop_p,
>   "Return true if we can predict it is possible to use a low-overhead loop\n\
Richard Biener Jan. 20, 2020, 11:37 a.m. UTC | #2
On Mon, 20 Jan 2020, Richard Sandiford wrote:

> "Kewen.Lin" <linkw@linux.ibm.com> writes:
> > gcc/ChangeLog
> >
> > 2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>
> >
> > 	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
> > 	(rs6000_stride_dform_valid_p): New function.
> > 	* doc/tm.texi: Regenerate.
> > 	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
> > 	* target.def (stride_dform_valid_p): New hook.
> 
> It looks like we should able to derive this information from the normal
> legitimate_address_p hook.
> 
> Also, "D-form" vs. "X-form" is AFAIK a PowerPC-specific classification.
> It would be good to use a more generic term in target-independent code.

Note the whole series is also stage1 material.

Richard.
Segher Boessenkool Jan. 20, 2020, 1:14 p.m. UTC | #3
Hi!

On Mon, Jan 20, 2020 at 10:42:12AM +0000, Richard Sandiford wrote:
> "Kewen.Lin" <linkw@linux.ibm.com> writes:
> > gcc/ChangeLog
> >
> > 2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>
> >
> > 	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
> > 	(rs6000_stride_dform_valid_p): New function.
> > 	* doc/tm.texi: Regenerate.
> > 	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
> > 	* target.def (stride_dform_valid_p): New hook.
> 
> It looks like we should able to derive this information from the normal
> legitimate_address_p hook.

Yes, probably.

> Also, "D-form" vs. "X-form" is AFAIK a PowerPC-specific classification.
> It would be good to use a more generic term in target-independent code.

Yeah.  X-form is [reg+reg] addressing; D-form is [reg+imm] addressing.
We can do simple [reg] addressing in either form as well.  Whether D-form
can be used for some access depends on many factors (ISA version, mode of
the datum, alignment, and how big the offset is of course).  But the usual
legitimate_address_p hook should do fine.  The ivopts code already has an
addr_offset_valid_p function, maybe that could be adjusted for this?


Segher
Kewen.Lin Feb. 25, 2020, 9:46 a.m. UTC | #4
on 2020/1/20 下午9:14, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Jan 20, 2020 at 10:42:12AM +0000, Richard Sandiford wrote:
>> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>>> gcc/ChangeLog
>>>
>>> 2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>
>>>
>>> 	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
>>> 	(rs6000_stride_dform_valid_p): New function.
>>> 	* doc/tm.texi: Regenerate.
>>> 	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
>>> 	* target.def (stride_dform_valid_p): New hook.
>>
>> It looks like we should able to derive this information from the normal
>> legitimate_address_p hook.
> 
> Yes, probably.
> 
>> Also, "D-form" vs. "X-form" is AFAIK a PowerPC-specific classification.
>> It would be good to use a more generic term in target-independent code.
> 
> Yeah.  X-form is [reg+reg] addressing; D-form is [reg+imm] addressing.
> We can do simple [reg] addressing in either form as well.  Whether D-form
> can be used for some access depends on many factors (ISA version, mode of
> the datum, alignment, and how big the offset is of course).  But the usual
> legitimate_address_p hook should do fine.  The ivopts code already has an
> addr_offset_valid_p function, maybe that could be adjusted for this?
> 
> 
> Segher
> 

Hi Segher and Richard S.,

Sorry for late response.  Thanks for your comments on legitimate_address_p hook
and function addr_offset_valid_p.  I updated the IVOPTs part with
addr_offset_valid_p, although rs6000_legitimate_offset_address_p doesn't check
strictly all the time (like worst_case is false), it works well with SPEC2017.
Based on it, the hook is simplified as attached patch.

BR,
Kewen
-----------

gcc/ChangeLog

2020-02-25  Kewen Lin  <linkw@gcc.gnu.org>

	* config/rs6000/rs6000.c (TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P): New
	macro.
	* doc/tm.texi: Regenerate.
	* doc/tm.texi.in (TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P): New hook.
	* target.def (consider_reg_offset_for_unroll_p): New hook.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4c1c0e9..0eb13df 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1652,6 +1652,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_PREDICT_DOLOOP_P
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
+#undef TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P
+#define TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P true
+
 #undef TARGET_HAVE_COUNT_REG_DECR_P
 #define TARGET_HAVE_COUNT_REG_DECR_P true
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 19985ad..fc21a3b 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11680,6 +11680,18 @@ function version at run-time for a given set of function versions.
 body must be generated.
 @end deftypefn
 
+@deftypevr {Target Hook} bool TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P
+When RTL unrolling performs on a loop, the duplicated loop iterations
+have appropriate IV step update expressions.  But if an IV is derived from
+address object, it is profitable to fill its required offset updates into
+appropriate memory access expressions if target memory accessing supports
+the register offset mode and the resulted offset is in the valid range.
+Return true if target supports register offset memory accessing mode and
+wants IVOPTs or other passes to consider this information for better code
+for unrolling.  It needs to invoke unroll factor estimation in middle-end.
+The default version of this hook returns false.
+@end deftypevr
+
 @deftypefn {Target Hook} bool TARGET_PREDICT_DOLOOP_P (class loop *@var{loop})
 Return true if we can predict it is possible to use a low-overhead loop
 for a particular loop.  The parameter @var{loop} is a pointer to the loop.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1a16150..45377d3 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7953,6 +7953,8 @@ to by @var{ce_info}.
 
 @hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
 
+@hook TARGET_CONSIDER_REG_OFFSET_FOR_UNROLL_P
+
 @hook TARGET_PREDICT_DOLOOP_P
 
 @hook TARGET_HAVE_COUNT_REG_DECR_P
diff --git a/gcc/target.def b/gcc/target.def
index b5e82ff..a966d4f 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4299,7 +4299,20 @@ DEFHOOK
  emits a @code{speculation_barrier} instruction if that is defined.",
 rtx, (machine_mode mode, rtx result, rtx val, rtx failval),
  default_speculation_safe_value)
- 
+
+DEFHOOKPOD
+(consider_reg_offset_for_unroll_p,
+ "When RTL unrolling performs on a loop, the duplicated loop iterations\n\
+have appropriate IV step update expressions.  But if an IV is derived from\n\
+address object, it is profitable to fill its required offset updates into\n\
+appropriate memory access expressions if target memory accessing supports\n\
+the register offset mode and the resulted offset is in the valid range.\n\
+Return true if target supports register offset memory accessing mode and\n\
+wants IVOPTs or other passes to consider this information for better code\n\
+for unrolling.  It needs to invoke unroll factor estimation in middle-end.\n\
+The default version of this hook returns false.",
+ bool, false)
+
 DEFHOOK
 (predict_doloop_p,
  "Return true if we can predict it is possible to use a low-overhead loop\n\
Richard Sandiford March 2, 2020, 11:08 a.m. UTC | #5
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> on 2020/1/20 下午9:14, Segher Boessenkool wrote:
>> Hi!
>> 
>> On Mon, Jan 20, 2020 at 10:42:12AM +0000, Richard Sandiford wrote:
>>> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>>>> gcc/ChangeLog
>>>>
>>>> 2020-01-16  Kewen Lin  <linkw@gcc.gnu.org>
>>>>
>>>> 	* config/rs6000/rs6000.c (TARGET_STRIDE_DFORM_VALID_P): New macro.
>>>> 	(rs6000_stride_dform_valid_p): New function.
>>>> 	* doc/tm.texi: Regenerate.
>>>> 	* doc/tm.texi.in (TARGET_STRIDE_DFORM_VALID_P): New hook.
>>>> 	* target.def (stride_dform_valid_p): New hook.
>>>
>>> It looks like we should able to derive this information from the normal
>>> legitimate_address_p hook.
>> 
>> Yes, probably.
>> 
>>> Also, "D-form" vs. "X-form" is AFAIK a PowerPC-specific classification.
>>> It would be good to use a more generic term in target-independent code.
>> 
>> Yeah.  X-form is [reg+reg] addressing; D-form is [reg+imm] addressing.
>> We can do simple [reg] addressing in either form as well.  Whether D-form
>> can be used for some access depends on many factors (ISA version, mode of
>> the datum, alignment, and how big the offset is of course).  But the usual
>> legitimate_address_p hook should do fine.  The ivopts code already has an
>> addr_offset_valid_p function, maybe that could be adjusted for this?
>> 
>> 
>> Segher
>> 
>
> Hi Segher and Richard S.,
>
> Sorry for late response.  Thanks for your comments on legitimate_address_p hook
> and function addr_offset_valid_p.  I updated the IVOPTs part with
> addr_offset_valid_p, although rs6000_legitimate_offset_address_p doesn't check
> strictly all the time (like worst_case is false), it works well with SPEC2017.
> Based on it, the hook is simplified as attached patch.

Thanks for the update.  I think it would be better to add a --param
rather than a bool hook though.  Targets can then change the default
(if necessary) using SET_OPTION_IF_UNSET.  The user can override the
default if they want to.

It might also be better to start with an opt-out rather than an opt-in
(i.e. with the default param value being true rather than false).
With a default-off option, it's much harder to tell whether something
has been deliberately turned off or whether no-one's thought about it
either way.  We can always flip the default later if it turns out that
nothing other than rs6000 benefits.

Richard
Kewen.Lin March 3, 2020, 12:25 p.m. UTC | #6
>> Hi Segher and Richard S.,
>>
>> Sorry for late response.  Thanks for your comments on legitimate_address_p hook
>> and function addr_offset_valid_p.  I updated the IVOPTs part with
>> addr_offset_valid_p, although rs6000_legitimate_offset_address_p doesn't check
>> strictly all the time (like worst_case is false), it works well with SPEC2017.
>> Based on it, the hook is simplified as attached patch.
> 
> Thanks for the update.  I think it would be better to add a --param
> rather than a bool hook though.  Targets can then change the default
> (if necessary) using SET_OPTION_IF_UNSET.  The user can override the
> default if they want to.
> 
> It might also be better to start with an opt-out rather than an opt-in
> (i.e. with the default param value being true rather than false).
> With a default-off option, it's much harder to tell whether something
> has been deliberately turned off or whether no-one's thought about it
> either way.  We can always flip the default later if it turns out that
> nothing other than rs6000 benefits.
> 
> Richard
> 

Hi Richard,

Thanks for your comments!  It's a good idea to use param due to the
flexibility.  And yes, it sounds good to have more targets to try and
make it better.  But I have a bit concern on turning it on by default.
Since it replies on unroll factor estimation, as part 1/4 shows, it
calls targetm.loop_unroll_adjust if target supports, which used to
work on RTL level.  To avoid possible ICE, I'm intended to turn it
off for those targets (s390 & i386) with that hook, since without good
understanding on those targets, it's hard for me to extend them with
gimple level support.  Does it make sense?

The updated patch has been attached.

BR,
Kewen
---------

gcc/ChangeLog

2020-03-03  Kewen Lin  <linkw@gcc.gnu.org>

	* doc/invoke.texi (iv-consider-reg-offset-for-unroll): Document new option.
	* params.opt (iv-consider-reg-offset-for-unroll): New.
	* config/s390/s390.c (s390_option_override_internal): Disable parameter
	iv-consider-reg-offset-for-unroll by default.
	* config/i386/i386-options.c (ix86_option_override_internal): Likewise.
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index e0be493..41c99b3 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -2902,6 +2902,12 @@ ix86_option_override_internal (bool main_args_p,
   if (ix86_indirect_branch != indirect_branch_keep)
     SET_OPTION_IF_UNSET (opts, opts_set, flag_jump_tables, 0);
 
+  /* Disable this for now till loop_unroll_adjust supports gimple level checks,
+     to avoid possible ICE.  */
+  if (opts->x_optimize >= 1)
+    SET_OPTION_IF_UNSET (opts, opts_set,
+			 param_iv_consider_reg_offset_for_unroll, 0);
+
   return true;
 }
 
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index ebba670..ae4c2bd 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -15318,6 +15318,12 @@ s390_option_override_internal (struct gcc_options *opts,
      not the case when the code runs before the prolog. */
   if (opts->x_flag_fentry && !TARGET_64BIT)
     error ("%<-mfentry%> is supported only for 64-bit CPUs");
+
+  /* Disable this for now till loop_unroll_adjust supports gimple level checks,
+     to avoid possible ICE.  */
+  if (opts->x_optimize >= 1)
+    SET_OPTION_IF_UNSET (opts, opts_set,
+			 param_iv_consider_reg_offset_for_unroll, 0);
 }
 
 static void
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fa98e2f..502031c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12220,6 +12220,15 @@ If the number of candidates in the set is smaller than this value,
 always try to remove unnecessary ivs from the set
 when adding a new one.
 
+@item iv-consider-reg-offset-for-unroll
+When RTL unrolling performs on a loop, the duplicated loop iterations introduce
+appropriate induction variable step update expressions.  But if an induction
+variable is derived from address object, it is profitable to fill its required
+offset updates into appropriate memory access expressions if target memory
+accessing supports the register offset mode and the resulted offset is in the
+valid range.  The induction variable optimizations consider this information
+for better unrolling code.  It requires unroll factor estimation in middle-end.
+
 @item avg-loop-niter
 Average number of iterations of a loop.
 
diff --git a/gcc/params.opt b/gcc/params.opt
index 8e4217d..31424cf 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -270,6 +270,10 @@ Bound on number of candidates below that all candidates are considered in iv opt
 Common Joined UInteger Var(param_iv_max_considered_uses) Init(250) Param Optimization
 Bound on number of iv uses in loop optimized in iv optimizations.
 
+-param=iv-consider-reg-offset-for-unroll=
+Common Joined UInteger Var(param_iv_consider_reg_offset_for_unroll) Init(1) Optimization IntegerRange(0, 1) Param
+Whether iv optimizations mark register offset valid groups and consider their derived iv candidates would be profitable with estimated unroll factor consideration.
+
 -param=jump-table-max-growth-ratio-for-size=
 Common Joined UInteger Var(param_jump_table_max_growth_ratio_for_size) Init(300) Param Optimization
 The maximum code size growth ratio when expanding into a jump table (in percent).  The parameter is used when optimizing for size.
Kewen.Lin May 13, 2020, 5:50 a.m. UTC | #7
Hi,

I'd like to ping this patch as well as its sblings.  Thanks in advance.

1/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-February/540171.html
2/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541387.html
3/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545643.html

BR,
Kewen

on 2020/3/3 下午8:25, Kewen.Lin wrote:
> Hi Richard,
> 
> Thanks for your comments!  It's a good idea to use param due to the
> flexibility.  And yes, it sounds good to have more targets to try and
> make it better.  But I have a bit concern on turning it on by default.
> Since it replies on unroll factor estimation, as part 1/4 shows, it
> calls targetm.loop_unroll_adjust if target supports, which used to
> work on RTL level.  To avoid possible ICE, I'm intended to turn it
> off for those targets (s390 & i386) with that hook, since without good
> understanding on those targets, it's hard for me to extend them with
> gimple level support.  Does it make sense?
> 
> The updated patch has been attached.
> 
> BR,
> Kewen
> ---------
> 
> gcc/ChangeLog
> 
> 2020-03-03  Kewen Lin  <linkw@gcc.gnu.org>
> 
> 	* doc/invoke.texi (iv-consider-reg-offset-for-unroll): Document new option.
> 	* params.opt (iv-consider-reg-offset-for-unroll): New.
> 	* config/s390/s390.c (s390_option_override_internal): Disable parameter
> 	iv-consider-reg-offset-for-unroll by default.
> 	* config/i386/i386-options.c (ix86_option_override_internal): Likewise.
>
Kewen.Lin May 28, 2020, 2:17 a.m. UTC | #8
Hi,

Gentle ping patches as below:

1/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-February/540171.html
2/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541387.html
3/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545643.html

Or shall I ping them seperately?

Thanks!
Kewen

on 2020/5/13 下午1:50, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> I'd like to ping this patch as well as its sblings.  Thanks in advance.
> 
> 1/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-February/540171.html
> 2/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541387.html
> 3/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545643.html
> 
> BR,
> Kewen
> 
> on 2020/3/3 下午8:25, Kewen.Lin wrote:
>> Hi Richard,
>>
>> Thanks for your comments!  It's a good idea to use param due to the
>> flexibility.  And yes, it sounds good to have more targets to try and
>> make it better.  But I have a bit concern on turning it on by default.
>> Since it replies on unroll factor estimation, as part 1/4 shows, it
>> calls targetm.loop_unroll_adjust if target supports, which used to
>> work on RTL level.  To avoid possible ICE, I'm intended to turn it
>> off for those targets (s390 & i386) with that hook, since without good
>> understanding on those targets, it's hard for me to extend them with
>> gimple level support.  Does it make sense?
>>
>> The updated patch has been attached.
>>
>> BR,
>> Kewen
>> ---------
>>
>> gcc/ChangeLog
>>
>> 2020-03-03  Kewen Lin  <linkw@gcc.gnu.org>
>>
>> 	* doc/invoke.texi (iv-consider-reg-offset-for-unroll): Document new option.
>> 	* params.opt (iv-consider-reg-offset-for-unroll): New.
>> 	* config/s390/s390.c (s390_option_override_internal): Disable parameter
>> 	iv-consider-reg-offset-for-unroll by default.
>> 	* config/i386/i386-options.c (ix86_option_override_internal): Likewise.
>>
Richard Sandiford May 28, 2020, 10:54 a.m. UTC | #9
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi,
>
> Gentle ping patches as below:
>
> 1/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-February/540171.html
> 2/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541387.html
> 3/4 v3 https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545643.html
>
> Or shall I ping them seperately?

Can you repost the full series?

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0dabaa6..1e41fcf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1657,6 +1657,9 @@  static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_PREDICT_DOLOOP_P
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
+#undef TARGET_STRIDE_DFORM_VALID_P
+#define TARGET_STRIDE_DFORM_VALID_P rs6000_stride_dform_valid_p
+
 #undef TARGET_HAVE_COUNT_REG_DECR_P
 #define TARGET_HAVE_COUNT_REG_DECR_P true
 
@@ -26272,6 +26275,43 @@  rs6000_predict_doloop_p (struct loop *loop)
   return true;
 }
 
+/* Return true if the memory access with mode MODE, signedness SIGNED_P and
+   store STORE_P with offset from 0 to (NUNROLL-1) * STRIDE are valid with
+   D-form instructions.  */
+
+static bool
+rs6000_stride_dform_valid_p (machine_mode mode, signed HOST_WIDE_INT stride,
+			  bool signed_p, bool store_p, unsigned nunroll)
+{
+  static const HOST_WIDE_INT max_bound = 0x7fff;
+  static const HOST_WIDE_INT min_bound = -0x8000;
+
+  if (!IN_RANGE ((nunroll - 1) * stride, min_bound, max_bound))
+    return false;
+
+  /* Check DQ-form for vector mode or float128 mode.  */
+  if (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
+    {
+      if (mode_supports_dq_form (mode) && !(stride & 0xF))
+	return true;
+      else
+	return false;
+    }
+
+  /* Simply consider non VSX instructions.  */
+  if (mode == QImode || mode == HImode || mode == SFmode || mode == DFmode)
+    return true;
+
+  /* lwz/stw is D-form, but lwa is DS-form.  */
+  if (mode == SImode && (!signed_p || store_p || !(stride & 0x03)))
+    return true;
+
+  if (mode == DImode && !(stride & 0x03))
+    return true;
+
+  return false;
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 86ad278..0b8bc7c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11669,6 +11669,14 @@  function version at run-time for a given set of function versions.
 body must be generated.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_STRIDE_DFORM_VALID_P (machine_mode @var{mode}, signed HOST_WIDE_INT @var{stride}, bool @var{signed_p}, bool @var{store_p}, unsigned @var{nunroll})
+For a given memory access, check whether it is valid to put 0, @var{stride}
+, 2 * @var{stride}, ... , (@var{nunroll} - 1) to the instruction D-form
+displacement, with mode @var{mode}, signedness @var{signed_p} and store
+@var{store_p}.  Return true if valid.
+The default version of this hook returns false.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_PREDICT_DOLOOP_P (class loop *@var{loop})
 Return true if we can predict it is possible to use a low-overhead loop
 for a particular loop.  The parameter @var{loop} is a pointer to the loop.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index fd9769e..e90d020 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7953,6 +7953,8 @@  to by @var{ce_info}.
 
 @hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
 
+@hook TARGET_STRIDE_DFORM_VALID_P
+
 @hook TARGET_PREDICT_DOLOOP_P
 
 @hook TARGET_HAVE_COUNT_REG_DECR_P
diff --git a/gcc/target.def b/gcc/target.def
index f61c831..ee19a8d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4300,7 +4300,18 @@  DEFHOOK
  emits a @code{speculation_barrier} instruction if that is defined.",
 rtx, (machine_mode mode, rtx result, rtx val, rtx failval),
  default_speculation_safe_value)
- 
+
+DEFHOOK
+(stride_dform_valid_p,
+ "For a given memory access, check whether it is valid to put 0, @var{stride}\n\
+, 2 * @var{stride}, ... , (@var{nunroll} - 1) to the instruction D-form\n\
+displacement, with mode @var{mode}, signedness @var{signed_p} and store\n\
+@var{store_p}.  Return true if valid.\n\
+The default version of this hook returns false.",
+ bool, (machine_mode mode, signed HOST_WIDE_INT stride, bool signed_p,
+ bool store_p, unsigned nunroll),
+ NULL)
+
 DEFHOOK
 (predict_doloop_p,
  "Return true if we can predict it is possible to use a low-overhead loop\n\