diff mbox series

[v1,2/2] RISC-V: Add instruction fusion (for ventana-vt1)

Message ID 20211114214757.3190803-3-philipp.tomsich@vrull.eu
State New
Headers show
Series Basic support for the Ventana VT1 w/ instruction fusion | expand

Commit Message

Philipp Tomsich Nov. 14, 2021, 9:47 p.m. UTC
From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>

The Ventana VT1 core supports quad-issue and instruction fusion.
This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
together and adds idiom matcheing for the supported fusion cases.

gcc/ChangeLog:

	* config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
	constants to identify supported fusion patterns.
	(struct riscv_tune_param): Add fusible_op field.
	(riscv_macro_fusion_p): Implement.
	(riscv_fusion_enabled_p): Implement.
	(riscv_macro_fusion_pair_p): Implement and recoginze fusible
	idioms for Ventana VT1.
	(TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---

 gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)

Comments

Kito Cheng Nov. 17, 2021, 2:05 p.m. UTC | #1
Hi Philipp:

Thanks for the patch, I like this approach, that can easily configure
different capabilities for each core :)

So there are only a few minor comments for this patch.

On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
<philipp.tomsich@vrull.eu> wrote:
>
> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>
> The Ventana VT1 core supports quad-issue and instruction fusion.
> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> together and adds idiom matcheing for the supported fusion cases.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
>         constants to identify supported fusion patterns.
>         (struct riscv_tune_param): Add fusible_op field.
>         (riscv_macro_fusion_p): Implement.
>         (riscv_fusion_enabled_p): Implement.
>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
>         idioms for Ventana VT1.
>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.
>
> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
> ---
>
>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 196 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 6b918db65e9..8eac52101a3 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -211,6 +211,19 @@ struct riscv_integer_op {
>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
>  #define RISCV_MAX_INTEGER_OPS 8
>
> +enum riscv_fusion_pairs
> +{
> +  RISCV_FUSE_NOTHING = 0,
> +  RISCV_FUSE_ZEXTW = (1 << 0),
> +  RISCV_FUSE_ZEXTH = (1 << 1),
> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> +  RISCV_FUSE_LDINDEXED = (1 << 3),

RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED

Could you add some comment for above enums, like that:
/* slli rx, rx, 32 + srli rx, rx, 32 */
RISCV_FUSE_ZEXTW

So that we could know what kind of instruction will be funded for this enum.

> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> +  RISCV_FUSE_LUI_LD = (1 << 6),
> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> +};
> +
>  /* Costs of various operations on the different architectures.  */
>
>  struct riscv_tune_param
> @@ -224,6 +237,7 @@ struct riscv_tune_param
>    unsigned short branch_cost;
>    unsigned short memory_cost;
>    bool slow_unaligned_access;
> +  unsigned int fusible_ops;
>  };
>
>  /* Information about one micro-arch we know about.  */
> @@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>    3,                                           /* branch_cost */
>    5,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Sifive 7 Series.  */
> @@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
>    4,                                           /* branch_cost */
>    3,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for T-HEAD c906.  */
> @@ -328,6 +344,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>    1,                                           /* branch_cost */
>    2,                                           /* memory_cost */
>    false,                                       /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> @@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
>    4,                                           /* branch_cost */
>    5,                                           /* memory_cost */
>    false,                                       /* slow_unaligned_access */
> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
>  };
>
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
>    return tune_param->issue_rate;
>  }
>
> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
> +   instruction fusion of some sort.  */
> +
> +static bool
> +riscv_macro_fusion_p (void)
> +{
> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> +}
> +
> +/* Return true iff the instruction fusion described by OP is enabled.  */
> +
> +static bool
> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)

space between function name and parentheses.

riscv_fusion_enabled_p (enum riscv_fusion_pairs op)

> +{
> +  return tune_param->fusible_ops & op;
> +}
> +
> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
> +   should be kept together during scheduling.  */
> +
> +static bool
> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
> +{
> +  rtx prev_set = single_set (prev);
> +  rtx curr_set = single_set (curr);
> +  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
> +
> +  if (!riscv_macro_fusion_p ())
> +    return false;
> +
> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
> +    {
> +      /* We are trying to match the following:
> +          prev (slli) == (set (reg:DI rD)
> +                              (ashift:DI (reg:DI rS) (const_int 32)))
> +          curr (slri) == (set (reg:DI rD)

slri -> srli

> +                              (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
> +        with <shift> being either 32 for FUSE_ZEXTW, or
> +                        `less than 32 for FUSE_ZEXTWS. */

`less <-- I guess the ` is kind of an accident?

> +
> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> +         && REG_P (SET_DEST (prev_set))
> +         && REG_P (SET_DEST (curr_set))
> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )

space between function name and parentheses.

> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))

space between function name and parentheses.

> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
> +    {
> +      /* We are trying to match the following:
> +          prev (slli) == (set (reg:DI rD)
> +                              (ashift:DI (reg:DI rS) (const_int 48)))
> +          curr (slri) == (set (reg:DI rD)
> +                              (lshiftrt:DI (reg:DI rD) (const_int 48))) */

slri -> srli

> +
> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> +         && REG_P (SET_DEST (prev_set))
> +         && REG_P (SET_DEST (curr_set))
> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
> +    {
> +      /* We are trying to match the following:
> +          prev (add) == (set (reg:DI rD)
> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (reg:DI rD))) */
> +
> +      if (MEM_P (SET_SRC (curr_set))
> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> +       return true;
> +
> +      /* We are trying to match the following:
> +          prev (add) == (set (reg:DI rD)
> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
> +
> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))

This line is over 80 columns.


> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
> +    {
> +      /* We are trying to match the following:
> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> +          curr (addi) == (set (reg:DI rD)
> +                              (plus:DI (reg:DI rD) (const_int IMM12))) */
> +
> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
> +         && CONST_INT_P (SET_SRC (prev_set))
> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
> +    {
> +      /* We are trying to match the following:
> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
> +          prev (addi)  == (set (reg:DI rD)
> +                               (plus:DI (reg:DI rD) (const_int IMM12))) */
> +
> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
> +    {
> +      /* We are trying to match the following:
> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
> +
> +      if (CONST_INT_P (SET_SRC (prev_set))
> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
> +         && MEM_P (SET_SRC (curr_set))
> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
> +    {
> +      /* We are trying to match the following:
> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
> +
> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> +         && MEM_P (SET_SRC (curr_set))
> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> +       return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Auxiliary function to emit RISC-V ELF attribute. */
>  static void
>  riscv_emit_attribute ()
> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
>
>  #undef TARGET_SCHED_ISSUE_RATE
>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
> +#undef TARGET_SCHED_MACRO_FUSION_P
> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
>
>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
> --
> 2.32.0
>
Palmer Dabbelt Nov. 17, 2021, 7:40 p.m. UTC | #2
[This is my first time trying my Rivos address on the lists, so sorry if 
something goes off the rails.]

On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> Hi Philipp:
>
> Thanks for the patch, I like this approach, that can easily configure
> different capabilities for each core :)
>
> So there are only a few minor comments for this patch.
>
> On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> <philipp.tomsich@vrull.eu> wrote:
>>
>> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>>
>> The Ventana VT1 core supports quad-issue and instruction fusion.
>> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
>> together and adds idiom matcheing for the supported fusion cases.

There's a typo at "matcheing".

>>
>> gcc/ChangeLog:
>>
>>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
>>         constants to identify supported fusion patterns.
>>         (struct riscv_tune_param): Add fusible_op field.
>>         (riscv_macro_fusion_p): Implement.
>>         (riscv_fusion_enabled_p): Implement.
>>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
>>         idioms for Ventana VT1.
>>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
>>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.
>>
>> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>

This doesn't match the From (though admittedly I'm pretty new to the SoB 
stuff in GCC, so I'm not sure if that's even a rule here).

>> ---
>>
>>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 196 insertions(+)
>>
>> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
>> index 6b918db65e9..8eac52101a3 100644
>> --- a/gcc/config/riscv/riscv.c
>> +++ b/gcc/config/riscv/riscv.c
>> @@ -211,6 +211,19 @@ struct riscv_integer_op {
>>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
>>  #define RISCV_MAX_INTEGER_OPS 8
>>
>> +enum riscv_fusion_pairs
>> +{
>> +  RISCV_FUSE_NOTHING = 0,
>> +  RISCV_FUSE_ZEXTW = (1 << 0),
>> +  RISCV_FUSE_ZEXTH = (1 << 1),
>> +  RISCV_FUSE_ZEXTWS = (1 << 2),
>> +  RISCV_FUSE_LDINDEXED = (1 << 3),
>
> RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
>
> Could you add some comment for above enums, like that:
> /* slli rx, rx, 32 + srli rx, rx, 32 */
> RISCV_FUSE_ZEXTW
>
> So that we could know what kind of instruction will be funded for this enum.
>
>> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
>> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
>> +  RISCV_FUSE_LUI_LD = (1 << 6),
>> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
>> +};
>> +
>>  /* Costs of various operations on the different architectures.  */
>>
>>  struct riscv_tune_param
>> @@ -224,6 +237,7 @@ struct riscv_tune_param
>>    unsigned short branch_cost;
>>    unsigned short memory_cost;
>>    bool slow_unaligned_access;
>> +  unsigned int fusible_ops;
>>  };
>>
>>  /* Information about one micro-arch we know about.  */
>> @@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>>    3,                                           /* branch_cost */
>>    5,                                           /* memory_cost */
>>    true,                                                /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };

There's some tab/space issues here (and in the below ones).  They align 
when merged, but the new lines are spaces-only and the old ones have 
internal spaces mixed with tabs (IIRC that's to the GCC style, if not we 
should fix these to at least be consistent).

>>
>>  /* Costs to use when optimizing for Sifive 7 Series.  */
>> @@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
>>    4,                                           /* branch_cost */
>>    3,                                           /* memory_cost */
>>    true,                                                /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };
>>
>>  /* Costs to use when optimizing for T-HEAD c906.  */
>> @@ -328,6 +344,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>>    1,                                           /* branch_cost */
>>    2,                                           /* memory_cost */
>>    false,                                       /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };
>>
>>  /* Costs to use when optimizing for Ventana Micro VT1.  */
>> @@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
>>    4,                                           /* branch_cost */
>>    5,                                           /* memory_cost */
>>    false,                                       /* slow_unaligned_access */
>> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
>> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
>> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
>> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
>>  };
>>
>>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
>> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
>>    return tune_param->issue_rate;
>>  }
>>
>> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
>> +   instruction fusion of some sort.  */
>> +
>> +static bool
>> +riscv_macro_fusion_p (void)
>> +{
>> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
>> +}
>> +
>> +/* Return true iff the instruction fusion described by OP is enabled.  */
>> +
>> +static bool
>> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
>
> space between function name and parentheses.
>
> riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
>
>> +{
>> +  return tune_param->fusible_ops & op;
>> +}
>> +
>> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
>> +   should be kept together during scheduling.  */
>> +
>> +static bool
>> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
>> +{
>> +  rtx prev_set = single_set (prev);
>> +  rtx curr_set = single_set (curr);
>> +  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
>> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
>> +
>> +  if (!riscv_macro_fusion_p ())
>> +    return false;
>> +
>> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
>> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (slli) == (set (reg:DI rD)
>> +                              (ashift:DI (reg:DI rS) (const_int 32)))
>> +          curr (slri) == (set (reg:DI rD)

This really jumps out as the sort of thing that should be in RTL, but 
I'm not sure that's feasible.  The short branch->cmov optimization we're 
doing for the SiFive 7 series is sort of fusion and done in RTL, but the 
mechanism is very different there so it wouldn't apply directly.  There 
are other ports doing it this way so I'm not opposed to merging this 
as-is, but if there's a straight-forward way to port this to RTL then 
it's probably be a lot saner to maintain in the long term.  I can't come 
up with anything sane, though.

On the subject of maintainability: I don't see any tests for this.  
Given that we're exceedingly likely to end up with other targets that 
have front-end fusion and thus try to share this code, it'd be really 
nice to have either some documentation of what fusions this core 
performs or some tests that exercise the important ones.  I can't find 
any other references to "Ventana VT1" on the internet, but LMK if 
there's something I'm missing.

I'm not sure lacking tests should block merging this (as it's not like 
we have much optimization-based testing for the RISC-V port), but 
without at least some public documentation it's going to be hard to 
produce those outside of Ventana -- essentially I'm worried about ending 
up with the same black-box pipeline problem we have for the C906.

I seem to remember having tests for the SiFive optimization, but I 
couldn't find any when I looked.  Maybe Jim or Kito remembers where to 
find them, otherwise I'll throw some together.

>
> slri -> srli
>
>> +                              (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
>> +        with <shift> being either 32 for FUSE_ZEXTW, or
>> +                        `less than 32 for FUSE_ZEXTWS. */
>
> `less <-- I guess the ` is kind of an accident?
>
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
>> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
>> +         && REG_P (SET_DEST (prev_set))
>> +         && REG_P (SET_DEST (curr_set))
>> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
>> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
>> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
>> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
>
> space between function name and parentheses.
>
>> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
>> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
>
> space between function name and parentheses.
>
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (slli) == (set (reg:DI rD)
>> +                              (ashift:DI (reg:DI rS) (const_int 48)))
>> +          curr (slri) == (set (reg:DI rD)
>> +                              (lshiftrt:DI (reg:DI rD) (const_int 48))) */
>
> slri -> srli
>
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
>> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
>> +         && REG_P (SET_DEST (prev_set))
>> +         && REG_P (SET_DEST (curr_set))
>> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
>> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
>> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (add) == (set (reg:DI rD)
>> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (reg:DI rD))) */
>> +
>> +      if (MEM_P (SET_SRC (curr_set))
>> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
>> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
>> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
>> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
>> +       return true;
>> +
>> +      /* We are trying to match the following:
>> +          prev (add) == (set (reg:DI rD)
>> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
>> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
>> +
>> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
>> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
>> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
>> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
>> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
>
> This line is over 80 columns.
>
>
>> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
>> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
>> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
>> +          curr (addi) == (set (reg:DI rD)
>> +                              (plus:DI (reg:DI rD) (const_int IMM12))) */
>> +
>> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
>> +         && CONST_INT_P (SET_SRC (prev_set))
>> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
>> +          prev (addi)  == (set (reg:DI rD)
>> +                               (plus:DI (reg:DI rD) (const_int IMM12))) */
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
>> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
>> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
>> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
>> +
>> +      if (CONST_INT_P (SET_SRC (prev_set))
>> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
>> +         && MEM_P (SET_SRC (curr_set))
>> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
>> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
>> +         && MEM_P (SET_SRC (curr_set))
>> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
>> +       return true;
>> +    }
>> +
>> +  return false;
>> +}
>> +
>>  /* Auxiliary function to emit RISC-V ELF attribute. */
>>  static void
>>  riscv_emit_attribute ()
>> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
>>
>>  #undef TARGET_SCHED_ISSUE_RATE
>>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
>> +#undef TARGET_SCHED_MACRO_FUSION_P
>> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
>> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
>> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
>>
>>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
>>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
>> --
>> 2.32.0
>>
Philipp Tomsich Nov. 17, 2021, 7:44 p.m. UTC | #3
On Wed, 17 Nov 2021 at 20:40, Palmer Dabbelt <palmer@rivosinc.com> wrote:

> [This is my first time trying my Rivos address on the lists, so sorry if
> something goes off the rails.]
>
> On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> > Hi Philipp:
> >
> > Thanks for the patch, I like this approach, that can easily configure
> > different capabilities for each core :)
> >
> > So there are only a few minor comments for this patch.
> >
> > On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> > <philipp.tomsich@vrull.eu> wrote:
> >>
> >> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
> >>
> >> The Ventana VT1 core supports quad-issue and instruction fusion.
> >> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> >> together and adds idiom matcheing for the supported fusion cases.
>
> There's a typo at "matcheing".
>
> >>
> >> gcc/ChangeLog:
> >>
> >>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
> >>         constants to identify supported fusion patterns.
> >>         (struct riscv_tune_param): Add fusible_op field.
> >>         (riscv_macro_fusion_p): Implement.
> >>         (riscv_fusion_enabled_p): Implement.
> >>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
> >>         idioms for Ventana VT1.
> >>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
> >>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to
> riscv_macro_fusion_pair_p.
> >>
> >> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
>
> This doesn't match the From (though admittedly I'm pretty new to the SoB
> stuff in GCC, so I'm not sure if that's even a rule here).
>

I noticed that I hadn't reset the authors and that patman had inserted a
Signed-off-by: for that reason, right after I sent this out.
Given that it's all me and there's both individual assignment paperwork and
company disclaimers on file for all of the email-addresses, this should be
fine.

>> ---
> >>
> >>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 196 insertions(+)
> >>
> >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> >> index 6b918db65e9..8eac52101a3 100644
> >> --- a/gcc/config/riscv/riscv.c
> >> +++ b/gcc/config/riscv/riscv.c
> >> @@ -211,6 +211,19 @@ struct riscv_integer_op {
> >>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
> >>  #define RISCV_MAX_INTEGER_OPS 8
> >>
> >> +enum riscv_fusion_pairs
> >> +{
> >> +  RISCV_FUSE_NOTHING = 0,
> >> +  RISCV_FUSE_ZEXTW = (1 << 0),
> >> +  RISCV_FUSE_ZEXTH = (1 << 1),
> >> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> >> +  RISCV_FUSE_LDINDEXED = (1 << 3),
> >
> > RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
> >
> > Could you add some comment for above enums, like that:
> > /* slli rx, rx, 32 + srli rx, rx, 32 */
> > RISCV_FUSE_ZEXTW
> >
> > So that we could know what kind of instruction will be funded for this
> enum.
> >
> >> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> >> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> >> +  RISCV_FUSE_LUI_LD = (1 << 6),
> >> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> >> +};
> >> +
> >>  /* Costs of various operations on the different architectures.  */
> >>
> >>  struct riscv_tune_param
> >> @@ -224,6 +237,7 @@ struct riscv_tune_param
> >>    unsigned short branch_cost;
> >>    unsigned short memory_cost;
> >>    bool slow_unaligned_access;
> >> +  unsigned int fusible_ops;
> >>  };
> >>
> >>  /* Information about one micro-arch we know about.  */
> >> @@ -289,6 +303,7 @@ static const struct riscv_tune_param
> rocket_tune_info = {
> >>    3,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
>
> There's some tab/space issues here (and in the below ones).  They align
> when merged, but the new lines are spaces-only and the old ones have
> internal spaces mixed with tabs (IIRC that's to the GCC style, if not we
> should fix these to at least be consistent).
>
> >>
> >>  /* Costs to use when optimizing for Sifive 7 Series.  */
> >> @@ -302,6 +317,7 @@ static const struct riscv_tune_param
> sifive_7_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    3,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for T-HEAD c906.  */
> >> @@ -328,6 +344,7 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> >>    1,                                           /* branch_cost */
> >>    2,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> >> @@ -341,6 +358,10 @@ static const struct riscv_tune_param
> ventana_vt1_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
> >> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> >> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> >> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
> >>  };
> >>
> >>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int,
> bool *);
> >> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
> >>    return tune_param->issue_rate;
> >>  }
> >>
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target
> supports
> >> +   instruction fusion of some sort.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_p (void)
> >> +{
> >> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> >> +}
> >> +
> >> +/* Return true iff the instruction fusion described by OP is enabled.
> */
> >> +
> >> +static bool
> >> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
> >
> > space between function name and parentheses.
> >
> > riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
> >
> >> +{
> >> +  return tune_param->fusible_ops & op;
> >> +}
> >> +
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV
> and CURR
> >> +   should be kept together during scheduling.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
> >> +{
> >> +  rtx prev_set = single_set (prev);
> >> +  rtx curr_set = single_set (curr);
> >> +  /* prev and curr are simple SET insns i.e. no flag setting or
> branching.  */
> >> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
> >> +
> >> +  if (!riscv_macro_fusion_p ())
> >> +    return false;
> >> +
> >> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
> >> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 32)))
> >> +          curr (slri) == (set (reg:DI rD)
>
> This really jumps out as the sort of thing that should be in RTL, but
> I'm not sure that's feasible.  The short branch->cmov optimization we're
> doing for the SiFive 7 series is sort of fusion and done in RTL, but the
> mechanism is very different there so it wouldn't apply directly.  There
> are other ports doing it this way so I'm not opposed to merging this
> as-is, but if there's a straight-forward way to port this to RTL then
> it's probably be a lot saner to maintain in the long term.  I can't come
> up with anything sane, though.
>
> On the subject of maintainability: I don't see any tests for this.
> Given that we're exceedingly likely to end up with other targets that
> have front-end fusion and thus try to share this code, it'd be really
> nice to have either some documentation of what fusions this core
> performs or some tests that exercise the important ones.  I can't find
> any other references to "Ventana VT1" on the internet, but LMK if
> there's something I'm missing.
>
> I'm not sure lacking tests should block merging this (as it's not like
> we have much optimization-based testing for the RISC-V port), but
> without at least some public documentation it's going to be hard to
> produce those outside of Ventana -- essentially I'm worried about ending
> up with the same black-box pipeline problem we have for the C906.
>
> I seem to remember having tests for the SiFive optimization, but I
> couldn't find any when I looked.  Maybe Jim or Kito remembers where to
> find them, otherwise I'll throw some together.
>
> >
> > slri -> srli
> >
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> <shift>)))
> >> +        with <shift> being either 32 for FUSE_ZEXTW, or
> >> +                        `less than 32 for FUSE_ZEXTWS. */
> >
> > `less <-- I guess the ` is kind of an accident?
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
> >> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
> >> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
> >
> > space between function name and parentheses.
> >
> >> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
> >> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
> >
> > space between function name and parentheses.
> >
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 48)))
> >> +          curr (slri) == (set (reg:DI rD)
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> 48))) */
> >
> > slri -> srli
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
> >> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (reg:DI rD))) */
> >> +
> >> +      if (MEM_P (SET_SRC (curr_set))
> >> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST
> (prev_set))
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
> >> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
> >> +
> >> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
> >> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
> >> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
> >> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO
> (SET_DEST (prev_set))
> >
> > This line is over 80 columns.
> >
> >
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (addi) == (set (reg:DI rD)
> >> +                              (plus:DI (reg:DI rD) (const_int IMM12)))
> */
> >> +
> >> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
> >> +         && CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          prev (addi)  == (set (reg:DI rD)
> >> +                               (plus:DI (reg:DI rD) (const_int
> IMM12))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  return false;
> >> +}
> >> +
> >>  /* Auxiliary function to emit RISC-V ELF attribute. */
> >>  static void
> >>  riscv_emit_attribute ()
> >> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
> >>
> >>  #undef TARGET_SCHED_ISSUE_RATE
> >>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
> >> +#undef TARGET_SCHED_MACRO_FUSION_P
> >> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
> >> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
> >> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
> >>
> >>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
> >>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
> >> --
> >> 2.32.0
> >>
>
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 6b918db65e9..8eac52101a3 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -211,6 +211,19 @@  struct riscv_integer_op {
    The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
 #define RISCV_MAX_INTEGER_OPS 8
 
+enum riscv_fusion_pairs
+{
+  RISCV_FUSE_NOTHING = 0,
+  RISCV_FUSE_ZEXTW = (1 << 0),
+  RISCV_FUSE_ZEXTH = (1 << 1),
+  RISCV_FUSE_ZEXTWS = (1 << 2),
+  RISCV_FUSE_LDINDEXED = (1 << 3),
+  RISCV_FUSE_LUI_ADDI = (1 << 4),
+  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
+  RISCV_FUSE_LUI_LD = (1 << 6),
+  RISCV_FUSE_AUIPC_LD = (1 << 7),
+};
+
 /* Costs of various operations on the different architectures.  */
 
 struct riscv_tune_param
@@ -224,6 +237,7 @@  struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  unsigned int fusible_ops;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -289,6 +303,7 @@  static const struct riscv_tune_param rocket_tune_info = {
   3,						/* branch_cost */
   5,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -302,6 +317,7 @@  static const struct riscv_tune_param sifive_7_tune_info = {
   4,						/* branch_cost */
   3,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -328,6 +344,7 @@  static const struct riscv_tune_param optimize_size_tune_info = {
   1,						/* branch_cost */
   2,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -341,6 +358,10 @@  static const struct riscv_tune_param ventana_vt1_tune_info = {
   4,						/* branch_cost */
   5,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
+    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
+    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -4909,6 +4930,177 @@  riscv_issue_rate (void)
   return tune_param->issue_rate;
 }
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+riscv_macro_fusion_p (void)
+{
+  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
+}
+
+/* Return true iff the instruction fusion described by OP is enabled.  */
+
+static bool
+riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
+{
+  return tune_param->fusible_ops & op;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!riscv_macro_fusion_p ())
+    return false;
+
+  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
+			riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
+    {
+      /* We are trying to match the following:
+	   prev (slli) == (set (reg:DI rD)
+			       (ashift:DI (reg:DI rS) (const_int 32)))
+	   curr (slri) == (set (reg:DI rD)
+			       (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
+	 with <shift> being either 32 for FUSE_ZEXTW, or
+			 `less than 32 for FUSE_ZEXTWS. */
+
+      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+	  && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+	  && REG_P (SET_DEST (prev_set))
+	  && REG_P (SET_DEST (curr_set))
+	  && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+	  && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
+	  && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
+		&& riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
+	      || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
+		   && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
+    {
+      /* We are trying to match the following:
+	   prev (slli) == (set (reg:DI rD)
+			       (ashift:DI (reg:DI rS) (const_int 48)))
+	   curr (slri) == (set (reg:DI rD)
+			       (lshiftrt:DI (reg:DI rD) (const_int 48))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+	  && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+	  && REG_P (SET_DEST (prev_set))
+	  && REG_P (SET_DEST (curr_set))
+	  && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+	  && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
+	  && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
+    {
+      /* We are trying to match the following:
+	   prev (add) == (set (reg:DI rD)
+			      (plus:DI (reg:DI rS1) (reg:DI rS2))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (reg:DI rD))) */
+
+      if (MEM_P (SET_SRC (curr_set))
+	  && REG_P (XEXP (SET_SRC (curr_set), 0))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+	  && GET_CODE (SET_SRC (prev_set)) == PLUS
+	  && REG_P (XEXP (SET_SRC (prev_set), 0))
+	  && REG_P (XEXP (SET_SRC (prev_set), 1)))
+	return true;
+
+      /* We are trying to match the following:
+	   prev (add) == (set (reg:DI rD)
+			      (plus:DI (reg:DI rS1) (reg:DI rS2)))
+	   curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
+
+      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+	   || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
+	  && MEM_P (XEXP (SET_SRC (curr_set), 0))
+	  && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+	  && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
+	  && GET_CODE (SET_SRC (prev_set)) == PLUS
+	  && REG_P (XEXP (SET_SRC (prev_set), 0))
+	  && REG_P (XEXP (SET_SRC (prev_set), 1)))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
+    {
+      /* We are trying to match the following:
+	   prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
+	   curr (addi) == (set (reg:DI rD)
+			       (plus:DI (reg:DI rD) (const_int IMM12))) */
+
+      if (GET_CODE (SET_SRC (curr_set)) == PLUS
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
+	  && CONST_INT_P (SET_SRC (prev_set))
+	  && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
+    {
+      /* We are trying to match the following:
+	   prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+	   prev (addi)  == (set (reg:DI rD)
+				(plus:DI (reg:DI rD) (const_int IMM12))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+	  && XINT (prev_set, 1) == UNSPEC_AUIPC
+	  && GET_CODE (SET_SRC (curr_set)) == PLUS
+	  && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
+    {
+      /* We are trying to match the following:
+	   prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+
+      if (CONST_INT_P (SET_SRC (prev_set))
+	  && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
+	  && MEM_P (SET_SRC (curr_set))
+	  && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
+    {
+      /* We are trying to match the following:
+	   prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+	  && XINT (prev_set, 1) == UNSPEC_AUIPC
+	  && MEM_P (SET_SRC (curr_set))
+	  && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+	return true;
+    }
+
+  return false;
+}
+
 /* Auxiliary function to emit RISC-V ELF attribute. */
 static void
 riscv_emit_attribute ()
@@ -5676,6 +5868,10 @@  riscv_asan_shadow_offset (void)
 
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
 
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall