diff mbox

Force rtl templates to be inlined

Message ID 1409641427-29875-1-git-send-email-andi@firstfloor.org
State New
Headers show

Commit Message

Andi Kleen Sept. 2, 2014, 7:03 a.m. UTC
From: Andi Kleen <ak@linux.intel.com>

I noticed that with the trunk compiler a range of the new rtl
inlines show up as hot in a profiler during stage1. I think
that happens because stage1 is not using optimization
and does not inline plain "inline".  And these rtl inlines
are very frequently called.

Mark them all with __attribute__((always_inline)) which forces
inlining even with -O0.

Passes bootstrap and testing on x86_64-linux.

Cc: dmalcolm@redhat.com

include/:

2014-09-01  Andi Kleen  <ak@linux.intel.com>

	* ansidecl.h (ALWAYS_INLINE): Add.

gcc/:

2014-09-01  Andi Kleen  <ak@linux.intel.com>

	* rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
	(rhs_regno): Dito.
	(init_costs_to_max): Dito.
	(init_costs_to_zero): Dito.
	(costs_lt_p): Dito.
	(costs_add_n_insns): Dito.
	(wi::int_traits ::get_precision): Dito.
	(wi::shwi): Dito.
	(wi::min_value): Dito.
	(wi::max_value): Dito.
	(set_rtx_cost): Dito.
	(get_full_set_rtx_cost): Dito.
	(set_src_cost): Dito.
	(get_full_set_src_cost): Dito.
	(get_mem_attrs): Dito.
---
 gcc/rtl.h          | 111 +++++++++++++++++++++++++++--------------------------
 include/ansidecl.h |   6 +++
 2 files changed, 62 insertions(+), 55 deletions(-)

Comments

Andrew Pinski Sept. 2, 2014, 7:09 a.m. UTC | #1
On Tue, Sep 2, 2014 at 12:03 AM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> I noticed that with the trunk compiler a range of the new rtl
> inlines show up as hot in a profiler during stage1. I think
> that happens because stage1 is not using optimization
> and does not inline plain "inline".  And these rtl inlines
> are very frequently called.
>
> Mark them all with __attribute__((always_inline)) which forces
> inlining even with -O0.


I think this is wrong and should not be committed.  stage1 is designed
to be without optimization and there have been bugs in the past in the
area of always_inline too.

Thanks,
Andrew Pinski

>
> Passes bootstrap and testing on x86_64-linux.
>
> Cc: dmalcolm@redhat.com
>
> include/:
>
> 2014-09-01  Andi Kleen  <ak@linux.intel.com>
>
>         * ansidecl.h (ALWAYS_INLINE): Add.
>
> gcc/:
>
> 2014-09-01  Andi Kleen  <ak@linux.intel.com>
>
>         * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
>         (rhs_regno): Dito.
>         (init_costs_to_max): Dito.
>         (init_costs_to_zero): Dito.
>         (costs_lt_p): Dito.
>         (costs_add_n_insns): Dito.
>         (wi::int_traits ::get_precision): Dito.
>         (wi::shwi): Dito.
>         (wi::min_value): Dito.
>         (wi::max_value): Dito.
>         (set_rtx_cost): Dito.
>         (get_full_set_rtx_cost): Dito.
>         (set_src_cost): Dito.
>         (get_full_set_src_cost): Dito.
>         (get_mem_attrs): Dito.
> ---
>  gcc/rtl.h          | 111 +++++++++++++++++++++++++++--------------------------
>  include/ansidecl.h |   6 +++
>  2 files changed, 62 insertions(+), 55 deletions(-)
>
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index beeed2f..d711e43 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define GCC_RTL_H
>
>  #include <utility>
> +#include "ansidecl.h"
>  #include "statistics.h"
>  #include "machmode.h"
>  #include "input.h"
> @@ -418,7 +419,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_expr_list *>::test (rtx rt)
>  {
>    return rt->code == EXPR_LIST;
> @@ -447,7 +448,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_insn_list *>::test (rtx rt)
>  {
>    return rt->code == INSN_LIST;
> @@ -474,7 +475,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_sequence *>::test (rtx rt)
>  {
>    return rt->code == SEQUENCE;
> @@ -482,7 +483,7 @@ is_a_helper <rtx_sequence *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <const rtx_sequence *>::test (const_rtx rt)
>  {
>    return rt->code == SEQUENCE;
> @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def {
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_insn *>::test (rtx rt)
>  {
>    return (INSN_P (rt)
> @@ -790,7 +791,7 @@ is_a_helper <rtx_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <const rtx_insn *>::test (const_rtx rt)
>  {
>    return (INSN_P (rt)
> @@ -802,7 +803,7 @@ is_a_helper <const rtx_insn *>::test (const_rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_debug_insn *>::test (rtx rt)
>  {
>    return DEBUG_INSN_P (rt);
> @@ -810,7 +811,7 @@ is_a_helper <rtx_debug_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
>  {
>    return NONJUMP_INSN_P (rt);
> @@ -818,7 +819,7 @@ is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_insn *>::test (rtx rt)
>  {
>    return JUMP_P (rt);
> @@ -826,7 +827,7 @@ is_a_helper <rtx_jump_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_call_insn *>::test (rtx rt)
>  {
>    return CALL_P (rt);
> @@ -834,7 +835,7 @@ is_a_helper <rtx_call_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
>  {
>    return CALL_P (insn);
> @@ -842,7 +843,7 @@ is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_table_data *>::test (rtx rt)
>  {
>    return JUMP_TABLE_DATA_P (rt);
> @@ -850,7 +851,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
>  {
>    return JUMP_TABLE_DATA_P (insn);
> @@ -858,7 +859,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_barrier *>::test (rtx rt)
>  {
>    return BARRIER_P (rt);
> @@ -866,7 +867,7 @@ is_a_helper <rtx_barrier *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_code_label *>::test (rtx rt)
>  {
>    return LABEL_P (rt);
> @@ -874,7 +875,7 @@ is_a_helper <rtx_code_label *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
>  {
>    return LABEL_P (insn);
> @@ -882,7 +883,7 @@ is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_note *>::test (rtx rt)
>  {
>    return NOTE_P (rt);
> @@ -890,7 +891,7 @@ is_a_helper <rtx_note *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_note *>::test (rtx_insn *insn)
>  {
>    return NOTE_P (insn);
> @@ -1257,26 +1258,26 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
>
>  /* Methods of rtx_expr_list.  */
>
> -inline rtx_expr_list *rtx_expr_list::next () const
> +ALWAYS_INLINE rtx_expr_list *rtx_expr_list::next () const
>  {
>    rtx tmp = XEXP (this, 1);
>    return safe_as_a <rtx_expr_list *> (tmp);
>  }
>
> -inline rtx rtx_expr_list::element () const
> +ALWAYS_INLINE rtx rtx_expr_list::element () const
>  {
>    return XEXP (this, 0);
>  }
>
>  /* Methods of rtx_insn_list.  */
>
> -inline rtx_insn_list *rtx_insn_list::next () const
> +ALWAYS_INLINE rtx_insn_list *rtx_insn_list::next () const
>  {
>    rtx tmp = XEXP (this, 1);
>    return safe_as_a <rtx_insn_list *> (tmp);
>  }
>
> -inline rtx_insn *rtx_insn_list::insn () const
> +ALWAYS_INLINE rtx_insn *rtx_insn_list::insn () const
>  {
>    rtx tmp = XEXP (this, 0);
>    return safe_as_a <rtx_insn *> (tmp);
> @@ -1284,17 +1285,17 @@ inline rtx_insn *rtx_insn_list::insn () const
>
>  /* Methods of rtx_sequence.  */
>
> -inline int rtx_sequence::len () const
> +ALWAYS_INLINE int rtx_sequence::len () const
>  {
>    return XVECLEN (this, 0);
>  }
>
> -inline rtx rtx_sequence::element (int index) const
> +ALWAYS_INLINE rtx rtx_sequence::element (int index) const
>  {
>    return XVECEXP (this, 0, index);
>  }
>
> -inline rtx_insn *rtx_sequence::insn (int index) const
> +ALWAYS_INLINE rtx_insn *rtx_sequence::insn (int index) const
>  {
>    return as_a <rtx_insn *> (XVECEXP (this, 0, index));
>  }
> @@ -1303,12 +1304,12 @@ inline rtx_insn *rtx_sequence::insn (int index) const
>
>  /* Holds a unique number for each insn.
>     These are not necessarily sequentially increasing.  */
> -inline int INSN_UID (const_rtx insn)
> +ALWAYS_INLINE int INSN_UID (const_rtx insn)
>  {
>    return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
>                                     (insn))->u2.insn_uid;
>  }
> -inline int& INSN_UID (rtx insn)
> +ALWAYS_INLINE int& INSN_UID (rtx insn)
>  {
>    return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
>                                     (insn))->u2.insn_uid;
> @@ -1321,60 +1322,60 @@ inline int& INSN_UID (rtx insn)
>     and an lvalue form:
>       SET_NEXT_INSN/SET_PREV_INSN.  */
>
> -inline rtx_insn *PREV_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *PREV_INSN (const rtx_insn *insn)
>  {
>    rtx prev = XEXP (insn, 0);
>    return safe_as_a <rtx_insn *> (prev);
>  }
>
> -inline rtx& SET_PREV_INSN (rtx_insn *insn)
> +ALWAYS_INLINE rtx& SET_PREV_INSN (rtx_insn *insn)
>  {
>    return XEXP (insn, 0);
>  }
>
> -inline rtx_insn *NEXT_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *NEXT_INSN (const rtx_insn *insn)
>  {
>    rtx next = XEXP (insn, 1);
>    return safe_as_a <rtx_insn *> (next);
>  }
>
> -inline rtx& SET_NEXT_INSN (rtx_insn *insn)
> +ALWAYS_INLINE rtx& SET_NEXT_INSN (rtx_insn *insn)
>  {
>    return XEXP (insn, 1);
>  }
>
> -inline basic_block BLOCK_FOR_INSN (const_rtx insn)
> +ALWAYS_INLINE basic_block BLOCK_FOR_INSN (const_rtx insn)
>  {
>    return XBBDEF (insn, 2);
>  }
>
> -inline basic_block& BLOCK_FOR_INSN (rtx insn)
> +ALWAYS_INLINE basic_block& BLOCK_FOR_INSN (rtx insn)
>  {
>    return XBBDEF (insn, 2);
>  }
>
>  /* The body of an insn.  */
> -inline rtx PATTERN (const_rtx insn)
> +ALWAYS_INLINE rtx PATTERN (const_rtx insn)
>  {
>    return XEXP (insn, 3);
>  }
>
> -inline rtx& PATTERN (rtx insn)
> +ALWAYS_INLINE rtx& PATTERN (rtx insn)
>  {
>    return XEXP (insn, 3);
>  }
>
> -inline unsigned int INSN_LOCATION (const_rtx insn)
> +ALWAYS_INLINE unsigned int INSN_LOCATION (const_rtx insn)
>  {
>    return XUINT (insn, 4);
>  }
>
> -inline unsigned int& INSN_LOCATION (rtx insn)
> +ALWAYS_INLINE unsigned int& INSN_LOCATION (rtx insn)
>  {
>    return XUINT (insn, 4);
>  }
>
> -inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
> +ALWAYS_INLINE bool INSN_HAS_LOCATION (const rtx_insn *insn)
>  {
>    return LOCATION_LOCUS (INSN_LOCATION (insn)) != UNKNOWN_LOCATION;
>  }
> @@ -1387,7 +1388,7 @@ inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
>     -1 means this instruction has not been recognized yet.  */
>  #define INSN_CODE(INSN) XINT (INSN, 5)
>
> -inline rtvec rtx_jump_table_data::get_labels () const
> +ALWAYS_INLINE rtvec rtx_jump_table_data::get_labels () const
>  {
>    rtx pat = PATTERN (this);
>    if (GET_CODE (pat) == ADDR_VEC)
> @@ -1658,7 +1659,7 @@ enum label_kind
>     be decremented and possibly the label can be deleted.  */
>  #define JUMP_LABEL(INSN)   XCEXP (INSN, 7, JUMP_INSN)
>
> -inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
>  {
>    return safe_as_a <rtx_insn *> (JUMP_LABEL (insn));
>  }
> @@ -1682,7 +1683,7 @@ inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
>    (RTL_FLAG_CHECK1 ("ORIGINAL_REGNO", (RTX), REG)->u2.original_regno)
>
>  /* Force the REGNO macro to only be used on the lhs.  */
> -static inline unsigned int
> +static ALWAYS_INLINE unsigned int
>  rhs_regno (const_rtx x)
>  {
>    return XCUINT (x, 0, REG);
> @@ -1774,7 +1775,7 @@ struct full_rtx_costs
>  };
>
>  /* Initialize a full_rtx_costs structure C to the maximum cost.  */
> -static inline void
> +static ALWAYS_INLINE void
>  init_costs_to_max (struct full_rtx_costs *c)
>  {
>    c->speed = MAX_COST;
> @@ -1782,7 +1783,7 @@ init_costs_to_max (struct full_rtx_costs *c)
>  }
>
>  /* Initialize a full_rtx_costs structure C to zero cost.  */
> -static inline void
> +static ALWAYS_INLINE void
>  init_costs_to_zero (struct full_rtx_costs *c)
>  {
>    c->speed = 0;
> @@ -1791,7 +1792,7 @@ init_costs_to_zero (struct full_rtx_costs *c)
>
>  /* Compare two full_rtx_costs structures A and B, returning true
>     if A < B when optimizing for speed.  */
> -static inline bool
> +static ALWAYS_INLINE bool
>  costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
>             bool speed)
>  {
> @@ -1805,7 +1806,7 @@ costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
>
>  /* Increase both members of the full_rtx_costs structure C by the
>     cost of N insns.  */
> -static inline void
> +static ALWAYS_INLINE void
>  costs_add_n_insns (struct full_rtx_costs *c, int n)
>  {
>    c->speed += COSTS_N_INSNS (n);
> @@ -1904,13 +1905,13 @@ namespace wi
>    };
>  }
>
> -inline unsigned int
> +ALWAYS_INLINE unsigned int
>  wi::int_traits <rtx_mode_t>::get_precision (const rtx_mode_t &x)
>  {
>    return GET_MODE_PRECISION (x.second);
>  }
>
> -inline wi::storage_ref
> +ALWAYS_INLINE wi::storage_ref
>  wi::int_traits <rtx_mode_t>::decompose (HOST_WIDE_INT *,
>                                         unsigned int precision,
>                                         const rtx_mode_t &x)
> @@ -1949,7 +1950,7 @@ namespace wi
>    wide_int max_value (enum machine_mode, signop);
>  }
>
> -inline wi::hwi_with_prec
> +ALWAYS_INLINE wi::hwi_with_prec
>  wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
>  {
>    return shwi (val, GET_MODE_PRECISION (mode));
> @@ -1957,7 +1958,7 @@ wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
>
>  /* Produce the smallest number that is represented in MODE.  The precision
>     is taken from MODE and the sign from SGN.  */
> -inline wide_int
> +ALWAYS_INLINE wide_int
>  wi::min_value (enum machine_mode mode, signop sgn)
>  {
>    return min_value (GET_MODE_PRECISION (mode), sgn);
> @@ -1965,7 +1966,7 @@ wi::min_value (enum machine_mode mode, signop sgn)
>
>  /* Produce the largest number that is represented in MODE.  The precision
>     is taken from MODE and the sign from SGN.  */
> -inline wide_int
> +ALWAYS_INLINE wide_int
>  wi::max_value (enum machine_mode mode, signop sgn)
>  {
>    return max_value (GET_MODE_PRECISION (mode), sgn);
> @@ -2007,7 +2008,7 @@ extern enum rtx_code get_index_code (const struct address_info *);
>  /* Return the cost of SET X.  SPEED_P is true if optimizing for speed
>     rather than size.  */
>
> -static inline int
> +static ALWAYS_INLINE int
>  set_rtx_cost (rtx x, bool speed_p)
>  {
>    return rtx_cost (x, INSN, 4, speed_p);
> @@ -2015,7 +2016,7 @@ set_rtx_cost (rtx x, bool speed_p)
>
>  /* Like set_rtx_cost, but return both the speed and size costs in C.  */
>
> -static inline void
> +static ALWAYS_INLINE void
>  get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
>  {
>    get_full_rtx_cost (x, INSN, 4, c);
> @@ -2025,7 +2026,7 @@ get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
>     of a register move.  SPEED_P is true if optimizing for speed rather
>     than size.  */
>
> -static inline int
> +static ALWAYS_INLINE int
>  set_src_cost (rtx x, bool speed_p)
>  {
>    return rtx_cost (x, SET, 1, speed_p);
> @@ -2033,7 +2034,7 @@ set_src_cost (rtx x, bool speed_p)
>
>  /* Like set_src_cost, but return both the speed and size costs in C.  */
>
> -static inline void
> +static ALWAYS_INLINE void
>  get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
>  {
>    get_full_rtx_cost (x, SET, 1, c);
> @@ -3055,7 +3056,7 @@ extern struct target_rtl *this_target_rtl;
>
>  #ifndef GENERATOR_FILE
>  /* Return the attributes of a MEM rtx.  */
> -static inline struct mem_attrs *
> +static ALWAYS_INLINE struct mem_attrs *
>  get_mem_attrs (const_rtx x)
>  {
>    struct mem_attrs *attrs;
> diff --git a/include/ansidecl.h b/include/ansidecl.h
> index 0fb23bb..9132ee0 100644
> --- a/include/ansidecl.h
> +++ b/include/ansidecl.h
> @@ -306,6 +306,12 @@ So instead we use the macro below and test it against specific values.  */
>  #define ENUM_BITFIELD(TYPE) unsigned int
>  #endif
>
> +#ifdef __GNUC__
> +#define ALWAYS_INLINE __attribute__ ((always_inline)) inline
> +#else
> +#define ALWAYS_INLINE inline
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 2.1.0
>
Andi Kleen Sept. 2, 2014, 7:20 a.m. UTC | #2
> there have been bugs in the past in the area of always_inline too.

You're arguing for my patch. It would find those bugs.

-Andi
Andrew Pinski Sept. 2, 2014, 7:22 a.m. UTC | #3
On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen <andi@firstfloor.org> wrote:
>
>> there have been bugs in the past in the area of always_inline too.
>
> You're arguing for my patch. It would find those bugs.


No I am arguing against it since the older versions of GCC we cannot change.

Thanks,
Andrew

>
> -Andi
Steven Bosscher Sept. 2, 2014, 8:36 a.m. UTC | #4
On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>
>>> there have been bugs in the past in the area of always_inline too.
>>
>> You're arguing for my patch. It would find those bugs.
>
>
> No I am arguing against it since the older versions of GCC we cannot change.

Should such bugs turn up, we can account for them in ansidecl.h.

I think Andi's patch should go in.

Ciao!
Steven
Andrew Pinski Sept. 2, 2014, 8:42 a.m. UTC | #5
> On Sep 2, 2014, at 1:36 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> 
>> On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
>>> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>> 
>>>> there have been bugs in the past in the area of always_inline too.
>>> 
>>> You're arguing for my patch. It would find those bugs.
>> 
>> 
>> No I am arguing against it since the older versions of GCC we cannot change.
> 
> Should such bugs turn up, we can account for them in ansidecl.h.
> 
> I think Andi's patch should go in.

I does hurt debug ability with older compilers too. So if we need to figure out why stage is being miscompiled it is harder to figure how to work around it.  

I think stage should really be -O0 even with respect of inline.  I think we should never force inline inside gcc even at -O0 as it is just a hack (we know it as we added the attribute in the first place). 

Thanks,
Andrew

> 
> Ciao!
> Steven
Richard Biener Sept. 2, 2014, 8:43 a.m. UTC | #6
On Tue, Sep 2, 2014 at 10:36 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
>> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>>
>>>> there have been bugs in the past in the area of always_inline too.
>>>
>>> You're arguing for my patch. It would find those bugs.
>>
>>
>> No I am arguing against it since the older versions of GCC we cannot change.
>
> Should such bugs turn up, we can account for them in ansidecl.h.
>
> I think Andi's patch should go in.

I disagree.  always-inline isn't an optimization attribute but a correctness
one.

Instead we should not build stage1 with -O0 if we detect a reasonably
recent GCC host compiler (say one that is still maintained).  Or
we simply should make -finline work at -O0 (I suppose it might already
work?) and use it.

Richard.

> Ciao!
> Steven
David Malcolm Sept. 2, 2014, 2:55 p.m. UTC | #7
On Tue, 2014-09-02 at 00:03 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> I noticed that with the trunk compiler a range of the new rtl
> inlines show up as hot in a profiler during stage1. I think
> that happens because stage1 is not using optimization
> and does not inline plain "inline".  And these rtl inlines
> are very frequently called.

Sorry about that.

FWIW I'm working on some followup patches for the rtx-classes work that
ought to eliminate some of the is_a_helper<> calls; I hope to post them
in the next few days. [1]

I suspect the bulk of them currently are coming from the safe_as_a
<rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
information handy on that?

Dave

[1] (I have to take the rest of today off for a family matter).
Andi Kleen Sept. 2, 2014, 4:52 p.m. UTC | #8
> Or we simply should make -finline work at -O0 (I suppose it might already
> work?) and use it.

Yes that's probably better. There are more hot inlines in the stage 1 profile
(like wi::storage_ref or vec::length)
I suspect with the ongoing C++'ification that will get worse.

-Andi
Andi Kleen Sept. 2, 2014, 5:50 p.m. UTC | #9
> I suspect the bulk of them currently are coming from the safe_as_a
> <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
> information handy on that?

Yes that's right:

-   1.03%  lto1                    [.] bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                     ▒
   - bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                                                       ▒
      - 92.20% bool is_a<rtx_insn*, rtx_def>(rtx_def*)                                                                                          ▒
         - 98.53% rtx_insn* safe_as_a<rtx_insn*, rtx_def>(rtx_def*)                                                                             ▒
            - 73.28% NEXT_INSN(rtx_insn const*)                                                                                                 ▒
Richard Biener Sept. 3, 2014, 9:49 a.m. UTC | #10
On Tue, Sep 2, 2014 at 6:52 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> Or we simply should make -finline work at -O0 (I suppose it might already
>> work?) and use it.
>
> Yes that's probably better. There are more hot inlines in the stage 1 profile
> (like wi::storage_ref or vec::length)
> I suspect with the ongoing C++'ification that will get worse.

Like with this (untested apart from on a small testcase).  Of course it
usually won't help bootstrap because your host compiler doesn't support
-O0 -finline yet.  Also it might not help that much because while it certainly
removes call overhead it will still not optimize anything else.  Also
inline analysis correctly assumes that no stmts go away so you
only have the call overhead as room to allow inlining (so with checking
enabled the is_a/as_a stuff is probably too large - didn't check).

It may also negatively affect debugging (no var-tracking at -O0).

Anyway, removing !optimize checks in favor of flag_no_inline checks
and initializing that properly is a cleanup as well.

Now to see how to best test this ...

Richard.

> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only
Richard Biener Sept. 3, 2014, 9:52 a.m. UTC | #11
On Tue, Sep 2, 2014 at 6:52 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> Or we simply should make -finline work at -O0 (I suppose it might already
>> work?) and use it.
>
> Yes that's probably better. There are more hot inlines in the stage 1 profile
> (like wi::storage_ref or vec::length)
> I suspect with the ongoing C++'ification that will get worse.

Btw, it's C++ which I considered that -Og might replace -O0 exactly
because of all the abstraction penalty which usually doesn't help
debugging at -O0.  Also the idea was that -Og might even compile
faster than -O0, but that's far from true unfortunately ...

Richard.

> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only
Andi Kleen Sept. 4, 2014, 3:58 a.m. UTC | #12
> 
> Anyway, removing !optimize checks in favor of flag_no_inline checks
> and initializing that properly is a cleanup as well.

Patch looks good to me.
-Andi
Richard Biener Sept. 4, 2014, 1:02 p.m. UTC | #13
On Thu, Sep 4, 2014 at 5:58 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>
>> Anyway, removing !optimize checks in favor of flag_no_inline checks
>> and initializing that properly is a cleanup as well.
>
> Patch looks good to me.

Unfortunately it doesn't pass bootstrap (inline-summary re-use
between IPA passes is a maze...).  So I need to tweak it more
(pushed back for now).

Richard.

> -Andi
Jan Hubicka Sept. 4, 2014, 6:52 p.m. UTC | #14
> On Thu, Sep 4, 2014 at 5:58 AM, Andi Kleen <andi@firstfloor.org> wrote:
> >>
> >> Anyway, removing !optimize checks in favor of flag_no_inline checks
> >> and initializing that properly is a cleanup as well.
> >
> > Patch looks good to me.
> 
> Unfortunately it doesn't pass bootstrap (inline-summary re-use
> between IPA passes is a maze...).  So I need to tweak it more
> (pushed back for now).

From a quick glance it looks resonable thing to do.
I can fix the summary issues after returning from trip this Sunday.

Honza
> 
> Richard.
> 
> > -Andi
diff mbox

Patch

diff --git a/gcc/rtl.h b/gcc/rtl.h
index beeed2f..d711e43 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -21,6 +21,7 @@  along with GCC; see the file COPYING3.  If not see
 #define GCC_RTL_H
 
 #include <utility>
+#include "ansidecl.h"
 #include "statistics.h"
 #include "machmode.h"
 #include "input.h"
@@ -418,7 +419,7 @@  public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_expr_list *>::test (rtx rt)
 {
   return rt->code == EXPR_LIST;
@@ -447,7 +448,7 @@  public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_insn_list *>::test (rtx rt)
 {
   return rt->code == INSN_LIST;
@@ -474,7 +475,7 @@  public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_sequence *>::test (rtx rt)
 {
   return rt->code == SEQUENCE;
@@ -482,7 +483,7 @@  is_a_helper <rtx_sequence *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <const rtx_sequence *>::test (const_rtx rt)
 {
   return rt->code == SEQUENCE;
@@ -778,7 +779,7 @@  struct GTY(()) rtvec_def {
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_insn *>::test (rtx rt)
 {
   return (INSN_P (rt)
@@ -790,7 +791,7 @@  is_a_helper <rtx_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <const rtx_insn *>::test (const_rtx rt)
 {
   return (INSN_P (rt)
@@ -802,7 +803,7 @@  is_a_helper <const rtx_insn *>::test (const_rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_debug_insn *>::test (rtx rt)
 {
   return DEBUG_INSN_P (rt);
@@ -810,7 +811,7 @@  is_a_helper <rtx_debug_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
 {
   return NONJUMP_INSN_P (rt);
@@ -818,7 +819,7 @@  is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_insn *>::test (rtx rt)
 {
   return JUMP_P (rt);
@@ -826,7 +827,7 @@  is_a_helper <rtx_jump_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_call_insn *>::test (rtx rt)
 {
   return CALL_P (rt);
@@ -834,7 +835,7 @@  is_a_helper <rtx_call_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
 {
   return CALL_P (insn);
@@ -842,7 +843,7 @@  is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_table_data *>::test (rtx rt)
 {
   return JUMP_TABLE_DATA_P (rt);
@@ -850,7 +851,7 @@  is_a_helper <rtx_jump_table_data *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
 {
   return JUMP_TABLE_DATA_P (insn);
@@ -858,7 +859,7 @@  is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_barrier *>::test (rtx rt)
 {
   return BARRIER_P (rt);
@@ -866,7 +867,7 @@  is_a_helper <rtx_barrier *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_code_label *>::test (rtx rt)
 {
   return LABEL_P (rt);
@@ -874,7 +875,7 @@  is_a_helper <rtx_code_label *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
 {
   return LABEL_P (insn);
@@ -882,7 +883,7 @@  is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_note *>::test (rtx rt)
 {
   return NOTE_P (rt);
@@ -890,7 +891,7 @@  is_a_helper <rtx_note *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_note *>::test (rtx_insn *insn)
 {
   return NOTE_P (insn);
@@ -1257,26 +1258,26 @@  extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
 
 /* Methods of rtx_expr_list.  */
 
-inline rtx_expr_list *rtx_expr_list::next () const
+ALWAYS_INLINE rtx_expr_list *rtx_expr_list::next () const
 {
   rtx tmp = XEXP (this, 1);
   return safe_as_a <rtx_expr_list *> (tmp);
 }
 
-inline rtx rtx_expr_list::element () const
+ALWAYS_INLINE rtx rtx_expr_list::element () const
 {
   return XEXP (this, 0);
 }
 
 /* Methods of rtx_insn_list.  */
 
-inline rtx_insn_list *rtx_insn_list::next () const
+ALWAYS_INLINE rtx_insn_list *rtx_insn_list::next () const
 {
   rtx tmp = XEXP (this, 1);
   return safe_as_a <rtx_insn_list *> (tmp);
 }
 
-inline rtx_insn *rtx_insn_list::insn () const
+ALWAYS_INLINE rtx_insn *rtx_insn_list::insn () const
 {
   rtx tmp = XEXP (this, 0);
   return safe_as_a <rtx_insn *> (tmp);
@@ -1284,17 +1285,17 @@  inline rtx_insn *rtx_insn_list::insn () const
 
 /* Methods of rtx_sequence.  */
 
-inline int rtx_sequence::len () const
+ALWAYS_INLINE int rtx_sequence::len () const
 {
   return XVECLEN (this, 0);
 }
 
-inline rtx rtx_sequence::element (int index) const
+ALWAYS_INLINE rtx rtx_sequence::element (int index) const
 {
   return XVECEXP (this, 0, index);
 }
 
-inline rtx_insn *rtx_sequence::insn (int index) const
+ALWAYS_INLINE rtx_insn *rtx_sequence::insn (int index) const
 {
   return as_a <rtx_insn *> (XVECEXP (this, 0, index));
 }
@@ -1303,12 +1304,12 @@  inline rtx_insn *rtx_sequence::insn (int index) const
 
 /* Holds a unique number for each insn.
    These are not necessarily sequentially increasing.  */
-inline int INSN_UID (const_rtx insn)
+ALWAYS_INLINE int INSN_UID (const_rtx insn)
 {
   return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
 				    (insn))->u2.insn_uid;
 }
-inline int& INSN_UID (rtx insn)
+ALWAYS_INLINE int& INSN_UID (rtx insn)
 {
   return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
 				    (insn))->u2.insn_uid;
@@ -1321,60 +1322,60 @@  inline int& INSN_UID (rtx insn)
    and an lvalue form:
      SET_NEXT_INSN/SET_PREV_INSN.  */
 
-inline rtx_insn *PREV_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *PREV_INSN (const rtx_insn *insn)
 {
   rtx prev = XEXP (insn, 0);
   return safe_as_a <rtx_insn *> (prev);
 }
 
-inline rtx& SET_PREV_INSN (rtx_insn *insn)
+ALWAYS_INLINE rtx& SET_PREV_INSN (rtx_insn *insn)
 {
   return XEXP (insn, 0);
 }
 
-inline rtx_insn *NEXT_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *NEXT_INSN (const rtx_insn *insn)
 {
   rtx next = XEXP (insn, 1);
   return safe_as_a <rtx_insn *> (next);
 }
 
-inline rtx& SET_NEXT_INSN (rtx_insn *insn)
+ALWAYS_INLINE rtx& SET_NEXT_INSN (rtx_insn *insn)
 {
   return XEXP (insn, 1);
 }
 
-inline basic_block BLOCK_FOR_INSN (const_rtx insn)
+ALWAYS_INLINE basic_block BLOCK_FOR_INSN (const_rtx insn)
 {
   return XBBDEF (insn, 2);
 }
 
-inline basic_block& BLOCK_FOR_INSN (rtx insn)
+ALWAYS_INLINE basic_block& BLOCK_FOR_INSN (rtx insn)
 {
   return XBBDEF (insn, 2);
 }
 
 /* The body of an insn.  */
-inline rtx PATTERN (const_rtx insn)
+ALWAYS_INLINE rtx PATTERN (const_rtx insn)
 {
   return XEXP (insn, 3);
 }
 
-inline rtx& PATTERN (rtx insn)
+ALWAYS_INLINE rtx& PATTERN (rtx insn)
 {
   return XEXP (insn, 3);
 }
 
-inline unsigned int INSN_LOCATION (const_rtx insn)
+ALWAYS_INLINE unsigned int INSN_LOCATION (const_rtx insn)
 {
   return XUINT (insn, 4);
 }
 
-inline unsigned int& INSN_LOCATION (rtx insn)
+ALWAYS_INLINE unsigned int& INSN_LOCATION (rtx insn)
 {
   return XUINT (insn, 4);
 }
 
-inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
+ALWAYS_INLINE bool INSN_HAS_LOCATION (const rtx_insn *insn)
 {
   return LOCATION_LOCUS (INSN_LOCATION (insn)) != UNKNOWN_LOCATION;
 }
@@ -1387,7 +1388,7 @@  inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
    -1 means this instruction has not been recognized yet.  */
 #define INSN_CODE(INSN) XINT (INSN, 5)
 
-inline rtvec rtx_jump_table_data::get_labels () const
+ALWAYS_INLINE rtvec rtx_jump_table_data::get_labels () const
 {
   rtx pat = PATTERN (this);
   if (GET_CODE (pat) == ADDR_VEC)
@@ -1658,7 +1659,7 @@  enum label_kind
    be decremented and possibly the label can be deleted.  */
 #define JUMP_LABEL(INSN)   XCEXP (INSN, 7, JUMP_INSN)
 
-inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
 {
   return safe_as_a <rtx_insn *> (JUMP_LABEL (insn));
 }
@@ -1682,7 +1683,7 @@  inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
   (RTL_FLAG_CHECK1 ("ORIGINAL_REGNO", (RTX), REG)->u2.original_regno)
 
 /* Force the REGNO macro to only be used on the lhs.  */
-static inline unsigned int
+static ALWAYS_INLINE unsigned int
 rhs_regno (const_rtx x)
 {
   return XCUINT (x, 0, REG);
@@ -1774,7 +1775,7 @@  struct full_rtx_costs
 };
 
 /* Initialize a full_rtx_costs structure C to the maximum cost.  */
-static inline void
+static ALWAYS_INLINE void
 init_costs_to_max (struct full_rtx_costs *c)
 {
   c->speed = MAX_COST;
@@ -1782,7 +1783,7 @@  init_costs_to_max (struct full_rtx_costs *c)
 }
 
 /* Initialize a full_rtx_costs structure C to zero cost.  */
-static inline void
+static ALWAYS_INLINE void
 init_costs_to_zero (struct full_rtx_costs *c)
 {
   c->speed = 0;
@@ -1791,7 +1792,7 @@  init_costs_to_zero (struct full_rtx_costs *c)
 
 /* Compare two full_rtx_costs structures A and B, returning true
    if A < B when optimizing for speed.  */
-static inline bool
+static ALWAYS_INLINE bool
 costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
 	    bool speed)
 {
@@ -1805,7 +1806,7 @@  costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
 
 /* Increase both members of the full_rtx_costs structure C by the
    cost of N insns.  */
-static inline void
+static ALWAYS_INLINE void
 costs_add_n_insns (struct full_rtx_costs *c, int n)
 {
   c->speed += COSTS_N_INSNS (n);
@@ -1904,13 +1905,13 @@  namespace wi
   };
 }
 
-inline unsigned int
+ALWAYS_INLINE unsigned int
 wi::int_traits <rtx_mode_t>::get_precision (const rtx_mode_t &x)
 {
   return GET_MODE_PRECISION (x.second);
 }
 
-inline wi::storage_ref
+ALWAYS_INLINE wi::storage_ref
 wi::int_traits <rtx_mode_t>::decompose (HOST_WIDE_INT *,
 					unsigned int precision,
 					const rtx_mode_t &x)
@@ -1949,7 +1950,7 @@  namespace wi
   wide_int max_value (enum machine_mode, signop);
 }
 
-inline wi::hwi_with_prec
+ALWAYS_INLINE wi::hwi_with_prec
 wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
 {
   return shwi (val, GET_MODE_PRECISION (mode));
@@ -1957,7 +1958,7 @@  wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
 
 /* Produce the smallest number that is represented in MODE.  The precision
    is taken from MODE and the sign from SGN.  */
-inline wide_int
+ALWAYS_INLINE wide_int
 wi::min_value (enum machine_mode mode, signop sgn)
 {
   return min_value (GET_MODE_PRECISION (mode), sgn);
@@ -1965,7 +1966,7 @@  wi::min_value (enum machine_mode mode, signop sgn)
 
 /* Produce the largest number that is represented in MODE.  The precision
    is taken from MODE and the sign from SGN.  */
-inline wide_int
+ALWAYS_INLINE wide_int
 wi::max_value (enum machine_mode mode, signop sgn)
 {
   return max_value (GET_MODE_PRECISION (mode), sgn);
@@ -2007,7 +2008,7 @@  extern enum rtx_code get_index_code (const struct address_info *);
 /* Return the cost of SET X.  SPEED_P is true if optimizing for speed
    rather than size.  */
 
-static inline int
+static ALWAYS_INLINE int
 set_rtx_cost (rtx x, bool speed_p)
 {
   return rtx_cost (x, INSN, 4, speed_p);
@@ -2015,7 +2016,7 @@  set_rtx_cost (rtx x, bool speed_p)
 
 /* Like set_rtx_cost, but return both the speed and size costs in C.  */
 
-static inline void
+static ALWAYS_INLINE void
 get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
 {
   get_full_rtx_cost (x, INSN, 4, c);
@@ -2025,7 +2026,7 @@  get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
    of a register move.  SPEED_P is true if optimizing for speed rather
    than size.  */
 
-static inline int
+static ALWAYS_INLINE int
 set_src_cost (rtx x, bool speed_p)
 {
   return rtx_cost (x, SET, 1, speed_p);
@@ -2033,7 +2034,7 @@  set_src_cost (rtx x, bool speed_p)
 
 /* Like set_src_cost, but return both the speed and size costs in C.  */
 
-static inline void
+static ALWAYS_INLINE void
 get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 {
   get_full_rtx_cost (x, SET, 1, c);
@@ -3055,7 +3056,7 @@  extern struct target_rtl *this_target_rtl;
 
 #ifndef GENERATOR_FILE
 /* Return the attributes of a MEM rtx.  */
-static inline struct mem_attrs *
+static ALWAYS_INLINE struct mem_attrs *
 get_mem_attrs (const_rtx x)
 {
   struct mem_attrs *attrs;
diff --git a/include/ansidecl.h b/include/ansidecl.h
index 0fb23bb..9132ee0 100644
--- a/include/ansidecl.h
+++ b/include/ansidecl.h
@@ -306,6 +306,12 @@  So instead we use the macro below and test it against specific values.  */
 #define ENUM_BITFIELD(TYPE) unsigned int
 #endif
 
+#ifdef __GNUC__
+#define ALWAYS_INLINE __attribute__ ((always_inline)) inline
+#else
+#define ALWAYS_INLINE inline
+#endif
+
 #ifdef __cplusplus
 }
 #endif