Message ID | 8738me6x8y.fsf@talisman.default |
---|---|
State | New |
Headers | show |
On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford <rdsandiford@googlemail.com> wrote: > So maybe two INTEGER_CST lengths weren't enough. Because bitsizetype > can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition > to pick the array length: > > template <int N> > inline unsigned int > wi::extended_tree <N>::get_len () const > { > if (N == MAX_BITSIZE_MODE_ANY_INT > || N > TYPE_PRECISION (TREE_TYPE (m_t))) > return TREE_INT_CST_EXT_NUNITS (m_t); > else > return TREE_INT_CST_NUNITS (m_t); > } > > and this TYPE_PRECISION condition was relatively hot in > get_ref_base_and_extent when compiling insn-recog.ii. > > Adding a third length for offset_int does seem to reduce the cost of > the offset_int + to_offset addition. > > Tested on x86_64-linux-gnu. OK to install? Hmm, that's now getting a bit excessive ... inline unsigned int wi::extended_tree <N>::get_len () const { - if (N == MAX_BITSIZE_MODE_ANY_INT - || N > TYPE_PRECISION (TREE_TYPE (m_t))) + if (N == ADDR_MAX_PRECISION) + return TREE_INT_CST_OFFSET_NUNITS (m_t); + else if (N == MAX_BITSIZE_MODE_ANY_INT + || N > TYPE_PRECISION (TREE_TYPE (m_t))) return TREE_INT_CST_EXT_NUNITS (m_t); else return TREE_INT_CST_NUNITS (m_t); Shouldn't it be N >= TYPE_PRECISION () btw? Looking at the implementation it seems it would also work with return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT); ? Richard. > Thanks, > Richard > > > Index: gcc/ChangeLog.wide-int > =================================================================== > --- gcc/ChangeLog.wide-int 2013-11-30 09:31:16.359198395 +0000 > +++ gcc/ChangeLog.wide-int 2013-11-30 09:41:50.987741444 +0000 > @@ -616,6 +616,7 @@ > (TREE_INT_CST_HIGH): Delete. > (TREE_INT_CST_NUNITS): New. > (TREE_INT_CST_EXT_NUNITS): Likewise. > + (TREE_INT_CST_OFFSET_NUNITS): Likewise. > (TREE_INT_CST_ELT): Likewise. > (INT_CST_LT): Use wide-int interfaces. > (INT_CST_LE): New. > Index: gcc/tree-core.h > =================================================================== > --- gcc/tree-core.h 2013-11-30 09:31:16.359198395 +0000 > +++ gcc/tree-core.h 2013-11-30 09:41:12.011470169 +0000 > @@ -764,11 +764,16 @@ struct GTY(()) tree_base { > struct { > /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in > its native precision. */ > - unsigned short unextended; > + unsigned char unextended; > > /* The number of HOST_WIDE_INTs if the INTEGER_CST is extended to > wider precisions based on its TYPE_SIGN. */ > - unsigned short extended; > + unsigned char extended; > + > + /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in > + offset_int precision, with smaller integers being extended > + according to their TYPE_SIGN. */ > + unsigned char offset; > } int_length; > > /* VEC length. This field is only used with TREE_VEC. */ > Index: gcc/tree.c > =================================================================== > --- gcc/tree.c 2013-11-30 09:31:16.359198395 +0000 > +++ gcc/tree.c 2013-11-30 09:41:42.965685621 +0000 > @@ -1285,6 +1285,7 @@ wide_int_to_tree (tree type, const wide_ > /* Make sure no one is clobbering the shared constant. */ > gcc_checking_assert (TREE_TYPE (t) == type > && TREE_INT_CST_NUNITS (t) == 1 > + && TREE_INT_CST_OFFSET_NUNITS (t) == 1 > && TREE_INT_CST_EXT_NUNITS (t) == 1 > && TREE_INT_CST_ELT (t, 0) == hwi); > else > @@ -1964,6 +1965,7 @@ make_int_cst_stat (int len, int ext_len > TREE_SET_CODE (t, INTEGER_CST); > TREE_INT_CST_NUNITS (t) = len; > TREE_INT_CST_EXT_NUNITS (t) = ext_len; > + TREE_INT_CST_OFFSET_NUNITS (t) = MIN (ext_len, OFFSET_INT_ELTS); > > TREE_CONSTANT (t) = 1; > > Index: gcc/tree.h > =================================================================== > --- gcc/tree.h 2013-11-30 09:31:16.359198395 +0000 > +++ gcc/tree.h 2013-11-30 09:41:29.418591391 +0000 > @@ -907,6 +907,8 @@ #define TREE_INT_CST_NUNITS(NODE) \ > (INTEGER_CST_CHECK (NODE)->base.u.int_length.unextended) > #define TREE_INT_CST_EXT_NUNITS(NODE) \ > (INTEGER_CST_CHECK (NODE)->base.u.int_length.extended) > +#define TREE_INT_CST_OFFSET_NUNITS(NODE) \ > + (INTEGER_CST_CHECK (NODE)->base.u.int_length.offset) > #define TREE_INT_CST_ELT(NODE, I) TREE_INT_CST_ELT_CHECK (NODE, I) > #define TREE_INT_CST_LOW(NODE) \ > ((unsigned HOST_WIDE_INT) TREE_INT_CST_ELT (NODE, 0)) > @@ -4623,8 +4625,10 @@ wi::extended_tree <N>::get_val () const > inline unsigned int > wi::extended_tree <N>::get_len () const > { > - if (N == MAX_BITSIZE_MODE_ANY_INT > - || N > TYPE_PRECISION (TREE_TYPE (m_t))) > + if (N == ADDR_MAX_PRECISION) > + return TREE_INT_CST_OFFSET_NUNITS (m_t); > + else if (N == MAX_BITSIZE_MODE_ANY_INT > + || N > TYPE_PRECISION (TREE_TYPE (m_t))) > return TREE_INT_CST_EXT_NUNITS (m_t); > else > return TREE_INT_CST_NUNITS (m_t); > Index: gcc/wide-int.h > =================================================================== > --- gcc/wide-int.h 2013-11-30 09:31:16.359198395 +0000 > +++ gcc/wide-int.h 2013-11-30 09:40:32.710196218 +0000 > @@ -256,6 +256,9 @@ #define ADDR_MAX_BITSIZE 64 > #define ADDR_MAX_PRECISION \ > ((ADDR_MAX_BITSIZE + 4 + HOST_BITS_PER_WIDE_INT - 1) & ~(HOST_BITS_PER_WIDE_INT - 1)) > > +/* The number of HWIs needed to store an offset_int. */ > +#define OFFSET_INT_ELTS (ADDR_MAX_PRECISION / HOST_BITS_PER_WIDE_INT) > + > /* The type of result produced by a binary operation on types T1 and T2. > Defined purely for brevity. */ > #define WI_BINARY_RESULT(T1, T2) \
Richard Biener <richard.guenther@gmail.com> writes: > On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford > <rdsandiford@googlemail.com> wrote: >> So maybe two INTEGER_CST lengths weren't enough. Because bitsizetype >> can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition >> to pick the array length: >> >> template <int N> >> inline unsigned int >> wi::extended_tree <N>::get_len () const >> { >> if (N == MAX_BITSIZE_MODE_ANY_INT >> || N > TYPE_PRECISION (TREE_TYPE (m_t))) >> return TREE_INT_CST_EXT_NUNITS (m_t); >> else >> return TREE_INT_CST_NUNITS (m_t); >> } >> >> and this TYPE_PRECISION condition was relatively hot in >> get_ref_base_and_extent when compiling insn-recog.ii. >> >> Adding a third length for offset_int does seem to reduce the cost of >> the offset_int + to_offset addition. >> >> Tested on x86_64-linux-gnu. OK to install? > > Hmm, that's now getting a bit excessive ... Well, we access trees in three different ways, and we want all of them to be cheap, so having three fields in the tree seems pretty natural. > inline unsigned int > wi::extended_tree <N>::get_len () const > { > - if (N == MAX_BITSIZE_MODE_ANY_INT > - || N > TYPE_PRECISION (TREE_TYPE (m_t))) > + if (N == ADDR_MAX_PRECISION) > + return TREE_INT_CST_OFFSET_NUNITS (m_t); > + else if (N == MAX_BITSIZE_MODE_ANY_INT > + || N > TYPE_PRECISION (TREE_TYPE (m_t))) > return TREE_INT_CST_EXT_NUNITS (m_t); > else > return TREE_INT_CST_NUNITS (m_t); > > Shouldn't it be N >= TYPE_PRECISION () btw? No, TREE_INT_CST_NUNITS is for accessing the tree in TYPE_PRECISION and TREE_INT_CST_EXT_NUNITS is for accessing it in wider precisions. N is always >= TYPE_PRECISION here, from the assert in the constructor: gcc_checking_assert (TYPE_PRECISION (TREE_TYPE (t)) <= N); TREE_INT_CST_OFFSET_NUNITS is just a cached x ? TREE_INT_CST_NUNITS (m_t) : TREE_INT_CST_EXT_NUNITS (m_t) result for a particular precision. > Looking at the implementation it seems it would also work with > > return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT); Yeah, the MIN in the patch was probably bogus sorry. It only works if we can assume that no bitsizetype will have ADDR_MAX_PRECISION significant (non-sign) bits -- in particular that there's no such thing as an all-1s _unsigned_ bitsizetype. That might be true in practice given the way we use offsets, but it isn't safe to generalise that to all N. A safer form would be: if (ext_len > OFFSET_INT_ELTS) TREE_INT_CST_OFFSET_NUNITS (t) = len; else TREE_INT_CST_OFFSET_NUNITS (t) = ext_len; The reason the general form doesn't work for all N is because of the compressed representation. E.g. the example I gave a while ago about a 256-bit all-1s unsigned number being { -1 } as a 256-bit number and { -1, -1, -1, -1, 0 } as a 257+-bit number. But the point of the patch is to avoid any runtime checks here, so the TYPE_PRECISION case is never actually used now. I just kept it around for completeness, since we'd been using it successfully so far. I can put in a gcc_unreachable if you prefer. Thanks, Richard
On Mon, Dec 2, 2013 at 2:48 PM, Richard Sandiford <rdsandiford@googlemail.com> wrote: > Richard Biener <richard.guenther@gmail.com> writes: >> On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford >> <rdsandiford@googlemail.com> wrote: >>> So maybe two INTEGER_CST lengths weren't enough. Because bitsizetype >>> can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition >>> to pick the array length: >>> >>> template <int N> >>> inline unsigned int >>> wi::extended_tree <N>::get_len () const >>> { >>> if (N == MAX_BITSIZE_MODE_ANY_INT >>> || N > TYPE_PRECISION (TREE_TYPE (m_t))) >>> return TREE_INT_CST_EXT_NUNITS (m_t); >>> else >>> return TREE_INT_CST_NUNITS (m_t); >>> } >>> >>> and this TYPE_PRECISION condition was relatively hot in >>> get_ref_base_and_extent when compiling insn-recog.ii. >>> >>> Adding a third length for offset_int does seem to reduce the cost of >>> the offset_int + to_offset addition. >>> >>> Tested on x86_64-linux-gnu. OK to install? >> >> Hmm, that's now getting a bit excessive ... > > Well, we access trees in three different ways, and we want all of them > to be cheap, so having three fields in the tree seems pretty natural. Hmm, I guess you are right :/ >> inline unsigned int >> wi::extended_tree <N>::get_len () const >> { >> - if (N == MAX_BITSIZE_MODE_ANY_INT >> - || N > TYPE_PRECISION (TREE_TYPE (m_t))) >> + if (N == ADDR_MAX_PRECISION) >> + return TREE_INT_CST_OFFSET_NUNITS (m_t); >> + else if (N == MAX_BITSIZE_MODE_ANY_INT >> + || N > TYPE_PRECISION (TREE_TYPE (m_t))) >> return TREE_INT_CST_EXT_NUNITS (m_t); >> else >> return TREE_INT_CST_NUNITS (m_t); >> >> Shouldn't it be N >= TYPE_PRECISION () btw? > > No, TREE_INT_CST_NUNITS is for accessing the tree in TYPE_PRECISION > and TREE_INT_CST_EXT_NUNITS is for accessing it in wider precisions. > > N is always >= TYPE_PRECISION here, from the assert in the constructor: > > gcc_checking_assert (TYPE_PRECISION (TREE_TYPE (t)) <= N); > > TREE_INT_CST_OFFSET_NUNITS is just a cached x ? TREE_INT_CST_NUNITS (m_t) > : TREE_INT_CST_EXT_NUNITS (m_t) result for a particular precision. > >> Looking at the implementation it seems it would also work with >> >> return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT); > > Yeah, the MIN in the patch was probably bogus sorry. It only works > if we can assume that no bitsizetype will have ADDR_MAX_PRECISION > significant (non-sign) bits -- in particular that there's no such thing as > an all-1s _unsigned_ bitsizetype. That might be true in practice given > the way we use offsets, but it isn't safe to generalise that to all N. > > A safer form would be: > > if (ext_len > OFFSET_INT_ELTS) > TREE_INT_CST_OFFSET_NUNITS (t) = len; > else > TREE_INT_CST_OFFSET_NUNITS (t) = ext_len; > > The reason the general form doesn't work for all N is because of the > compressed representation. E.g. the example I gave a while ago about > a 256-bit all-1s unsigned number being { -1 } as a 256-bit number and > { -1, -1, -1, -1, 0 } as a 257+-bit number. > > But the point of the patch is to avoid any runtime checks here, > so the TYPE_PRECISION case is never actually used now. I just kept > it around for completeness, since we'd been using it successfully so far. > I can put in a gcc_unreachable if you prefer. Yeah, I'd prefer a gcc_unreachable and a comment. Thanks, Richard. > Thanks, > Richard
Index: gcc/ChangeLog.wide-int =================================================================== --- gcc/ChangeLog.wide-int 2013-11-30 09:31:16.359198395 +0000 +++ gcc/ChangeLog.wide-int 2013-11-30 09:41:50.987741444 +0000 @@ -616,6 +616,7 @@ (TREE_INT_CST_HIGH): Delete. (TREE_INT_CST_NUNITS): New. (TREE_INT_CST_EXT_NUNITS): Likewise. + (TREE_INT_CST_OFFSET_NUNITS): Likewise. (TREE_INT_CST_ELT): Likewise. (INT_CST_LT): Use wide-int interfaces. (INT_CST_LE): New. Index: gcc/tree-core.h =================================================================== --- gcc/tree-core.h 2013-11-30 09:31:16.359198395 +0000 +++ gcc/tree-core.h 2013-11-30 09:41:12.011470169 +0000 @@ -764,11 +764,16 @@ struct GTY(()) tree_base { struct { /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in its native precision. */ - unsigned short unextended; + unsigned char unextended; /* The number of HOST_WIDE_INTs if the INTEGER_CST is extended to wider precisions based on its TYPE_SIGN. */ - unsigned short extended; + unsigned char extended; + + /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in + offset_int precision, with smaller integers being extended + according to their TYPE_SIGN. */ + unsigned char offset; } int_length; /* VEC length. This field is only used with TREE_VEC. */ Index: gcc/tree.c =================================================================== --- gcc/tree.c 2013-11-30 09:31:16.359198395 +0000 +++ gcc/tree.c 2013-11-30 09:41:42.965685621 +0000 @@ -1285,6 +1285,7 @@ wide_int_to_tree (tree type, const wide_ /* Make sure no one is clobbering the shared constant. */ gcc_checking_assert (TREE_TYPE (t) == type && TREE_INT_CST_NUNITS (t) == 1 + && TREE_INT_CST_OFFSET_NUNITS (t) == 1 && TREE_INT_CST_EXT_NUNITS (t) == 1 && TREE_INT_CST_ELT (t, 0) == hwi); else @@ -1964,6 +1965,7 @@ make_int_cst_stat (int len, int ext_len TREE_SET_CODE (t, INTEGER_CST); TREE_INT_CST_NUNITS (t) = len; TREE_INT_CST_EXT_NUNITS (t) = ext_len; + TREE_INT_CST_OFFSET_NUNITS (t) = MIN (ext_len, OFFSET_INT_ELTS); TREE_CONSTANT (t) = 1; Index: gcc/tree.h =================================================================== --- gcc/tree.h 2013-11-30 09:31:16.359198395 +0000 +++ gcc/tree.h 2013-11-30 09:41:29.418591391 +0000 @@ -907,6 +907,8 @@ #define TREE_INT_CST_NUNITS(NODE) \ (INTEGER_CST_CHECK (NODE)->base.u.int_length.unextended) #define TREE_INT_CST_EXT_NUNITS(NODE) \ (INTEGER_CST_CHECK (NODE)->base.u.int_length.extended) +#define TREE_INT_CST_OFFSET_NUNITS(NODE) \ + (INTEGER_CST_CHECK (NODE)->base.u.int_length.offset) #define TREE_INT_CST_ELT(NODE, I) TREE_INT_CST_ELT_CHECK (NODE, I) #define TREE_INT_CST_LOW(NODE) \ ((unsigned HOST_WIDE_INT) TREE_INT_CST_ELT (NODE, 0)) @@ -4623,8 +4625,10 @@ wi::extended_tree <N>::get_val () const inline unsigned int wi::extended_tree <N>::get_len () const { - if (N == MAX_BITSIZE_MODE_ANY_INT - || N > TYPE_PRECISION (TREE_TYPE (m_t))) + if (N == ADDR_MAX_PRECISION) + return TREE_INT_CST_OFFSET_NUNITS (m_t); + else if (N == MAX_BITSIZE_MODE_ANY_INT + || N > TYPE_PRECISION (TREE_TYPE (m_t))) return TREE_INT_CST_EXT_NUNITS (m_t); else return TREE_INT_CST_NUNITS (m_t); Index: gcc/wide-int.h =================================================================== --- gcc/wide-int.h 2013-11-30 09:31:16.359198395 +0000 +++ gcc/wide-int.h 2013-11-30 09:40:32.710196218 +0000 @@ -256,6 +256,9 @@ #define ADDR_MAX_BITSIZE 64 #define ADDR_MAX_PRECISION \ ((ADDR_MAX_BITSIZE + 4 + HOST_BITS_PER_WIDE_INT - 1) & ~(HOST_BITS_PER_WIDE_INT - 1)) +/* The number of HWIs needed to store an offset_int. */ +#define OFFSET_INT_ELTS (ADDR_MAX_PRECISION / HOST_BITS_PER_WIDE_INT) + /* The type of result produced by a binary operation on types T1 and T2. Defined purely for brevity. */ #define WI_BINARY_RESULT(T1, T2) \