diff mbox

Add TREE_INT_CST_OFFSET_NUNITS

Message ID 8738me6x8y.fsf@talisman.default
State New
Headers show

Commit Message

Richard Sandiford Nov. 30, 2013, 9:43 a.m. UTC
So maybe two INTEGER_CST lengths weren't enough.  Because bitsizetype
can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition
to pick the array length:

template <int N>
inline unsigned int
wi::extended_tree <N>::get_len () const
{
  if (N == MAX_BITSIZE_MODE_ANY_INT
      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
    return TREE_INT_CST_EXT_NUNITS (m_t);
  else
    return TREE_INT_CST_NUNITS (m_t);
}

and this TYPE_PRECISION condition was relatively hot in
get_ref_base_and_extent when compiling insn-recog.ii.

Adding a third length for offset_int does seem to reduce the cost of
the offset_int + to_offset addition.

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard

Comments

Richard Biener Dec. 2, 2013, 11:09 a.m. UTC | #1
On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford
<rdsandiford@googlemail.com> wrote:
> So maybe two INTEGER_CST lengths weren't enough.  Because bitsizetype
> can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition
> to pick the array length:
>
> template <int N>
> inline unsigned int
> wi::extended_tree <N>::get_len () const
> {
>   if (N == MAX_BITSIZE_MODE_ANY_INT
>       || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>     return TREE_INT_CST_EXT_NUNITS (m_t);
>   else
>     return TREE_INT_CST_NUNITS (m_t);
> }
>
> and this TYPE_PRECISION condition was relatively hot in
> get_ref_base_and_extent when compiling insn-recog.ii.
>
> Adding a third length for offset_int does seem to reduce the cost of
> the offset_int + to_offset addition.
>
> Tested on x86_64-linux-gnu.  OK to install?

Hmm, that's now getting a bit excessive ...

 inline unsigned int
 wi::extended_tree <N>::get_len () const
 {
-  if (N == MAX_BITSIZE_MODE_ANY_INT
-      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
+  if (N == ADDR_MAX_PRECISION)
+    return TREE_INT_CST_OFFSET_NUNITS (m_t);
+  else if (N == MAX_BITSIZE_MODE_ANY_INT
+          || N > TYPE_PRECISION (TREE_TYPE (m_t)))
     return TREE_INT_CST_EXT_NUNITS (m_t);
   else
     return TREE_INT_CST_NUNITS (m_t);

Shouldn't it be N >= TYPE_PRECISION () btw?

Looking at the implementation it seems it would also work with

   return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT);

?

Richard.


> Thanks,
> Richard
>
>
> Index: gcc/ChangeLog.wide-int
> ===================================================================
> --- gcc/ChangeLog.wide-int      2013-11-30 09:31:16.359198395 +0000
> +++ gcc/ChangeLog.wide-int      2013-11-30 09:41:50.987741444 +0000
> @@ -616,6 +616,7 @@
>         (TREE_INT_CST_HIGH): Delete.
>         (TREE_INT_CST_NUNITS): New.
>         (TREE_INT_CST_EXT_NUNITS): Likewise.
> +       (TREE_INT_CST_OFFSET_NUNITS): Likewise.
>         (TREE_INT_CST_ELT): Likewise.
>         (INT_CST_LT): Use wide-int interfaces.
>         (INT_CST_LE): New.
> Index: gcc/tree-core.h
> ===================================================================
> --- gcc/tree-core.h     2013-11-30 09:31:16.359198395 +0000
> +++ gcc/tree-core.h     2013-11-30 09:41:12.011470169 +0000
> @@ -764,11 +764,16 @@ struct GTY(()) tree_base {
>      struct {
>        /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in
>          its native precision.  */
> -      unsigned short unextended;
> +      unsigned char unextended;
>
>        /* The number of HOST_WIDE_INTs if the INTEGER_CST is extended to
>          wider precisions based on its TYPE_SIGN.  */
> -      unsigned short extended;
> +      unsigned char extended;
> +
> +      /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in
> +        offset_int precision, with smaller integers being extended
> +        according to their TYPE_SIGN.  */
> +      unsigned char offset;
>      } int_length;
>
>      /* VEC length.  This field is only used with TREE_VEC.  */
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c  2013-11-30 09:31:16.359198395 +0000
> +++ gcc/tree.c  2013-11-30 09:41:42.965685621 +0000
> @@ -1285,6 +1285,7 @@ wide_int_to_tree (tree type, const wide_
>             /* Make sure no one is clobbering the shared constant.  */
>             gcc_checking_assert (TREE_TYPE (t) == type
>                                  && TREE_INT_CST_NUNITS (t) == 1
> +                                && TREE_INT_CST_OFFSET_NUNITS (t) == 1
>                                  && TREE_INT_CST_EXT_NUNITS (t) == 1
>                                  && TREE_INT_CST_ELT (t, 0) == hwi);
>           else
> @@ -1964,6 +1965,7 @@ make_int_cst_stat (int len, int ext_len
>    TREE_SET_CODE (t, INTEGER_CST);
>    TREE_INT_CST_NUNITS (t) = len;
>    TREE_INT_CST_EXT_NUNITS (t) = ext_len;
> +  TREE_INT_CST_OFFSET_NUNITS (t) = MIN (ext_len, OFFSET_INT_ELTS);
>
>    TREE_CONSTANT (t) = 1;
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h  2013-11-30 09:31:16.359198395 +0000
> +++ gcc/tree.h  2013-11-30 09:41:29.418591391 +0000
> @@ -907,6 +907,8 @@ #define TREE_INT_CST_NUNITS(NODE) \
>    (INTEGER_CST_CHECK (NODE)->base.u.int_length.unextended)
>  #define TREE_INT_CST_EXT_NUNITS(NODE) \
>    (INTEGER_CST_CHECK (NODE)->base.u.int_length.extended)
> +#define TREE_INT_CST_OFFSET_NUNITS(NODE) \
> +  (INTEGER_CST_CHECK (NODE)->base.u.int_length.offset)
>  #define TREE_INT_CST_ELT(NODE, I) TREE_INT_CST_ELT_CHECK (NODE, I)
>  #define TREE_INT_CST_LOW(NODE) \
>    ((unsigned HOST_WIDE_INT) TREE_INT_CST_ELT (NODE, 0))
> @@ -4623,8 +4625,10 @@ wi::extended_tree <N>::get_val () const
>  inline unsigned int
>  wi::extended_tree <N>::get_len () const
>  {
> -  if (N == MAX_BITSIZE_MODE_ANY_INT
> -      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
> +  if (N == ADDR_MAX_PRECISION)
> +    return TREE_INT_CST_OFFSET_NUNITS (m_t);
> +  else if (N == MAX_BITSIZE_MODE_ANY_INT
> +          || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>      return TREE_INT_CST_EXT_NUNITS (m_t);
>    else
>      return TREE_INT_CST_NUNITS (m_t);
> Index: gcc/wide-int.h
> ===================================================================
> --- gcc/wide-int.h      2013-11-30 09:31:16.359198395 +0000
> +++ gcc/wide-int.h      2013-11-30 09:40:32.710196218 +0000
> @@ -256,6 +256,9 @@ #define ADDR_MAX_BITSIZE 64
>  #define ADDR_MAX_PRECISION \
>    ((ADDR_MAX_BITSIZE + 4 + HOST_BITS_PER_WIDE_INT - 1) & ~(HOST_BITS_PER_WIDE_INT - 1))
>
> +/* The number of HWIs needed to store an offset_int.  */
> +#define OFFSET_INT_ELTS (ADDR_MAX_PRECISION / HOST_BITS_PER_WIDE_INT)
> +
>  /* The type of result produced by a binary operation on types T1 and T2.
>     Defined purely for brevity.  */
>  #define WI_BINARY_RESULT(T1, T2) \
Richard Sandiford Dec. 2, 2013, 1:48 p.m. UTC | #2
Richard Biener <richard.guenther@gmail.com> writes:
> On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford
> <rdsandiford@googlemail.com> wrote:
>> So maybe two INTEGER_CST lengths weren't enough.  Because bitsizetype
>> can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition
>> to pick the array length:
>>
>> template <int N>
>> inline unsigned int
>> wi::extended_tree <N>::get_len () const
>> {
>>   if (N == MAX_BITSIZE_MODE_ANY_INT
>>       || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>>     return TREE_INT_CST_EXT_NUNITS (m_t);
>>   else
>>     return TREE_INT_CST_NUNITS (m_t);
>> }
>>
>> and this TYPE_PRECISION condition was relatively hot in
>> get_ref_base_and_extent when compiling insn-recog.ii.
>>
>> Adding a third length for offset_int does seem to reduce the cost of
>> the offset_int + to_offset addition.
>>
>> Tested on x86_64-linux-gnu.  OK to install?
>
> Hmm, that's now getting a bit excessive ...

Well, we access trees in three different ways, and we want all of them
to be cheap, so having three fields in the tree seems pretty natural.

>  inline unsigned int
>  wi::extended_tree <N>::get_len () const
>  {
> -  if (N == MAX_BITSIZE_MODE_ANY_INT
> -      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
> +  if (N == ADDR_MAX_PRECISION)
> +    return TREE_INT_CST_OFFSET_NUNITS (m_t);
> +  else if (N == MAX_BITSIZE_MODE_ANY_INT
> +          || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>      return TREE_INT_CST_EXT_NUNITS (m_t);
>    else
>      return TREE_INT_CST_NUNITS (m_t);
>
> Shouldn't it be N >= TYPE_PRECISION () btw?

No, TREE_INT_CST_NUNITS is for accessing the tree in TYPE_PRECISION
and TREE_INT_CST_EXT_NUNITS is for accessing it in wider precisions.

N is always >= TYPE_PRECISION here, from the assert in the constructor:

  gcc_checking_assert (TYPE_PRECISION (TREE_TYPE (t)) <= N);

TREE_INT_CST_OFFSET_NUNITS is just a cached x ? TREE_INT_CST_NUNITS (m_t)
: TREE_INT_CST_EXT_NUNITS (m_t) result for a particular precision.

> Looking at the implementation it seems it would also work with
>
>    return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT);

Yeah, the MIN in the patch was probably bogus sorry.  It only works
if we can assume that no bitsizetype will have ADDR_MAX_PRECISION
significant (non-sign) bits -- in particular that there's no such thing as
an all-1s _unsigned_ bitsizetype.  That might be true in practice given
the way we use offsets, but it isn't safe to generalise that to all N.

A safer form would be:

   if (ext_len > OFFSET_INT_ELTS)
     TREE_INT_CST_OFFSET_NUNITS (t) = len;
   else
     TREE_INT_CST_OFFSET_NUNITS (t) = ext_len;

The reason the general form doesn't work for all N is because of the
compressed representation.  E.g. the example I gave a while ago about
a 256-bit all-1s unsigned number being { -1 } as a 256-bit number and
{ -1, -1, -1, -1, 0 } as a 257+-bit number.

But the point of the patch is to avoid any runtime checks here,
so the TYPE_PRECISION case is never actually used now.  I just kept
it around for completeness, since we'd been using it successfully so far.
I can put in a gcc_unreachable if you prefer.

Thanks,
Richard
Richard Biener Dec. 2, 2013, 1:53 p.m. UTC | #3
On Mon, Dec 2, 2013 at 2:48 PM, Richard Sandiford
<rdsandiford@googlemail.com> wrote:
> Richard Biener <richard.guenther@gmail.com> writes:
>> On Sat, Nov 30, 2013 at 10:43 AM, Richard Sandiford
>> <rdsandiford@googlemail.com> wrote:
>>> So maybe two INTEGER_CST lengths weren't enough.  Because bitsizetype
>>> can be offset_int-sized, wi::to_offset had a TYPE_PRECISION condition
>>> to pick the array length:
>>>
>>> template <int N>
>>> inline unsigned int
>>> wi::extended_tree <N>::get_len () const
>>> {
>>>   if (N == MAX_BITSIZE_MODE_ANY_INT
>>>       || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>>>     return TREE_INT_CST_EXT_NUNITS (m_t);
>>>   else
>>>     return TREE_INT_CST_NUNITS (m_t);
>>> }
>>>
>>> and this TYPE_PRECISION condition was relatively hot in
>>> get_ref_base_and_extent when compiling insn-recog.ii.
>>>
>>> Adding a third length for offset_int does seem to reduce the cost of
>>> the offset_int + to_offset addition.
>>>
>>> Tested on x86_64-linux-gnu.  OK to install?
>>
>> Hmm, that's now getting a bit excessive ...
>
> Well, we access trees in three different ways, and we want all of them
> to be cheap, so having three fields in the tree seems pretty natural.

Hmm, I guess you are right :/

>>  inline unsigned int
>>  wi::extended_tree <N>::get_len () const
>>  {
>> -  if (N == MAX_BITSIZE_MODE_ANY_INT
>> -      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>> +  if (N == ADDR_MAX_PRECISION)
>> +    return TREE_INT_CST_OFFSET_NUNITS (m_t);
>> +  else if (N == MAX_BITSIZE_MODE_ANY_INT
>> +          || N > TYPE_PRECISION (TREE_TYPE (m_t)))
>>      return TREE_INT_CST_EXT_NUNITS (m_t);
>>    else
>>      return TREE_INT_CST_NUNITS (m_t);
>>
>> Shouldn't it be N >= TYPE_PRECISION () btw?
>
> No, TREE_INT_CST_NUNITS is for accessing the tree in TYPE_PRECISION
> and TREE_INT_CST_EXT_NUNITS is for accessing it in wider precisions.
>
> N is always >= TYPE_PRECISION here, from the assert in the constructor:
>
>   gcc_checking_assert (TYPE_PRECISION (TREE_TYPE (t)) <= N);
>
> TREE_INT_CST_OFFSET_NUNITS is just a cached x ? TREE_INT_CST_NUNITS (m_t)
> : TREE_INT_CST_EXT_NUNITS (m_t) result for a particular precision.
>
>> Looking at the implementation it seems it would also work with
>>
>>    return MIN (TREE_INT_CST_EXT_NUNITS (m_t), N / HOST_BITS_PER_WIDE_INT);
>
> Yeah, the MIN in the patch was probably bogus sorry.  It only works
> if we can assume that no bitsizetype will have ADDR_MAX_PRECISION
> significant (non-sign) bits -- in particular that there's no such thing as
> an all-1s _unsigned_ bitsizetype.  That might be true in practice given
> the way we use offsets, but it isn't safe to generalise that to all N.
>
> A safer form would be:
>
>    if (ext_len > OFFSET_INT_ELTS)
>      TREE_INT_CST_OFFSET_NUNITS (t) = len;
>    else
>      TREE_INT_CST_OFFSET_NUNITS (t) = ext_len;
>
> The reason the general form doesn't work for all N is because of the
> compressed representation.  E.g. the example I gave a while ago about
> a 256-bit all-1s unsigned number being { -1 } as a 256-bit number and
> { -1, -1, -1, -1, 0 } as a 257+-bit number.
>
> But the point of the patch is to avoid any runtime checks here,
> so the TYPE_PRECISION case is never actually used now.  I just kept
> it around for completeness, since we'd been using it successfully so far.
> I can put in a gcc_unreachable if you prefer.

Yeah, I'd prefer a gcc_unreachable and a comment.

Thanks,
Richard.

> Thanks,
> Richard
diff mbox

Patch

Index: gcc/ChangeLog.wide-int
===================================================================
--- gcc/ChangeLog.wide-int	2013-11-30 09:31:16.359198395 +0000
+++ gcc/ChangeLog.wide-int	2013-11-30 09:41:50.987741444 +0000
@@ -616,6 +616,7 @@ 
 	(TREE_INT_CST_HIGH): Delete.
 	(TREE_INT_CST_NUNITS): New.
 	(TREE_INT_CST_EXT_NUNITS): Likewise.
+	(TREE_INT_CST_OFFSET_NUNITS): Likewise.
 	(TREE_INT_CST_ELT): Likewise.
 	(INT_CST_LT): Use wide-int interfaces.
 	(INT_CST_LE): New.
Index: gcc/tree-core.h
===================================================================
--- gcc/tree-core.h	2013-11-30 09:31:16.359198395 +0000
+++ gcc/tree-core.h	2013-11-30 09:41:12.011470169 +0000
@@ -764,11 +764,16 @@  struct GTY(()) tree_base {
     struct {
       /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in
 	 its native precision.  */
-      unsigned short unextended;
+      unsigned char unextended;
 
       /* The number of HOST_WIDE_INTs if the INTEGER_CST is extended to
 	 wider precisions based on its TYPE_SIGN.  */
-      unsigned short extended;
+      unsigned char extended;
+
+      /* The number of HOST_WIDE_INTs if the INTEGER_CST is accessed in
+	 offset_int precision, with smaller integers being extended
+	 according to their TYPE_SIGN.  */
+      unsigned char offset;
     } int_length;
 
     /* VEC length.  This field is only used with TREE_VEC.  */
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	2013-11-30 09:31:16.359198395 +0000
+++ gcc/tree.c	2013-11-30 09:41:42.965685621 +0000
@@ -1285,6 +1285,7 @@  wide_int_to_tree (tree type, const wide_
 	    /* Make sure no one is clobbering the shared constant.  */
 	    gcc_checking_assert (TREE_TYPE (t) == type
 				 && TREE_INT_CST_NUNITS (t) == 1
+				 && TREE_INT_CST_OFFSET_NUNITS (t) == 1
 				 && TREE_INT_CST_EXT_NUNITS (t) == 1
 				 && TREE_INT_CST_ELT (t, 0) == hwi);
 	  else
@@ -1964,6 +1965,7 @@  make_int_cst_stat (int len, int ext_len
   TREE_SET_CODE (t, INTEGER_CST);
   TREE_INT_CST_NUNITS (t) = len;
   TREE_INT_CST_EXT_NUNITS (t) = ext_len;
+  TREE_INT_CST_OFFSET_NUNITS (t) = MIN (ext_len, OFFSET_INT_ELTS);
 
   TREE_CONSTANT (t) = 1;
 
Index: gcc/tree.h
===================================================================
--- gcc/tree.h	2013-11-30 09:31:16.359198395 +0000
+++ gcc/tree.h	2013-11-30 09:41:29.418591391 +0000
@@ -907,6 +907,8 @@  #define TREE_INT_CST_NUNITS(NODE) \
   (INTEGER_CST_CHECK (NODE)->base.u.int_length.unextended)
 #define TREE_INT_CST_EXT_NUNITS(NODE) \
   (INTEGER_CST_CHECK (NODE)->base.u.int_length.extended)
+#define TREE_INT_CST_OFFSET_NUNITS(NODE) \
+  (INTEGER_CST_CHECK (NODE)->base.u.int_length.offset)
 #define TREE_INT_CST_ELT(NODE, I) TREE_INT_CST_ELT_CHECK (NODE, I)
 #define TREE_INT_CST_LOW(NODE) \
   ((unsigned HOST_WIDE_INT) TREE_INT_CST_ELT (NODE, 0))
@@ -4623,8 +4625,10 @@  wi::extended_tree <N>::get_val () const
 inline unsigned int
 wi::extended_tree <N>::get_len () const
 {
-  if (N == MAX_BITSIZE_MODE_ANY_INT
-      || N > TYPE_PRECISION (TREE_TYPE (m_t)))
+  if (N == ADDR_MAX_PRECISION)
+    return TREE_INT_CST_OFFSET_NUNITS (m_t);
+  else if (N == MAX_BITSIZE_MODE_ANY_INT
+	   || N > TYPE_PRECISION (TREE_TYPE (m_t)))
     return TREE_INT_CST_EXT_NUNITS (m_t);
   else
     return TREE_INT_CST_NUNITS (m_t);
Index: gcc/wide-int.h
===================================================================
--- gcc/wide-int.h	2013-11-30 09:31:16.359198395 +0000
+++ gcc/wide-int.h	2013-11-30 09:40:32.710196218 +0000
@@ -256,6 +256,9 @@  #define ADDR_MAX_BITSIZE 64
 #define ADDR_MAX_PRECISION \
   ((ADDR_MAX_BITSIZE + 4 + HOST_BITS_PER_WIDE_INT - 1) & ~(HOST_BITS_PER_WIDE_INT - 1))
 
+/* The number of HWIs needed to store an offset_int.  */
+#define OFFSET_INT_ELTS (ADDR_MAX_PRECISION / HOST_BITS_PER_WIDE_INT)
+
 /* The type of result produced by a binary operation on types T1 and T2.
    Defined purely for brevity.  */
 #define WI_BINARY_RESULT(T1, T2) \