Patchwork Fold BIT_FIELD_REF of a reference

login
register
mail settings
Submitter Marc Glisse
Date April 3, 2013, 2:15 p.m.
Message ID <alpine.DEB.2.02.1304031557490.31329@stedding.saclay.inria.fr>
Download mbox | patch
Permalink /patch/233494/
State New
Headers show

Comments

Marc Glisse - April 3, 2013, 2:15 p.m.
Hello,

I am not 100% convinced that it is always better to fold to a MEM_REF, but 
that's what the PR asks for.

bootstrap+testsuite on x86_64-linux-gnu.

2013-04-03  Marc Glisse  <marc.glisse@inria.fr>

 	PR middle-end/52436
gcc/
 	* fold-const.c (fold_ternary_loc) <BIT_FIELD_REF>: Handle nested
 	reference.

gcc/testsuite/
 	* gcc.dg/pr52436.c: New testcase.
 	* gcc.dg/tree-ssa/ssa-fre-33.c: Adjust optimizer message.
 	* gcc.dg/tree-ssa/ssa-fre-34.c: Adjust optimizer message.
Richard Guenther - April 3, 2013, 2:46 p.m.
On Wed, Apr 3, 2013 at 4:15 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> I am not 100% convinced that it is always better to fold to a MEM_REF, but
> that's what the PR asks for.
>
> bootstrap+testsuite on x86_64-linux-gnu.
>
> 2013-04-03  Marc Glisse  <marc.glisse@inria.fr>
>
>         PR middle-end/52436
> gcc/
>         * fold-const.c (fold_ternary_loc) <BIT_FIELD_REF>: Handle nested
>         reference.
>
> gcc/testsuite/
>         * gcc.dg/pr52436.c: New testcase.
>         * gcc.dg/tree-ssa/ssa-fre-33.c: Adjust optimizer message.
>         * gcc.dg/tree-ssa/ssa-fre-34.c: Adjust optimizer message.
>
> --
> Marc Glisse
> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (revision 197411)
> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (working copy)
> @@ -11,12 +11,12 @@ struct {
>  float x;
>  int main(int argc)
>  {
>    vector float res = (vector float){0.0f,0.0f,0.0f,1.0f};
>    res += (vector float){1.0f,2.0f,3.0f,4.0f};
>    s.global_res = res;
>    x = *((float*)&s.global_res + 1);
>    return 0;
>  }
>
> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with 2" "fre1" } }
> */
> +/* { dg-final { scan-tree-dump "Replaced .* with 2" "fre1" } } */
>  /* { dg-final { cleanup-tree-dump "fre1" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (revision 197411)
> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (working copy)
> @@ -8,12 +8,12 @@ struct {
>      float i;
>      vector float global_res;
>  } s;
>  float foo(float f)
>  {
>    vector float res = (vector float){0.0f,f,0.0f,1.0f};
>    s.global_res = res;
>    return *((float*)&s.global_res + 1);
>  }
>
> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with f" "fre1" } }
> */
> +/* { dg-final { scan-tree-dump "Replaced .* with f" "fre1" } } */
>  /* { dg-final { cleanup-tree-dump "fre1" } } */
> Index: gcc/testsuite/gcc.dg/pr52436.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
> +++ gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +
> +typedef long long __m128i __attribute__ ((vector_size (2 * sizeof (long
> long)),
> +                                          may_alias));
> +typedef struct
> +{
> +  __m128i b;
> +} s_1a;
> +typedef s_1a s_1m __attribute__((aligned(1)));
> +void
> +foo (s_1m *p)
> +{
> +  p->b[1] = 5;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_EXPR" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/pr52436.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 197411)
> +++ gcc/fold-const.c    (working copy)
> @@ -14293,20 +14293,34 @@ fold_ternary_loc (location_t loc, enum t
>                 {
>                   tree v = native_interpret_expr (type,
>                                                   b + bitpos /
> BITS_PER_UNIT,
>                                                   bitsize / BITS_PER_UNIT);
>                   if (v)
>                     return v;
>                 }
>             }
>         }
>
> +      /* BIT_FIELD_REF[a.b, *, CST] -> MEM[&a, offsetof (a, b) + CST / 8].
> */
> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)

This check means the optimization is not performed for
BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
Did something break when you just remove the whole check above?

Richard.

> +         && TREE_INT_CST_LOW (arg1) % BITS_PER_UNIT == 0
> +         && TREE_INT_CST_LOW (arg2) % BITS_PER_UNIT == 0)
> +       {
> +         HOST_WIDE_INT coffset;
> +         tree base = get_addr_base_and_unit_offset (arg0, &coffset);
> +         if (base)
> +           return fold_build2 (MEM_REF, type, build_fold_addr_expr (base),
> +                               build_int_cst (build_pointer_type
> +                                                (TYPE_MAIN_VARIANT (type)),
> +                                              coffset + TREE_INT_CST_LOW
> (arg2)
> +                                                        / BITS_PER_UNIT));
> +       }
>        return NULL_TREE;
>
>      case FMA_EXPR:
>        /* For integers we can decompose the FMA if possible.  */
>        if (TREE_CODE (arg0) == INTEGER_CST
>           && TREE_CODE (arg1) == INTEGER_CST)
>         return fold_build2_loc (loc, PLUS_EXPR, type,
>                                 const_binop (MULT_EXPR, arg0, arg1), arg2);
>        if (integer_zerop (arg2))
>         return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
>
Marc Glisse - April 4, 2013, 7:23 a.m.
On Wed, 3 Apr 2013, Richard Biener wrote:

> On Wed, Apr 3, 2013 at 4:15 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> I am not 100% convinced that it is always better to fold to a MEM_REF, but
>> that's what the PR asks for.
>>
>> bootstrap+testsuite on x86_64-linux-gnu.
>>
>> 2013-04-03  Marc Glisse  <marc.glisse@inria.fr>
>>
>>         PR middle-end/52436
>> gcc/
>>         * fold-const.c (fold_ternary_loc) <BIT_FIELD_REF>: Handle nested
>>         reference.
>>
>> gcc/testsuite/
>>         * gcc.dg/pr52436.c: New testcase.
>>         * gcc.dg/tree-ssa/ssa-fre-33.c: Adjust optimizer message.
>>         * gcc.dg/tree-ssa/ssa-fre-34.c: Adjust optimizer message.
>>
>> --
>> Marc Glisse
>> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (revision 197411)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (working copy)
>> @@ -11,12 +11,12 @@ struct {
>>  float x;
>>  int main(int argc)
>>  {
>>    vector float res = (vector float){0.0f,0.0f,0.0f,1.0f};
>>    res += (vector float){1.0f,2.0f,3.0f,4.0f};
>>    s.global_res = res;
>>    x = *((float*)&s.global_res + 1);
>>    return 0;
>>  }
>>
>> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with 2" "fre1" } }
>> */
>> +/* { dg-final { scan-tree-dump "Replaced .* with 2" "fre1" } } */
>>  /* { dg-final { cleanup-tree-dump "fre1" } } */
>> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (revision 197411)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (working copy)
>> @@ -8,12 +8,12 @@ struct {
>>      float i;
>>      vector float global_res;
>>  } s;
>>  float foo(float f)
>>  {
>>    vector float res = (vector float){0.0f,f,0.0f,1.0f};
>>    s.global_res = res;
>>    return *((float*)&s.global_res + 1);
>>  }
>>
>> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with f" "fre1" } }
>> */
>> +/* { dg-final { scan-tree-dump "Replaced .* with f" "fre1" } } */
>>  /* { dg-final { cleanup-tree-dump "fre1" } } */
>> Index: gcc/testsuite/gcc.dg/pr52436.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
>> +++ gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
>> @@ -0,0 +1,18 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O -fdump-tree-optimized" } */
>> +
>> +typedef long long __m128i __attribute__ ((vector_size (2 * sizeof (long
>> long)),
>> +                                          may_alias));
>> +typedef struct
>> +{
>> +  __m128i b;
>> +} s_1a;
>> +typedef s_1a s_1m __attribute__((aligned(1)));
>> +void
>> +foo (s_1m *p)
>> +{
>> +  p->b[1] = 5;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_EXPR" "optimized" } } */
>> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>>
>> Property changes on: gcc/testsuite/gcc.dg/pr52436.c
>> ___________________________________________________________________
>> Added: svn:keywords
>>    + Author Date Id Revision URL
>> Added: svn:eol-style
>>    + native
>>
>> Index: gcc/fold-const.c
>> ===================================================================
>> --- gcc/fold-const.c    (revision 197411)
>> +++ gcc/fold-const.c    (working copy)
>> @@ -14293,20 +14293,34 @@ fold_ternary_loc (location_t loc, enum t
>>                 {
>>                   tree v = native_interpret_expr (type,
>>                                                   b + bitpos /
>> BITS_PER_UNIT,
>>                                                   bitsize / BITS_PER_UNIT);
>>                   if (v)
>>                     return v;
>>                 }
>>             }
>>         }
>>
>> +      /* BIT_FIELD_REF[a.b, *, CST] -> MEM[&a, offsetof (a, b) + CST / 8].
>> */
>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)
>
> This check means the optimization is not performed for
> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.

Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want to 
replace them with a MEM_REF? I actually think my patch already replaces 
too many.

By the way, if I understand the code correctly, 
get_addr_base_and_unit_offset can just as easily return an object or an 
address, when it is called on a MEM_REF. That may be an issue as well.

Maybe I should just forget about this patch for now...

> Did something break when you just remove the whole check above?

Yes, it breaks all over the place in the testsuite.

>> +         && TREE_INT_CST_LOW (arg1) % BITS_PER_UNIT == 0
>> +         && TREE_INT_CST_LOW (arg2) % BITS_PER_UNIT == 0)
>> +       {
>> +         HOST_WIDE_INT coffset;
>> +         tree base = get_addr_base_and_unit_offset (arg0, &coffset);
>> +         if (base)
>> +           return fold_build2 (MEM_REF, type, build_fold_addr_expr (base),
>> +                               build_int_cst (build_pointer_type
>> +                                                (TYPE_MAIN_VARIANT (type)),
>> +                                              coffset + TREE_INT_CST_LOW
>> (arg2)
>> +                                                        / BITS_PER_UNIT));
>> +       }
>>        return NULL_TREE;
>>
>>      case FMA_EXPR:
>>        /* For integers we can decompose the FMA if possible.  */
>>        if (TREE_CODE (arg0) == INTEGER_CST
>>           && TREE_CODE (arg1) == INTEGER_CST)
>>         return fold_build2_loc (loc, PLUS_EXPR, type,
>>                                 const_binop (MULT_EXPR, arg0, arg1), arg2);
>>        if (integer_zerop (arg2))
>>         return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
Richard Guenther - April 4, 2013, 8:09 a.m.
On Thu, Apr 4, 2013 at 9:23 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Wed, 3 Apr 2013, Richard Biener wrote:
>
>> On Wed, Apr 3, 2013 at 4:15 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> Hello,
>>>
>>> I am not 100% convinced that it is always better to fold to a MEM_REF,
>>> but
>>> that's what the PR asks for.
>>>
>>> bootstrap+testsuite on x86_64-linux-gnu.
>>>
>>> 2013-04-03  Marc Glisse  <marc.glisse@inria.fr>
>>>
>>>         PR middle-end/52436
>>> gcc/
>>>         * fold-const.c (fold_ternary_loc) <BIT_FIELD_REF>: Handle nested
>>>         reference.
>>>
>>> gcc/testsuite/
>>>         * gcc.dg/pr52436.c: New testcase.
>>>         * gcc.dg/tree-ssa/ssa-fre-33.c: Adjust optimizer message.
>>>         * gcc.dg/tree-ssa/ssa-fre-34.c: Adjust optimizer message.
>>>
>>> --
>>> Marc Glisse
>>> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (revision 197411)
>>> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c  (working copy)
>>> @@ -11,12 +11,12 @@ struct {
>>>  float x;
>>>  int main(int argc)
>>>  {
>>>    vector float res = (vector float){0.0f,0.0f,0.0f,1.0f};
>>>    res += (vector float){1.0f,2.0f,3.0f,4.0f};
>>>    s.global_res = res;
>>>    x = *((float*)&s.global_res + 1);
>>>    return 0;
>>>  }
>>>
>>> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with 2" "fre1" }
>>> }
>>> */
>>> +/* { dg-final { scan-tree-dump "Replaced .* with 2" "fre1" } } */
>>>  /* { dg-final { cleanup-tree-dump "fre1" } } */
>>> Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (revision 197411)
>>> +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c  (working copy)
>>> @@ -8,12 +8,12 @@ struct {
>>>      float i;
>>>      vector float global_res;
>>>  } s;
>>>  float foo(float f)
>>>  {
>>>    vector float res = (vector float){0.0f,f,0.0f,1.0f};
>>>    s.global_res = res;
>>>    return *((float*)&s.global_res + 1);
>>>  }
>>>
>>> -/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with f" "fre1" }
>>> }
>>> */
>>> +/* { dg-final { scan-tree-dump "Replaced .* with f" "fre1" } } */
>>>  /* { dg-final { cleanup-tree-dump "fre1" } } */
>>> Index: gcc/testsuite/gcc.dg/pr52436.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
>>> +++ gcc/testsuite/gcc.dg/pr52436.c      (revision 0)
>>> @@ -0,0 +1,18 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O -fdump-tree-optimized" } */
>>> +
>>> +typedef long long __m128i __attribute__ ((vector_size (2 * sizeof (long
>>> long)),
>>> +                                          may_alias));
>>> +typedef struct
>>> +{
>>> +  __m128i b;
>>> +} s_1a;
>>> +typedef s_1a s_1m __attribute__((aligned(1)));
>>> +void
>>> +foo (s_1m *p)
>>> +{
>>> +  p->b[1] = 5;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_EXPR" "optimized" } } */
>>> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>>>
>>> Property changes on: gcc/testsuite/gcc.dg/pr52436.c
>>> ___________________________________________________________________
>>> Added: svn:keywords
>>>    + Author Date Id Revision URL
>>> Added: svn:eol-style
>>>    + native
>>>
>>> Index: gcc/fold-const.c
>>> ===================================================================
>>> --- gcc/fold-const.c    (revision 197411)
>>> +++ gcc/fold-const.c    (working copy)
>>> @@ -14293,20 +14293,34 @@ fold_ternary_loc (location_t loc, enum t
>>>                 {
>>>                   tree v = native_interpret_expr (type,
>>>                                                   b + bitpos /
>>> BITS_PER_UNIT,
>>>                                                   bitsize /
>>> BITS_PER_UNIT);
>>>                   if (v)
>>>                     return v;
>>>                 }
>>>             }
>>>         }
>>>
>>> +      /* BIT_FIELD_REF[a.b, *, CST] -> MEM[&a, offsetof (a, b) + CST /
>>> 8].
>>> */
>>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)
>>
>>
>> This check means the optimization is not performed for
>> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
>
>
> Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want to
> replace them with a MEM_REF? I actually think my patch already replaces too
> many.

Yes, when I filed the bug I was working on bitfield lowering and the only
BIT_FIELD_REFs that would survive would be bitfield extracts from
registers.  Thus, BIT_FIELD_REFs on memory would be lowered as

  reg_2 = MEM[ ... ];
  res_3 = BIT_FIELD_REF [reg_2, ...];

with an appropriately aligned / bigger size memory MEM.

As a first step I wanted to lower all BIT_FIELD_REFs that can be expressed
directly as memory access (byte-aligned and byte-size) to regular memory
accesses.

> By the way, if I understand the code correctly,
> get_addr_base_and_unit_offset can just as easily return an object or an
> address, when it is called on a MEM_REF. That may be an issue as well.

It should always return an object.

> Maybe I should just forget about this patch for now...

If it breaks all over the testsuite if generalized then yes (is it just
dump scans that fail or are you seeing "real" issues?)

Thanks,
Richard.

>
>> Did something break when you just remove the whole check above?
>
>
> Yes, it breaks all over the place in the testsuite.
>
>
>>> +         && TREE_INT_CST_LOW (arg1) % BITS_PER_UNIT == 0
>>> +         && TREE_INT_CST_LOW (arg2) % BITS_PER_UNIT == 0)
>>> +       {
>>> +         HOST_WIDE_INT coffset;
>>> +         tree base = get_addr_base_and_unit_offset (arg0, &coffset);
>>> +         if (base)
>>> +           return fold_build2 (MEM_REF, type, build_fold_addr_expr
>>> (base),
>>> +                               build_int_cst (build_pointer_type
>>> +                                                (TYPE_MAIN_VARIANT
>>> (type)),
>>> +                                              coffset + TREE_INT_CST_LOW
>>> (arg2)
>>> +                                                        /
>>> BITS_PER_UNIT));
>>> +       }
>>>        return NULL_TREE;
>>>
>>>      case FMA_EXPR:
>>>        /* For integers we can decompose the FMA if possible.  */
>>>        if (TREE_CODE (arg0) == INTEGER_CST
>>>           && TREE_CODE (arg1) == INTEGER_CST)
>>>         return fold_build2_loc (loc, PLUS_EXPR, type,
>>>                                 const_binop (MULT_EXPR, arg0, arg1),
>>> arg2);
>>>        if (integer_zerop (arg2))
>>>         return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
>
>
> --
> Marc Glisse
Marc Glisse - April 4, 2013, 8:53 a.m.
On Thu, 4 Apr 2013, Richard Biener wrote:

>>>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)
>>>
>>>
>>> This check means the optimization is not performed for
>>> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
>>
>>
>> Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want to
>> replace them with a MEM_REF? I actually think my patch already replaces too
>> many.
>
> Yes, when I filed the bug I was working on bitfield lowering and the only
> BIT_FIELD_REFs that would survive would be bitfield extracts from
> registers.

Can't a vector (not in memory) count as a register?

> Thus, BIT_FIELD_REFs on memory would be lowered as
>
>  reg_2 = MEM[ ... ];
>  res_3 = BIT_FIELD_REF [reg_2, ...];
>
> with an appropriately aligned / bigger size memory MEM.
>
> As a first step I wanted to lower all BIT_FIELD_REFs that can be expressed
> directly as memory access (byte-aligned and byte-size) to regular memory
> accesses.

But the transformation on BIT_FIELD_REF[A,...] will take the address of A 
even if A is not something that is ok with having its address taken.

>> By the way, if I understand the code correctly,
>> get_addr_base_and_unit_offset can just as easily return an object or an
>> address, when it is called on a MEM_REF. That may be an issue as well.
>
> It should always return an object.

I am probably missing something. Looking in tree-flow-inline.c, for 
MEM_REF[a,...]:

>        case MEM_REF:
>          {
>            tree base = TREE_OPERAND (exp, 0);
>            if (valueize
>                && TREE_CODE (base) == SSA_NAME)
>              base = (*valueize) (base);

valueize is 0.

>            /* Hand back the decl for MEM[&decl, off].  */
>            if (TREE_CODE (base) == ADDR_EXPR)

not the case here.

>              {
>                if (!integer_zerop (TREE_OPERAND (exp, 1)))
>                  {
>                    double_int off = mem_ref_offset (exp);
>                    gcc_assert (off.high == -1 || off.high == 0);
>                    byte_offset += off.to_shwi ();
>                  }
>                exp = TREE_OPERAND (base, 0);
>              }
>            goto done;

it returns a, which afaiu is an address.
For MEM_REF[&b] it does return b.

>> Maybe I should just forget about this patch for now...
>
> If it breaks all over the testsuite if generalized then yes (is it just
> dump scans that fail or are you seeing "real" issues?)

Verification failure for ADDR_EXPR: is_gimple_address (t)

unexpected expression 'v' of kind mem_ref in cxx_eval_constant_expression

The first one in particular affects almost all vector tests, so it might 
hide other issues.
Richard Guenther - April 4, 2013, 9:37 a.m.
On Thu, Apr 4, 2013 at 10:53 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Thu, 4 Apr 2013, Richard Biener wrote:
>
>>>>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)
>>>>
>>>>
>>>>
>>>> This check means the optimization is not performed for
>>>> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
>>>
>>>
>>>
>>> Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want
>>> to
>>> replace them with a MEM_REF? I actually think my patch already replaces
>>> too
>>> many.
>>
>>
>> Yes, when I filed the bug I was working on bitfield lowering and the only
>> BIT_FIELD_REFs that would survive would be bitfield extracts from
>> registers.
>
>
> Can't a vector (not in memory) count as a register?

Sure.  A vector not in memory is a register.

>
>> Thus, BIT_FIELD_REFs on memory would be lowered as
>>
>>  reg_2 = MEM[ ... ];
>>  res_3 = BIT_FIELD_REF [reg_2, ...];
>>
>> with an appropriately aligned / bigger size memory MEM.
>>
>> As a first step I wanted to lower all BIT_FIELD_REFs that can be expressed
>> directly as memory access (byte-aligned and byte-size) to regular memory
>> accesses.
>
>
> But the transformation on BIT_FIELD_REF[A,...] will take the address of A
> even if A is not something that is ok with having its address taken.

I'd like to see a case where this happens.

>>> By the way, if I understand the code correctly,
>>> get_addr_base_and_unit_offset can just as easily return an object or an
>>> address, when it is called on a MEM_REF. That may be an issue as well.
>>
>>
>> It should always return an object.
>
>
> I am probably missing something. Looking in tree-flow-inline.c, for
> MEM_REF[a,...]:
>
>>        case MEM_REF:
>>          {
>>            tree base = TREE_OPERAND (exp, 0);
>>            if (valueize
>>                && TREE_CODE (base) == SSA_NAME)
>>              base = (*valueize) (base);
>
>
> valueize is 0.
>
>>            /* Hand back the decl for MEM[&decl, off].  */
>>            if (TREE_CODE (base) == ADDR_EXPR)
>
>
> not the case here.
>
>>              {
>>                if (!integer_zerop (TREE_OPERAND (exp, 1)))
>>                  {
>>                    double_int off = mem_ref_offset (exp);
>>                    gcc_assert (off.high == -1 || off.high == 0);
>>                    byte_offset += off.to_shwi ();
>>                  }
>>                exp = TREE_OPERAND (base, 0);
>>              }
>>            goto done;
>
>
> it returns a, which afaiu is an address.
> For MEM_REF[&b] it does return b.

No, it returns 'exp' which is still MEM_REF[&b].

>
>>> Maybe I should just forget about this patch for now...
>>
>>
>> If it breaks all over the testsuite if generalized then yes (is it just
>> dump scans that fail or are you seeing "real" issues?)
>
>
> Verification failure for ADDR_EXPR: is_gimple_address (t)
>
> unexpected expression 'v' of kind mem_ref in cxx_eval_constant_expression
>
> The first one in particular affects almost all vector tests, so it might
> hide other issues.

Yeah, I definitely know that frontends may not expect MEM_REFs at all
(so folding to MEM_REF from fold-const.c is probably not the very best idea).
For that issue, simply do the transform from gimple_fold only.

Richard.

> --
> Marc Glisse
Marc Glisse - April 4, 2013, 10:11 a.m.
On Thu, 4 Apr 2013, Richard Biener wrote:

> On Thu, Apr 4, 2013 at 10:53 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Thu, 4 Apr 2013, Richard Biener wrote:
>>
>>>>>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)
>>>>>
>>>>>
>>>>>
>>>>> This check means the optimization is not performed for
>>>>> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
>>>>
>>>>
>>>>
>>>> Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want
>>>> to
>>>> replace them with a MEM_REF? I actually think my patch already replaces
>>>> too
>>>> many.
>>>
>>>
>>> Yes, when I filed the bug I was working on bitfield lowering and the only
>>> BIT_FIELD_REFs that would survive would be bitfield extracts from
>>> registers.
>>
>>
>> Can't a vector (not in memory) count as a register?
>
> Sure.  A vector not in memory is a register.

So we need to have some test whether something is "a register" and not 
create a MEM_REF in that case.

>>> Thus, BIT_FIELD_REFs on memory would be lowered as
>>>
>>>  reg_2 = MEM[ ... ];
>>>  res_3 = BIT_FIELD_REF [reg_2, ...];
>>>
>>> with an appropriately aligned / bigger size memory MEM.
>>>
>>> As a first step I wanted to lower all BIT_FIELD_REFs that can be expressed
>>> directly as memory access (byte-aligned and byte-size) to regular memory
>>> accesses.
>>
>>
>> But the transformation on BIT_FIELD_REF[A,...] will take the address of A
>> even if A is not something that is ok with having its address taken.
>
> I'd like to see a case where this happens.

In the vector lowering pass, op0 is a SSA_NAME:

typedef double vec __attribute__((vector_size(64)));
vec f(vec x){
   return x+x;
}

>> I am probably missing something. Looking in tree-flow-inline.c, for
>> MEM_REF[a,...]:
>>
>>>        case MEM_REF:
>>>          {
>>>            tree base = TREE_OPERAND (exp, 0);
>>>            if (valueize
>>>                && TREE_CODE (base) == SSA_NAME)
>>>              base = (*valueize) (base);
>>
>>
>> valueize is 0.
>>
>>>            /* Hand back the decl for MEM[&decl, off].  */
>>>            if (TREE_CODE (base) == ADDR_EXPR)
>>
>>
>> not the case here.
>>
>>>              {
>>>                if (!integer_zerop (TREE_OPERAND (exp, 1)))
>>>                  {
>>>                    double_int off = mem_ref_offset (exp);
>>>                    gcc_assert (off.high == -1 || off.high == 0);
>>>                    byte_offset += off.to_shwi ();
>>>                  }
>>>                exp = TREE_OPERAND (base, 0);
>>>              }
>>>            goto done;
>>
>>
>> it returns a, which afaiu is an address.
>> For MEM_REF[&b] it does return b.
>
> No, it returns 'exp' which is still MEM_REF[&b].

(Actually, for MEM_REF[a] it returns the input unchanged, and for 
MEM_REF[&b] it returns b)
Thanks! I had completely misread that.
Richard Guenther - April 4, 2013, 11:48 a.m.
On Thu, Apr 4, 2013 at 12:11 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Thu, 4 Apr 2013, Richard Biener wrote:
>
>> On Thu, Apr 4, 2013 at 10:53 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> On Thu, 4 Apr 2013, Richard Biener wrote:
>>>
>>>>>>> +      if ((handled_component_p (arg0) || TREE_CODE (arg0) ==
>>>>>>> MEM_REF)
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> This check means the optimization is not performed for
>>>>>> BIT_FIELD_REF[a, *, CST] which I see no particularly good reason for.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> Er, are you trying to get rid of all BIT_FIELD_REFs? Why would you want
>>>>> to
>>>>> replace them with a MEM_REF? I actually think my patch already replaces
>>>>> too
>>>>> many.
>>>>
>>>>
>>>>
>>>> Yes, when I filed the bug I was working on bitfield lowering and the
>>>> only
>>>> BIT_FIELD_REFs that would survive would be bitfield extracts from
>>>> registers.
>>>
>>>
>>>
>>> Can't a vector (not in memory) count as a register?
>>
>>
>> Sure.  A vector not in memory is a register.
>
>
> So we need to have some test whether something is "a register" and not
> create a MEM_REF in that case.

Ah, of course.  Easy on the gimple side - just check is_gimple_reg ().

>
>>>> Thus, BIT_FIELD_REFs on memory would be lowered as
>>>>
>>>>  reg_2 = MEM[ ... ];
>>>>  res_3 = BIT_FIELD_REF [reg_2, ...];
>>>>
>>>> with an appropriately aligned / bigger size memory MEM.
>>>>
>>>> As a first step I wanted to lower all BIT_FIELD_REFs that can be
>>>> expressed
>>>> directly as memory access (byte-aligned and byte-size) to regular memory
>>>> accesses.
>>>
>>>
>>>
>>> But the transformation on BIT_FIELD_REF[A,...] will take the address of A
>>> even if A is not something that is ok with having its address taken.
>>
>>
>> I'd like to see a case where this happens.
>
>
> In the vector lowering pass, op0 is a SSA_NAME:
>
> typedef double vec __attribute__((vector_size(64)));
> vec f(vec x){
>   return x+x;
>
> }
>
>>> I am probably missing something. Looking in tree-flow-inline.c, for
>>> MEM_REF[a,...]:
>>>
>>>>        case MEM_REF:
>>>>          {
>>>>            tree base = TREE_OPERAND (exp, 0);
>>>>            if (valueize
>>>>                && TREE_CODE (base) == SSA_NAME)
>>>>              base = (*valueize) (base);
>>>
>>>
>>>
>>> valueize is 0.
>>>
>>>>            /* Hand back the decl for MEM[&decl, off].  */
>>>>            if (TREE_CODE (base) == ADDR_EXPR)
>>>
>>>
>>>
>>> not the case here.
>>>
>>>>              {
>>>>                if (!integer_zerop (TREE_OPERAND (exp, 1)))
>>>>                  {
>>>>                    double_int off = mem_ref_offset (exp);
>>>>                    gcc_assert (off.high == -1 || off.high == 0);
>>>>                    byte_offset += off.to_shwi ();
>>>>                  }
>>>>                exp = TREE_OPERAND (base, 0);
>>>>              }
>>>>            goto done;
>>>
>>>
>>>
>>> it returns a, which afaiu is an address.
>>> For MEM_REF[&b] it does return b.
>>
>>
>> No, it returns 'exp' which is still MEM_REF[&b].
>
>
> (Actually, for MEM_REF[a] it returns the input unchanged, and for
> MEM_REF[&b] it returns b)
> Thanks! I had completely misread that.
>
> --
> Marc Glisse

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c

===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c	(revision 197411)

+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-33.c	(working copy)

@@ -11,12 +11,12 @@  struct {

 float x;
 int main(int argc)
 {
   vector float res = (vector float){0.0f,0.0f,0.0f,1.0f};
   res += (vector float){1.0f,2.0f,3.0f,4.0f};
   s.global_res = res;
   x = *((float*)&s.global_res + 1);
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with 2" "fre1" } } */

+/* { dg-final { scan-tree-dump "Replaced .* with 2" "fre1" } } */

 /* { dg-final { cleanup-tree-dump "fre1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c

===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c	(revision 197411)

+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-34.c	(working copy)

@@ -8,12 +8,12 @@  struct {

     float i;
     vector float global_res;
 } s;
 float foo(float f)
 {
   vector float res = (vector float){0.0f,f,0.0f,1.0f};
   s.global_res = res;
   return *((float*)&s.global_res + 1);
 }
 
-/* { dg-final { scan-tree-dump "Replaced BIT_FIELD_REF.*with f" "fre1" } } */

+/* { dg-final { scan-tree-dump "Replaced .* with f" "fre1" } } */

 /* { dg-final { cleanup-tree-dump "fre1" } } */
Index: gcc/testsuite/gcc.dg/pr52436.c

===================================================================
--- gcc/testsuite/gcc.dg/pr52436.c	(revision 0)

+++ gcc/testsuite/gcc.dg/pr52436.c	(revision 0)

@@ -0,0 +1,18 @@ 

+/* { dg-do compile } */

+/* { dg-options "-O -fdump-tree-optimized" } */

+

+typedef long long __m128i __attribute__ ((vector_size (2 * sizeof (long long)),

+                                          may_alias));

+typedef struct

+{

+  __m128i b;

+} s_1a;

+typedef s_1a s_1m __attribute__((aligned(1)));

+void

+foo (s_1m *p)

+{

+  p->b[1] = 5;

+}

+

+/* { dg-final { scan-tree-dump-not "BIT_FIELD_EXPR" "optimized" } } */

+/* { dg-final { cleanup-tree-dump "optimized" } } */


Property changes on: gcc/testsuite/gcc.dg/pr52436.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/fold-const.c

===================================================================
--- gcc/fold-const.c	(revision 197411)

+++ gcc/fold-const.c	(working copy)

@@ -14293,20 +14293,34 @@  fold_ternary_loc (location_t loc, enum t

 		{
 		  tree v = native_interpret_expr (type,
 						  b + bitpos / BITS_PER_UNIT,
 						  bitsize / BITS_PER_UNIT);
 		  if (v)
 		    return v;
 		}
 	    }
 	}
 
+      /* BIT_FIELD_REF[a.b, *, CST] -> MEM[&a, offsetof (a, b) + CST / 8].  */

+      if ((handled_component_p (arg0) || TREE_CODE (arg0) == MEM_REF)

+	  && TREE_INT_CST_LOW (arg1) % BITS_PER_UNIT == 0

+	  && TREE_INT_CST_LOW (arg2) % BITS_PER_UNIT == 0)

+	{

+	  HOST_WIDE_INT coffset;

+	  tree base = get_addr_base_and_unit_offset (arg0, &coffset);

+	  if (base)

+	    return fold_build2 (MEM_REF, type, build_fold_addr_expr (base),

+				build_int_cst (build_pointer_type

+						 (TYPE_MAIN_VARIANT (type)),

+					       coffset + TREE_INT_CST_LOW (arg2)

+							 / BITS_PER_UNIT));

+	}

       return NULL_TREE;
 
     case FMA_EXPR:
       /* For integers we can decompose the FMA if possible.  */
       if (TREE_CODE (arg0) == INTEGER_CST
 	  && TREE_CODE (arg1) == INTEGER_CST)
 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				const_binop (MULT_EXPR, arg0, arg1), arg2);
       if (integer_zerop (arg2))
 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);