diff mbox series

[ping] Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref'

Message ID 87a6lhhkvp.fsf@euler.schwinge.homeip.net
State New
Headers show
Series [ping] Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref' | expand

Commit Message

Thomas Schwinge Aug. 16, 2021, 8:08 a.m. UTC
Hi!

Ping.


On 2021-08-09T16:16:51+0200, I wrote:
> [from internal]
>
>
> Hi!
>
> This concerns a class of ICEs seen as of og10 branch with the
> "openacc: Middle-end worker-partitioning support" and "amdgcn:
> Enable OpenACC worker partitioning for AMD GCN" changes applied:
>
> On 2020-06-06T16:07:36+0100, Kwok Cheung Yeung <kwok_yeung@mentor.com> wrote:
>> On 01/06/2020 8:48 pm, Kwok Cheung Yeung wrote:
>>> On 21/05/2020 10:23 pm, Kwok Cheung Yeung wrote:
>>>> These all have the same failure mode:
>>>>
>>>> during RTL pass: expand
>>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90: In function 'MAIN__._omp_fn.1':
>>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86: internal compiler error: in convert_memory_address_addr_space_1, at explow.c:302
>>>> 0xc29f20 convert_memory_address_addr_space_1(scalar_int_mode, rtx_def*, unsigned char, bool, bool)
>>>>          [...]/gcc/explow.c:302
>>>> 0xc29f57 convert_memory_address_addr_space(scalar_int_mode, rtx_def*, unsigned char)
>>>>          [...]/gcc/explow.c:404
>>>> [...]
>
>>>> This occurs if the -ftree-slp-vectorize flag is specified (default at -O3).
>
>>> The problematic bit of Gimple code is this:
>>>
>>>    .oacc_worker_o.44._120 = gangs_min_472;
>>>    .oacc_worker_o.44._122 = workers_min_473;
>>>    .oacc_worker_o.44._124 = vectors_min_474;
>>>    .oacc_worker_o.44._126 = gangs_max_475;
>>>    .oacc_worker_o.44._128 = workers_max_476;
>>>    .oacc_worker_o.44._130 = vectors_max_477;
>>>    .oacc_worker_o.44._132 = 0;
>>>
>>> With SLP vectorization enabled, it becomes this:
>>>
>>>    _40 = {gangs_min_472, workers_min_473, vectors_min_474, gangs_max_475};
>>>    ...
>>>    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>>>    .oacc_worker_o.44._128 = workers_max_476;
>>>    .oacc_worker_o.44._130 = vectors_max_477;
>>>    .oacc_worker_o.44._132 = 0;
>>>
>>> The optimization is trying to transform 4 separate assignments into a single
>>> memory operation. The trouble is that &o.acc_worker_o is an SImode pointer in
>>> AS4 (LDS), while the memory expression appears to be in the default memory
>>> space. The 'to' expression of the assignment is:
>>>
>>>   <mem_ref 0x7ffff74c61e0
>>>      type <vector_type 0x7ffff7470498
>>>          type <integer_type 0x7ffff73195e8 int public SI
>>>              size <integer_cst 0x7ffff7318bb8 constant 32>
>>>              unit-size <integer_cst 0x7ffff7318bd0 constant 4>
>>>              align:32 warn_if_not_align:0 symtab:0 alias-set 1 canonical-type 0x7ffff73195e8 precision:32 min <integer_cst 0x7ffff7318b70 -2147483648> max <integer_cst 0x7ffff7318b88 2147483647>
>>>              pointer_to_this <pointer_type 0x7ffff73209d8> reference_to_this <reference_type 0x7ffff73d9d20>>
>>>          TI
>>>          size <integer_cst 0x7ffff7318ca8 constant 128>
>>>          unit-size <integer_cst 0x7ffff7318cc0 constant 16>
>>>          align:128 warn_if_not_align:0 symtab:0 alias-set 1 structural-equality nunits:4
>>>          pointer_to_this <pointer_type 0x7ffff7470540>>
>>>
>>>      arg:0 <addr_expr 0x7ffff74cdb80
>>>          type <pointer_type 0x7ffff73209d8 type <integer_type 0x7ffff73195e8 int>
>>>              public unsigned DI
>>>              size <integer_cst 0x7ffff7318978 constant 64>
>>>              unit-size <integer_cst 0x7ffff7318990 constant 8>
>>>              align:64 warn_if_not_align:0 symtab:0 alias-set 2 structural-equality>
>>>          constant
>>>          arg:0 <var_decl 0x7ffff7477f30 .oacc_worker_o.44 type <record_type 0x7ffff73eb888 .oacc_ws_data_s.21 address-space-4>
>>>              addressable used static ignored BLK [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86:0
>>>
>>>              size <integer_cst 0x7ffff746ce70 constant 224>
>>>              unit-size <integer_cst 0x7ffff746ce40 constant 28>
>>>              align:128 warn_if_not_align:0
>>>              (mem/c:BLK (symbol_ref:SI (".oacc_worker_o.44.14") [flags 0x2] <var_decl 0x7ffff7477f30 .oacc_worker_o.44>) [9 .oacc_worker_o.44+0 S28 A128 AS4])>>
>>>      arg:1 <integer_cst 0x7ffff73ff078 type <pointer_type 0x7ffff73209d8> constant 0>>
>>>
>>> In convert_memory_address_addr_space_1:
>>>
>>> #ifndef POINTERS_EXTEND_UNSIGNED
>>>    gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
>>>    return x;
>>> #else /* defined(POINTERS_EXTEND_UNSIGNED) */
>>>
>>> POINTERS_EXTEND_UNSIGNED is not defined, so it hits the assert. The expected
>>> to_mode is DI_mode, but x is SI_mode, so the assert fires.
>
>> I now have a fix for this.
>>
>>  >    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>>
>> The ICE occurs because the SLP vectorization pass creates the new statement
>> using the type of the expression '&.oacc_worker_o.44', which is a pointer to a
>> component ref in the default address space. The expand pass gets confused
>> because it is handed an SImode pointer (for LDS) when it is expecting a DImode
>> pointer (for flat/global space).
>>
>> The underlying problem is that although .oacc_worker_o is in the correct address
>> space, the component ref .oacc_worker_o is not. I fixed this by propagating the
>> address space of .oacc_worker_o when the component ref is created.
>
>>  static tree
>>  oacc_build_component_ref (tree obj, tree field)
>>  {
>> -  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
>> +  tree field_type = TREE_TYPE (field);
>> +  tree obj_type = TREE_TYPE (obj);
>> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
>> +    field_type = build_qualified_type
>> +                     (field_type,
>> +                      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
>> +
>> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
>>    if (TREE_THIS_VOLATILE (field))
>>      TREE_THIS_VOLATILE (ret) |= 1;
>>    if (TREE_READONLY (field))
>
> This code change has been included in the recent master branch commit
> e2a58ed6dc5293602d0d168475109caa81ad0f0d "openacc: Middle-end
> worker-partitioning support", which thus includes a
> 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref' that is
> slightly different from 'gcc/omp-low.c:omp_build_component_ref'.
>
> I'm confirming that with this reverted, we're seeing ICEs as follows:
>
>     +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
> current set of offloading testcases, we never see a
> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
> to be necessary there (but also won't do any harm: no-op).
>
> Would it make sense to "Re-unify 'omp_build_component_ref' and
> 'oacc_build_component_ref'", see attached?
>
>
> Grüße
>  Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

Comments

Jakub Jelinek Aug. 16, 2021, 8:21 a.m. UTC | #1
On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:
> --- a/gcc/omp-general.c
> +++ b/gcc/omp-general.c
> @@ -2815,4 +2815,25 @@ oacc_get_ifn_dim_arg (const gimple *stmt)
>    return (int) axis;
>  }
>  
> +/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
> +   as appropriate.  */
> +
> +tree
> +omp_build_component_ref (tree obj, tree field)
> +{
> +  tree field_type = TREE_TYPE (field);
> +  tree obj_type = TREE_TYPE (obj);
> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
> +    field_type
> +      = build_qualified_type (field_type,
> +			      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));

Are you sure this can't trigger?
Say
extern int __seg_fs a;

void
foo (void)
{
  #pragma omp parallel private (a)
  a = 2;
}
I think keeping the qual addr space here is the wrong thing to do,
it should keep the other quals and clear the address space instead,
the whole struct is going to be in generic addres space, isn't it?

> +
> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
> +  if (TREE_THIS_VOLATILE (field))
> +    TREE_THIS_VOLATILE (ret) |= 1;
> +  if (TREE_READONLY (field))
> +    TREE_READONLY (ret) |= 1;

When touching these two, shouldn't it be better written as
= 1; instead of |= 1; ?  For a bitfield...

	Jakub
Thomas Schwinge Aug. 19, 2021, 8:13 p.m. UTC | #2
Hi!

Richard, maybe you have an opinion here, in particular about my
"SLP vectorizer" comment below?  Please see
<http://mid.mail-archive.com/87r1f2puss.fsf@euler.schwinge.homeip.net>
for the full context.

On 2021-08-16T10:21:04+0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:
>>  /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
>>     as appropriate.  */
>>
>>  tree
>>  omp_build_component_ref (tree obj, tree field)
>>  {
>> +  tree field_type = TREE_TYPE (field);
>> +  tree obj_type = TREE_TYPE (obj);
>> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
>> +    field_type
>> +      = build_qualified_type (field_type,
>> +                          KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));

(For later reference: "Kwok's new code" here is to propagate to
'field_type' any non-generic address space of 'obj_type'.)

|> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
|> current set of offloading testcases, we never see a
|> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
|> to be necessary there (but also won't do any harm: no-op).
>
> Are you sure this can't trigger?
> Say
> extern int __seg_fs a;
>
> void
> foo (void)
> {
>   #pragma omp parallel private (a)
>   a = 2;
> }

That test case doesn't run into 'omp_build_component_ref' at all,
but I'm attaching an altered and extended variant that does,
"Add 'libgomp.c/address-space-1.c'".  OK to push to master branch?

In this case, 'omp_build_component_ref' called via host compilation
'pass_lower_omp', it's the 'field_type' that has 'address-space-1', not
'obj_type', so indeed Kwok's new code is a no-op:

    (gdb) call debug_tree(field_type)
     <pointer_type 0x7ffff7686b28
        type <integer_type 0x7ffff7686498 int address-space-1 SI
            size <integer_cst 0x7ffff7540f30 constant 32>
            unit-size <integer_cst 0x7ffff7540f48 constant 4>
            align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686498 precision:32 min <integer_cst 0x7ffff7540ee8 -2147483648> max <integer_cst 0x7ffff7540f00 2147483647>
            pointer_to_this <pointer_type 0x7ffff7686b28>>
        unsigned DI
        size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
        unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
        align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>

    (gdb) call debug_tree(obj_type)
     <record_type 0x7ffff7686bd0 .omp_data_t.0 readonly DI
        size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
        unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
        align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686bd0
        fields <field_decl 0x7ffff7568428 a
            type <pointer_type 0x7ffff7686b28 type <integer_type 0x7ffff7686498 int address-space-1>
                unsigned DI size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
                align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
            unsigned DI /home/thomas/shared/gcc/omp/as.c:4:14 size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
            align:64 warn_if_not_align:0 offset_align 128
            offset <integer_cst 0x7ffff7540d20 constant 0>
            bit-offset <integer_cst 0x7ffff7540d68 constant 0> context <record_type 0x7ffff7686540 .omp_data_t.0>> reference_to_this <reference_type 0x7ffff7686c78>>

The case that Kwok's new code handles, however, is when 'obj_type' has a
non-generic address space, and then propagates that one to 'field_type'.

For a similar OpenACC example, 'omp_build_component_ref' called via GCN
offloading compilation 'pass_omp_oacc_neuter_broadcast', we've got
without Kwok's new code:

    (gdb) call debug_tree(field_type)
     <boolean_type 0x7ffff7550b28 bool public unsigned QI
        size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
        unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
        align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>

    (gdb) call debug_tree(obj_type)
     <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4 QI
        size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
        unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
        align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631000
        fields <field_decl 0x7ffff762e260 _52
            type <boolean_type 0x7ffff7550b28 bool public unsigned QI size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
                align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
            unsigned QI <built-in>:0:0 size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
            align:8 warn_if_not_align:0 offset_align 64
            offset <integer_cst 0x7ffff754f9c0 constant 0>
            bit-offset <integer_cst 0x7ffff754fa08 constant 0> context <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4>>
        pointer_to_this <pointer_type 0x7ffff7631498>>

..., and with Kwok's new code the 'address-space-4' of 'obj_type' is
propagated to 'field_type':

    (gdb) call debug_tree(field_type)
     <boolean_type 0x7ffff7631540 bool address-space-4 unsigned QI
        size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
        unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
        align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631540 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>

I'm not familiar enough with these bits to tell whether Kwok's new code
is the right solution to this problem -- or if, for example, the problem
is rather in the SLP vectorizer, where the ICE seems to ultimately
emerge?

Without (ICEs later) vs. with (works) Kwok's new code, we see the
'a.xamdgcn-amdhsa.mkoffload.175t.slp1' dump change as follows (word-diff,
only additional '<address-space-4>', occasionally):

    [...]
      {+<address-space-4>+} vector(2) long int * vectp.58;
      {+<address-space-4>+} vector(2) long int * vectp_.oacc_worker_o.57;
      {+<address-space-4>+} vector(2) int * vectp.56;
      {+<address-space-4>+} vector(2) int * vectp_.oacc_worker_o.55;
    [...]
      {+<address-space-4>+} long int * _104;
    [...]
      {+<address-space-4>+} long int * _108;
    [...]
      <address-space-4> void * _350;
    [...]
      _350 = __builtin_gcn_single_copy_start (&.oacc_worker_o.6);
    [...]
      MEM <{+<address-space-4>+} vector(2) long int> [(long int *)&.oacc_worker_o.6] = _101;
      _108 = &.oacc_worker_o.6._22 + 16;
      MEM <{+<address-space-4>+} vector(2) long int> [(long int *)_108] = _100;
      _104 = &.oacc_worker_o.6._22 + 32;
    [...]

For example, with Kwok's new code, '_108' ('<address-space-4> long int *')
is cast into '(long int *)' -- presumably synthesized in the SLP
vectorizer?  Is that correct or shouldn't that cast also include
'<address-space-4>'?

I see a similar issue has been fixed a while ago: r245772 (Git commit
c7d97b2846c5647a81548caa3264d77c0a595010) for PR79723
"Another case of dropped gs: prefix", changing
'gcc/tree-vect-stmts.c:get_vectype_for_scalar_type_and_size' as follows:

    +  /* Re-attach the address-space qualifier if we canonicalized the scalar
    +     type.  */
    +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    +    return build_qualified_type
    +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
    +
       return vectype;

(It looks a bit like the address space handling is quite fragile in GCC's
'tree' types/interfaces?  Do we have ideas about how to make that more
robust, less "bolt-on"?)

I did add a few 'assert's for non-generic address space to
'gcc/tree-vect*', but have not yet located where things may be going
wrong.


> I think keeping the qual addr space here is the wrong thing to do,
> it should keep the other quals and clear the address space instead,
> the whole struct is going to be in generic addres space, isn't it?

Correct for 'omp_build_component_ref' called via host compilation
'pass_lower_omp', but in the case of 'omp_build_component_ref' called via
GCN offloading compilation 'pass_omp_oacc_neuter_broadcast', 'obj_type'
has a non-generic address space.

However, regarding the former comment -- shouldn't we force generic
address space for all 'tree' types read in via LTO streaming for
offloading compilation?  I assume that (in the general case) address
spaces are never compatible between host and offloading compilation?
For the attached "Add 'libgomp.c/address-space-1.c'", propagating the
'__seg_fs' address space across the offloading boundary (assuming I did
interpret the dumps correctly) doesn't seem to cause any problems, but
maybe it's problematic for other cases?  (This is, however, a separate
issue from what I'm discussing here.)


>> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
>> +  if (TREE_THIS_VOLATILE (field))
>> +    TREE_THIS_VOLATILE (ret) |= 1;
>> +  if (TREE_READONLY (field))
>> +    TREE_READONLY (ret) |= 1;
>
> When touching these two, shouldn't it be better written as
> = 1; instead of |= 1; ?  For a bitfield...

Yes, that was just copied from the original
'gcc/omp-general.c:omp_build_component_ref' -- but happy to simplify
that, of course.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Richard Biener Aug. 20, 2021, 7:51 a.m. UTC | #3
On Thu, Aug 19, 2021 at 10:14 PM Thomas Schwinge
<thomas@codesourcery.com> wrote:
>
> Hi!
>
> Richard, maybe you have an opinion here, in particular about my
> "SLP vectorizer" comment below?  Please see
> <http://mid.mail-archive.com/87r1f2puss.fsf@euler.schwinge.homeip.net>
> for the full context.
>
> On 2021-08-16T10:21:04+0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:
> >>  /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
> >>     as appropriate.  */
> >>
> >>  tree
> >>  omp_build_component_ref (tree obj, tree field)
> >>  {
> >> +  tree field_type = TREE_TYPE (field);
> >> +  tree obj_type = TREE_TYPE (obj);
> >> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
> >> +    field_type
> >> +      = build_qualified_type (field_type,
> >> +                          KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
>
> (For later reference: "Kwok's new code" here is to propagate to
> 'field_type' any non-generic address space of 'obj_type'.)
>
> |> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
> |> current set of offloading testcases, we never see a
> |> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
> |> to be necessary there (but also won't do any harm: no-op).
> >
> > Are you sure this can't trigger?
> > Say
> > extern int __seg_fs a;
> >
> > void
> > foo (void)
> > {
> >   #pragma omp parallel private (a)
> >   a = 2;
> > }
>
> That test case doesn't run into 'omp_build_component_ref' at all,
> but I'm attaching an altered and extended variant that does,
> "Add 'libgomp.c/address-space-1.c'".  OK to push to master branch?
>
> In this case, 'omp_build_component_ref' called via host compilation
> 'pass_lower_omp', it's the 'field_type' that has 'address-space-1', not
> 'obj_type', so indeed Kwok's new code is a no-op:
>
>     (gdb) call debug_tree(field_type)
>      <pointer_type 0x7ffff7686b28
>         type <integer_type 0x7ffff7686498 int address-space-1 SI
>             size <integer_cst 0x7ffff7540f30 constant 32>
>             unit-size <integer_cst 0x7ffff7540f48 constant 4>
>             align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686498 precision:32 min <integer_cst 0x7ffff7540ee8 -2147483648> max <integer_cst 0x7ffff7540f00 2147483647>
>             pointer_to_this <pointer_type 0x7ffff7686b28>>
>         unsigned DI
>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
>
>     (gdb) call debug_tree(obj_type)
>      <record_type 0x7ffff7686bd0 .omp_data_t.0 readonly DI
>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686bd0
>         fields <field_decl 0x7ffff7568428 a
>             type <pointer_type 0x7ffff7686b28 type <integer_type 0x7ffff7686498 int address-space-1>
>                 unsigned DI size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
>                 align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
>             unsigned DI /home/thomas/shared/gcc/omp/as.c:4:14 size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
>             align:64 warn_if_not_align:0 offset_align 128
>             offset <integer_cst 0x7ffff7540d20 constant 0>
>             bit-offset <integer_cst 0x7ffff7540d68 constant 0> context <record_type 0x7ffff7686540 .omp_data_t.0>> reference_to_this <reference_type 0x7ffff7686c78>>
>
> The case that Kwok's new code handles, however, is when 'obj_type' has a
> non-generic address space, and then propagates that one to 'field_type'.
>
> For a similar OpenACC example, 'omp_build_component_ref' called via GCN
> offloading compilation 'pass_omp_oacc_neuter_broadcast', we've got
> without Kwok's new code:
>
>     (gdb) call debug_tree(field_type)
>      <boolean_type 0x7ffff7550b28 bool public unsigned QI
>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>
>     (gdb) call debug_tree(obj_type)
>      <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4 QI
>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631000
>         fields <field_decl 0x7ffff762e260 _52
>             type <boolean_type 0x7ffff7550b28 bool public unsigned QI size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
>                 align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>             unsigned QI <built-in>:0:0 size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
>             align:8 warn_if_not_align:0 offset_align 64
>             offset <integer_cst 0x7ffff754f9c0 constant 0>
>             bit-offset <integer_cst 0x7ffff754fa08 constant 0> context <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4>>
>         pointer_to_this <pointer_type 0x7ffff7631498>>
>
> ..., and with Kwok's new code the 'address-space-4' of 'obj_type' is
> propagated to 'field_type':
>
>     (gdb) call debug_tree(field_type)
>      <boolean_type 0x7ffff7631540 bool address-space-4 unsigned QI
>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631540 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>
> I'm not familiar enough with these bits to tell whether Kwok's new code
> is the right solution to this problem -- or if, for example, the problem
> is rather in the SLP vectorizer, where the ICE seems to ultimately
> emerge?
>
> Without (ICEs later) vs. with (works) Kwok's new code, we see the
> 'a.xamdgcn-amdhsa.mkoffload.175t.slp1' dump change as follows (word-diff,
> only additional '<address-space-4>', occasionally):
>
>     [...]
>       {+<address-space-4>+} vector(2) long int * vectp.58;
>       {+<address-space-4>+} vector(2) long int * vectp_.oacc_worker_o.57;
>       {+<address-space-4>+} vector(2) int * vectp.56;
>       {+<address-space-4>+} vector(2) int * vectp_.oacc_worker_o.55;
>     [...]
>       {+<address-space-4>+} long int * _104;
>     [...]
>       {+<address-space-4>+} long int * _108;
>     [...]
>       <address-space-4> void * _350;
>     [...]
>       _350 = __builtin_gcn_single_copy_start (&.oacc_worker_o.6);
>     [...]
>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)&.oacc_worker_o.6] = _101;
>       _108 = &.oacc_worker_o.6._22 + 16;
>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)_108] = _100;
>       _104 = &.oacc_worker_o.6._22 + 32;
>     [...]
>
> For example, with Kwok's new code, '_108' ('<address-space-4> long int *')
> is cast into '(long int *)' -- presumably synthesized in the SLP
> vectorizer?  Is that correct or shouldn't that cast also include
> '<address-space-4>'?
>
> I see a similar issue has been fixed a while ago: r245772 (Git commit
> c7d97b2846c5647a81548caa3264d77c0a595010) for PR79723
> "Another case of dropped gs: prefix", changing
> 'gcc/tree-vect-stmts.c:get_vectype_for_scalar_type_and_size' as follows:
>
>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
>     +     type.  */
>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
>     +    return build_qualified_type
>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
>     +
>        return vectype;
>
> (It looks a bit like the address space handling is quite fragile in GCC's
> 'tree' types/interfaces?  Do we have ideas about how to make that more
> robust, less "bolt-on"?)

If in doubt always look at what RTL expansion does - it looks like
set_mem_attributes expects the address-space qualifier to be
present on the type or in case it is passed an object, on the
type of the base, or in case of a dereference, on the pointed-to
type of the pointer (and yes, that does look somewhat fragile).

So it looks like the patch you refer to shouldn't fix anything and

>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
>     +     type.  */
>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
>     +    return build_qualified_type
>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

looks incomplete.  What you'd need to look for is MEM_REFs built
by the vectorizer and the address-space information on the pointers,
like generated from vect_create_data_ref_ptr.  It might also be that
data-ref analysis / SCEV looks through address-space qualifier changing
casts and thus we pick up the wrong address-space in the end.

What's the testcase that ICEs on trunk?

> I did add a few 'assert's for non-generic address space to
> 'gcc/tree-vect*', but have not yet located where things may be going
> wrong.
>
>
> > I think keeping the qual addr space here is the wrong thing to do,
> > it should keep the other quals and clear the address space instead,
> > the whole struct is going to be in generic addres space, isn't it?
>
> Correct for 'omp_build_component_ref' called via host compilation
> 'pass_lower_omp', but in the case of 'omp_build_component_ref' called via
> GCN offloading compilation 'pass_omp_oacc_neuter_broadcast', 'obj_type'
> has a non-generic address space.
>
> However, regarding the former comment -- shouldn't we force generic
> address space for all 'tree' types read in via LTO streaming for
> offloading compilation?  I assume that (in the general case) address
> spaces are never compatible between host and offloading compilation?
> For the attached "Add 'libgomp.c/address-space-1.c'", propagating the
> '__seg_fs' address space across the offloading boundary (assuming I did
> interpret the dumps correctly) doesn't seem to cause any problems, but
> maybe it's problematic for other cases?  (This is, however, a separate
> issue from what I'm discussing here.)
>
>
> >> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
> >> +  if (TREE_THIS_VOLATILE (field))
> >> +    TREE_THIS_VOLATILE (ret) |= 1;
> >> +  if (TREE_READONLY (field))
> >> +    TREE_READONLY (ret) |= 1;
> >
> > When touching these two, shouldn't it be better written as
> > = 1; instead of |= 1; ?  For a bitfield...
>
> Yes, that was just copied from the original
> 'gcc/omp-general.c:omp_build_component_ref' -- but happy to simplify
> that, of course.
>
>
> Grüße
>  Thomas
>
>
> -----------------
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Jakub Jelinek Aug. 20, 2021, 2:49 p.m. UTC | #4
On Thu, Aug 19, 2021 at 10:13:56PM +0200, Thomas Schwinge wrote:
> 	libgomp/
> 	* testsuite/libgomp.c/address-space-1.c: New file.
> 
> Co-authored-by: Jakub Jelinek <jakub@redhat.com>
> ---
>  libgomp/testsuite/libgomp.c/address-space-1.c | 24 +++++++++++++++++++
>  1 file changed, 24 insertions(+)
>  create mode 100644 libgomp/testsuite/libgomp.c/address-space-1.c
> 
> diff --git a/libgomp/testsuite/libgomp.c/address-space-1.c b/libgomp/testsuite/libgomp.c/address-space-1.c
> new file mode 100644
> index 00000000000..90244db03b1
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/address-space-1.c
> @@ -0,0 +1,24 @@
> +/* Verify OMP instances of variables with address space.  */
> +
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
> +/* { dg-require-effective-target offload_device_nonshared_as } */
> +
> +#include <assert.h>
> +
> +int __seg_fs a;
> +
> +int
> +main (void)
> +{
> +  // a = 123; // SIGSEGV
> +  int b;
> +#pragma omp target map(alloc: a) map(from: b)
> +  {
> +    a = 321; // no SIGSEGV (given 'offload_device_nonshared_as')
> +    asm volatile ("" : : : "memory");

Maybe better asm volatile ("" : : "g" (&a) : "memory");
so that the compiler doesn't think it could optimize it away to
just b = 321;
Ok with that change.

> +    b = a;
> +  }
> +  assert (b == 321);
> +
> +  return 0;
> +}
> -- 
> 2.30.2
> 


	Jakub
Thomas Schwinge Aug. 23, 2021, 2:30 p.m. UTC | #5
Hi!

On 2021-08-20T09:51:36+0200, Richard Biener <richard.guenther@gmail.com> wrote:
> On Thu, Aug 19, 2021 at 10:14 PM Thomas Schwinge
> <thomas@codesourcery.com> wrote:
>> Richard, maybe you have an opinion here, in particular about my
>> "SLP vectorizer" comment below?  Please see
>> <http://mid.mail-archive.com/87r1f2puss.fsf@euler.schwinge.homeip.net>
>> for the full context.
>>
>> On 2021-08-16T10:21:04+0200, Jakub Jelinek <jakub@redhat.com> wrote:
>> > On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:
>> >>  /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
>> >>     as appropriate.  */
>> >>
>> >>  tree
>> >>  omp_build_component_ref (tree obj, tree field)
>> >>  {
>> >> +  tree field_type = TREE_TYPE (field);
>> >> +  tree obj_type = TREE_TYPE (obj);
>> >> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
>> >> +    field_type
>> >> +      = build_qualified_type (field_type,
>> >> +                          KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
>>
>> (For later reference: "Kwok's new code" here is to propagate to
>> 'field_type' any non-generic address space of 'obj_type'.)
>>
>> |> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
>> |> current set of offloading testcases, we never see a
>> |> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
>> |> to be necessary there (but also won't do any harm: no-op).
>> >
>> > Are you sure this can't trigger?
>> > Say
>> > extern int __seg_fs a;
>> >
>> > void
>> > foo (void)
>> > {
>> >   #pragma omp parallel private (a)
>> >   a = 2;
>> > }
>>
>> That test case doesn't run into 'omp_build_component_ref' at all,
>> but I'm attaching an altered and extended variant that does,
>> "Add 'libgomp.c/address-space-1.c'".  OK to push to master branch?
>>
>> In this case, 'omp_build_component_ref' called via host compilation
>> 'pass_lower_omp', it's the 'field_type' that has 'address-space-1', not
>> 'obj_type', so indeed Kwok's new code is a no-op:
>>
>>     (gdb) call debug_tree(field_type)
>>      <pointer_type 0x7ffff7686b28
>>         type <integer_type 0x7ffff7686498 int address-space-1 SI
>>             size <integer_cst 0x7ffff7540f30 constant 32>
>>             unit-size <integer_cst 0x7ffff7540f48 constant 4>
>>             align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686498 precision:32 min <integer_cst 0x7ffff7540ee8 -2147483648> max <integer_cst 0x7ffff7540f00 2147483647>
>>             pointer_to_this <pointer_type 0x7ffff7686b28>>
>>         unsigned DI
>>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
>>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
>>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
>>
>>     (gdb) call debug_tree(obj_type)
>>      <record_type 0x7ffff7686bd0 .omp_data_t.0 readonly DI
>>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
>>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
>>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686bd0
>>         fields <field_decl 0x7ffff7568428 a
>>             type <pointer_type 0x7ffff7686b28 type <integer_type 0x7ffff7686498 int address-space-1>
>>                 unsigned DI size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
>>                 align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
>>             unsigned DI /home/thomas/shared/gcc/omp/as.c:4:14 size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
>>             align:64 warn_if_not_align:0 offset_align 128
>>             offset <integer_cst 0x7ffff7540d20 constant 0>
>>             bit-offset <integer_cst 0x7ffff7540d68 constant 0> context <record_type 0x7ffff7686540 .omp_data_t.0>> reference_to_this <reference_type 0x7ffff7686c78>>
>>
>> The case that Kwok's new code handles, however, is when 'obj_type' has a
>> non-generic address space, and then propagates that one to 'field_type'.
>>
>> For a similar OpenACC example, 'omp_build_component_ref' called via GCN
>> offloading compilation 'pass_omp_oacc_neuter_broadcast', we've got
>> without Kwok's new code:
>>
>>     (gdb) call debug_tree(field_type)
>>      <boolean_type 0x7ffff7550b28 bool public unsigned QI
>>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>>
>>     (gdb) call debug_tree(obj_type)
>>      <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4 QI
>>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631000
>>         fields <field_decl 0x7ffff762e260 _52
>>             type <boolean_type 0x7ffff7550b28 bool public unsigned QI size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
>>                 align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>>             unsigned QI <built-in>:0:0 size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
>>             align:8 warn_if_not_align:0 offset_align 64
>>             offset <integer_cst 0x7ffff754f9c0 constant 0>
>>             bit-offset <integer_cst 0x7ffff754fa08 constant 0> context <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4>>
>>         pointer_to_this <pointer_type 0x7ffff7631498>>
>>
>> ..., and with Kwok's new code the 'address-space-4' of 'obj_type' is
>> propagated to 'field_type':
>>
>>     (gdb) call debug_tree(field_type)
>>      <boolean_type 0x7ffff7631540 bool address-space-4 unsigned QI
>>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
>>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
>>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631540 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
>>
>> I'm not familiar enough with these bits to tell whether Kwok's new code
>> is the right solution to this problem -- or if, for example, the problem
>> is rather in the SLP vectorizer, where the ICE seems to ultimately
>> emerge?
>>
>> Without (ICEs later) vs. with (works) Kwok's new code, we see the
>> 'a.xamdgcn-amdhsa.mkoffload.175t.slp1' dump change as follows (word-diff,
>> only additional '<address-space-4>', occasionally):
>>
>>     [...]
>>       {+<address-space-4>+} vector(2) long int * vectp.58;
>>       {+<address-space-4>+} vector(2) long int * vectp_.oacc_worker_o.57;
>>       {+<address-space-4>+} vector(2) int * vectp.56;
>>       {+<address-space-4>+} vector(2) int * vectp_.oacc_worker_o.55;
>>     [...]
>>       {+<address-space-4>+} long int * _104;
>>     [...]
>>       {+<address-space-4>+} long int * _108;
>>     [...]
>>       <address-space-4> void * _350;
>>     [...]
>>       _350 = __builtin_gcn_single_copy_start (&.oacc_worker_o.6);
>>     [...]
>>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)&.oacc_worker_o.6] = _101;
>>       _108 = &.oacc_worker_o.6._22 + 16;
>>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)_108] = _100;
>>       _104 = &.oacc_worker_o.6._22 + 32;
>>     [...]
>>
>> For example, with Kwok's new code, '_108' ('<address-space-4> long int *')
>> is cast into '(long int *)' -- presumably synthesized in the SLP
>> vectorizer?  Is that correct or shouldn't that cast also include
>> '<address-space-4>'?
>>
>> I see a similar issue has been fixed a while ago: r245772 (Git commit
>> c7d97b2846c5647a81548caa3264d77c0a595010) for PR79723
>> "Another case of dropped gs: prefix", changing
>> 'gcc/tree-vect-stmts.c:get_vectype_for_scalar_type_and_size' as follows:
>>
>>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
>>     +     type.  */
>>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
>>     +    return build_qualified_type
>>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
>>     +
>>        return vectype;
>>
>> (It looks a bit like the address space handling is quite fragile in GCC's
>> 'tree' types/interfaces?  Do we have ideas about how to make that more
>> robust, less "bolt-on"?)
>
> If in doubt always look at what RTL expansion does - it looks like
> set_mem_attributes expects the address-space qualifier to be
> present on the type or in case it is passed an object, on the
> type of the base, or in case of a dereference, on the pointed-to
> type of the pointer (and yes, that does look somewhat fragile).
>
> So it looks like the patch you refer to shouldn't fix anything and
>
>>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
>>     +     type.  */
>>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
>>     +    return build_qualified_type
>>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
>
> looks incomplete.  What you'd need to look for is MEM_REFs built
> by the vectorizer and the address-space information on the pointers,
> like generated from vect_create_data_ref_ptr.  It might also be that
> data-ref analysis / SCEV looks through address-space qualifier changing
> casts and thus we pick up the wrong address-space in the end.

Aah, more GCC pieces to learn about ;-) -- thanks for the pointers!

> What's the testcase that ICEs on trunk?

You'll need a GCN offloading build with the attached
"[WIP] Reproduce GCN address space vs. SLP vectorization ICEs",
run 'make check-target-libgomp', and observe a number of ICEs like:

    during RTL pass: expand
    [...]/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: In function 'main._omp_fn.0':
    [...]/libgomp.oacc-c-c++-common/loop-red-gwv-1.c:19:9: internal compiler error: in convert_memory_address_addr_space_1, at explow.c:301
    [...]
    mkoffload: fatal error: build-gcc/gcc/x86_64-pc-linux-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status

'-O1 -ftree-slp-vectorize' would be sufficient to trigger that one.
Run with '-save-temps -v', see the
'[...]/build-gcc-offload-amdgcn-amdhsa/gcc/lto1' command ICE:

    #0  fancy_abort (file=file@entry=0x182e418 "[...]/source-gcc/gcc/explow.c", line=line@entry=301, function=function@entry=0x182e960 <convert_memory_address_addr_space_1(scalar_int_mode, rtx_def*, unsigned char, bool, bool)::__FUNCTION__> "convert_memory_address_addr_space_1") at [...]/source-gcc/gcc/diagnostic.c:1961
    #1  0x00000000007ef690 in convert_memory_address_addr_space_1 (to_mode=..., x=x@entry=0x7ffff764fa08, as=as@entry=0 '\000', in_const=in_const@entry=false, no_emit=no_emit@entry=false) at [...]/source-gcc/gcc/explow.c:301
    #2  0x00000000007ef6cb in convert_memory_address_addr_space (to_mode=..., x=0x7ffff764fa08, as=as@entry=0 '\000') at [...]/source-gcc/gcc/explow.c:423
    #3  0x0000000000812f48 in expand_expr_addr_expr (modifier=EXPAND_SUM, tmode=E_DImode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.c:8535
    #4  expand_expr_real_1 (exp=0x7ffff764a520, target=<optimized out>, tmode=<optimized out>, modifier=EXPAND_SUM, alt_rtl=0x0, inner_reference_p=<optimized out>) at [...]/source-gcc/gcc/expr.c:11741
    #5  0x0000000000813139 in expand_expr (modifier=EXPAND_SUM, mode=E_VOIDmode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.h:301
    #6  expand_expr_real_1 (exp=0x7ffff7649d48, target=<optimized out>, tmode=E_VOIDmode, modifier=EXPAND_WRITE, alt_rtl=0x0, inner_reference_p=<optimized out>) at [...]/source-gcc/gcc/expr.c:10887
    #7  0x000000000082475a in expand_expr (modifier=EXPAND_WRITE, mode=E_VOIDmode, target=0x0, exp=0x7ffff7649d48) at [...]/source-gcc/gcc/expr.h:301
    #8  expand_assignment (to=to@entry=0x7ffff7649d48, from=from@entry=0x7ffff763a7e0, nontemporal=<optimized out>) at [...]/source-gcc/gcc/expr.c:5732
    #9  0x00000000006c807d in expand_gimple_stmt_1 (stmt=stmt@entry=0x7ffff7646aa0) at [...]/source-gcc/gcc/cfgexpand.c:3944
    #10 0x00000000006c95c7 in expand_gimple_stmt (stmt=stmt@entry=0x7ffff7646aa0) at [...]/source-gcc/gcc/cfgexpand.c:4040
    #11 0x00000000006ce884 in expand_gimple_basic_block (bb=0x7ffff7635dd0, disable_tail_calls=disable_tail_calls@entry=false) at [...]/source-gcc/gcc/cfgexpand.c:6082
    #12 0x00000000006d13de in (anonymous namespace)::pass_expand::execute (this=<optimized out>, fun=<optimized out>) at [...]/source-gcc/gcc/cfgexpand.c:6808
    [...]
    (gdb) up
    #1  0x00000000007ef690 in convert_memory_address_addr_space_1 (to_mode=..., x=x@entry=0x7ffff764fa08, as=as@entry=0 '\000', in_const=in_const@entry=false, no_emit=no_emit@entry=false) at [...]/source-gcc/gcc/explow.c:301
    301       gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
    (gdb) list
    296                                          rtx x, addr_space_t as ATTRIBUTE_UNUSED,
    297                                          bool in_const ATTRIBUTE_UNUSED,
    298                                          bool no_emit ATTRIBUTE_UNUSED)
    299     {
    300     #ifndef POINTERS_EXTEND_UNSIGNED
    301       gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
    302       return x;
    303     #else /* defined(POINTERS_EXTEND_UNSIGNED) */
    304       scalar_int_mode pointer_mode, address_mode, from_mode;
    305       rtx temp;
    (gdb) call debug_rtx(x)
    (symbol_ref:SI (".oacc_worker_o.13.6") [flags 0x2] <var_decl 0x7ffff7637d80 .oacc_worker_o.13>)
    (gdb) print x->mode
    $1 = E_SImode
    (gdb) print to_mode
    $2 = {m_mode = E_DImode}
    (gdb) up
    #2  0x00000000007ef6cb in convert_memory_address_addr_space (to_mode=..., x=0x7ffff764fa08, as=as@entry=0 '\000') at [...]/source-gcc/gcc/explow.c:423
    423       return convert_memory_address_addr_space_1 (to_mode, x, as, false, false);
    (gdb) up
    #3  0x0000000000812f48 in expand_expr_addr_expr (modifier=EXPAND_SUM, tmode=E_DImode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.c:8535
    8535        result = convert_memory_address_addr_space (new_tmode, result, as);
    (gdb) call debug_tree(exp)
     <addr_expr 0x7ffff764a520
        type <pointer_type 0x7ffff7557888
            type <integer_type 0x7ffff75505e8 int public SI
                size <integer_cst 0x7ffff754fbd0 constant 32>
                unit-size <integer_cst 0x7ffff754fbe8 constant 4>
                align:32 warn_if_not_align:0 symtab:0 alias-set 4 canonical-type 0x7ffff75505e8 precision:32 min <integer_cst 0x7ffff754fb88 -2147483648> max <integer_cst 0x7ffff754fba0 2147483647>
                pointer_to_this <pointer_type 0x7ffff7557888>>
            public unsigned DI
            size <integer_cst 0x7ffff754f990 constant 64>
            unit-size <integer_cst 0x7ffff754f9a8 constant 8>
            align:64 warn_if_not_align:0 symtab:0 alias-set 1 structural-equality>
        constant
        arg:0 <var_decl 0x7ffff7637d80 .oacc_worker_o.13
            type <record_type 0x7ffff76215e8 .oacc_ws_data_s.0 address-space-4 no-force-blk BLK size <integer_cst 0x7ffff754f990 64> unit-size <integer_cst 0x7ffff754f9a8 8>
                align:32 warn_if_not_align:0 symtab:0 alias-set 5 canonical-type 0x7ffff76215e8 fields <field_decl 0x7ffff76317b8 t>
                pointer_to_this <pointer_type 0x7ffff76219d8>>
            addressable used static ignored BLK source-gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c:19:9 size <integer_cst 0x7ffff754f990 64> unit-size <integer_cst 0x7ffff754f9a8 8>
            align:128 warn_if_not_align:0
            (mem/c:BLK (symbol_ref:SI (".oacc_worker_o.13.6") [flags 0x2] <var_decl 0x7ffff7637d80 .oacc_worker_o.13>) [5 .oacc_worker_o.13+0 S8 A128 AS4])>>

In 'arg:0' of 'exp' note 'address-space-4' (expected): 'ADDR_SPACE_LDS'
(per 'gcc/config/gcn/gcn.h:gcn_address_spaces').


With the attached "[WIP] [GCN] '+#define POINTERS_EXTEND_UNSIGNED 1'", we
instead fail as follows:

    ./a.xamdgcn-amdhsa.mkoffload.2.s:92:23: error: invalid modifier 'rel32@lo' (no symbols present)
            s_add_u32       s2, s2, 32@rel32@lo+4
                                       ^
    ./a.xamdgcn-amdhsa.mkoffload.2.s:92:23: error: failed parsing operand.
            s_add_u32       s2, s2, 32@rel32@lo+4
                                       ^
    ./a.xamdgcn-amdhsa.mkoffload.2.s:93:24: error: invalid modifier 'rel32@hi' (no symbols present)
            s_addc_u32      s3, s3, 32@rel32@hi+4
                                       ^
    ./a.xamdgcn-amdhsa.mkoffload.2.s:93:24: error: failed parsing operand.
            s_addc_u32      s3, s3, 32@rel32@hi+4
                                       ^
    mkoffload: fatal error: build-gcc/gcc/x86_64-pc-linux-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status

..., so it's not that simple.  (I have no clue whether
'POINTERS_EXTEND_UNSIGNED' would make sense for GCN -- but thought it was
worth a quick try.)


Grüße
 Thomas


>> I did add a few 'assert's for non-generic address space to
>> 'gcc/tree-vect*', but have not yet located where things may be going
>> wrong.
>>
>>
>> > I think keeping the qual addr space here is the wrong thing to do,
>> > it should keep the other quals and clear the address space instead,
>> > the whole struct is going to be in generic addres space, isn't it?
>>
>> Correct for 'omp_build_component_ref' called via host compilation
>> 'pass_lower_omp', but in the case of 'omp_build_component_ref' called via
>> GCN offloading compilation 'pass_omp_oacc_neuter_broadcast', 'obj_type'
>> has a non-generic address space.
>>
>> However, regarding the former comment -- shouldn't we force generic
>> address space for all 'tree' types read in via LTO streaming for
>> offloading compilation?  I assume that (in the general case) address
>> spaces are never compatible between host and offloading compilation?
>> For the attached "Add 'libgomp.c/address-space-1.c'", propagating the
>> '__seg_fs' address space across the offloading boundary (assuming I did
>> interpret the dumps correctly) doesn't seem to cause any problems, but
>> maybe it's problematic for other cases?  (This is, however, a separate
>> issue from what I'm discussing here.)
>>
>>
>> >> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
>> >> +  if (TREE_THIS_VOLATILE (field))
>> >> +    TREE_THIS_VOLATILE (ret) |= 1;
>> >> +  if (TREE_READONLY (field))
>> >> +    TREE_READONLY (ret) |= 1;
>> >
>> > When touching these two, shouldn't it be better written as
>> > = 1; instead of |= 1; ?  For a bitfield...
>>
>> Yes, that was just copied from the original
>> 'gcc/omp-general.c:omp_build_component_ref' -- but happy to simplify
>> that, of course.
>>
>>
>> Grüße
>>  Thomas
>>
>>
>> -----------------
>> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Richard Biener Aug. 24, 2021, 7:43 a.m. UTC | #6
On Mon, Aug 23, 2021 at 4:30 PM Thomas Schwinge <thomas@codesourcery.com> wrote:
>
> Hi!
>
> On 2021-08-20T09:51:36+0200, Richard Biener <richard.guenther@gmail.com> wrote:
> > On Thu, Aug 19, 2021 at 10:14 PM Thomas Schwinge
> > <thomas@codesourcery.com> wrote:
> >> Richard, maybe you have an opinion here, in particular about my
> >> "SLP vectorizer" comment below?  Please see
> >> <http://mid.mail-archive.com/87r1f2puss.fsf@euler.schwinge.homeip.net>
> >> for the full context.
> >>
> >> On 2021-08-16T10:21:04+0200, Jakub Jelinek <jakub@redhat.com> wrote:
> >> > On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:
> >> >>  /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
> >> >>     as appropriate.  */
> >> >>
> >> >>  tree
> >> >>  omp_build_component_ref (tree obj, tree field)
> >> >>  {
> >> >> +  tree field_type = TREE_TYPE (field);
> >> >> +  tree obj_type = TREE_TYPE (obj);
> >> >> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
> >> >> +    field_type
> >> >> +      = build_qualified_type (field_type,
> >> >> +                          KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
> >>
> >> (For later reference: "Kwok's new code" here is to propagate to
> >> 'field_type' any non-generic address space of 'obj_type'.)
> >>
> >> |> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
> >> |> current set of offloading testcases, we never see a
> >> |> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
> >> |> to be necessary there (but also won't do any harm: no-op).
> >> >
> >> > Are you sure this can't trigger?
> >> > Say
> >> > extern int __seg_fs a;
> >> >
> >> > void
> >> > foo (void)
> >> > {
> >> >   #pragma omp parallel private (a)
> >> >   a = 2;
> >> > }
> >>
> >> That test case doesn't run into 'omp_build_component_ref' at all,
> >> but I'm attaching an altered and extended variant that does,
> >> "Add 'libgomp.c/address-space-1.c'".  OK to push to master branch?
> >>
> >> In this case, 'omp_build_component_ref' called via host compilation
> >> 'pass_lower_omp', it's the 'field_type' that has 'address-space-1', not
> >> 'obj_type', so indeed Kwok's new code is a no-op:
> >>
> >>     (gdb) call debug_tree(field_type)
> >>      <pointer_type 0x7ffff7686b28
> >>         type <integer_type 0x7ffff7686498 int address-space-1 SI
> >>             size <integer_cst 0x7ffff7540f30 constant 32>
> >>             unit-size <integer_cst 0x7ffff7540f48 constant 4>
> >>             align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686498 precision:32 min <integer_cst 0x7ffff7540ee8 -2147483648> max <integer_cst 0x7ffff7540f00 2147483647>
> >>             pointer_to_this <pointer_type 0x7ffff7686b28>>
> >>         unsigned DI
> >>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
> >>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
> >>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
> >>
> >>     (gdb) call debug_tree(obj_type)
> >>      <record_type 0x7ffff7686bd0 .omp_data_t.0 readonly DI
> >>         size <integer_cst 0x7ffff7540cf0 type <integer_type 0x7ffff75590a8 bitsizetype> constant 64>
> >>         unit-size <integer_cst 0x7ffff7540d08 type <integer_type 0x7ffff7559000 sizetype> constant 8>
> >>         align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686bd0
> >>         fields <field_decl 0x7ffff7568428 a
> >>             type <pointer_type 0x7ffff7686b28 type <integer_type 0x7ffff7686498 int address-space-1>
> >>                 unsigned DI size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
> >>                 align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7686b28>
> >>             unsigned DI /home/thomas/shared/gcc/omp/as.c:4:14 size <integer_cst 0x7ffff7540cf0 64> unit-size <integer_cst 0x7ffff7540d08 8>
> >>             align:64 warn_if_not_align:0 offset_align 128
> >>             offset <integer_cst 0x7ffff7540d20 constant 0>
> >>             bit-offset <integer_cst 0x7ffff7540d68 constant 0> context <record_type 0x7ffff7686540 .omp_data_t.0>> reference_to_this <reference_type 0x7ffff7686c78>>
> >>
> >> The case that Kwok's new code handles, however, is when 'obj_type' has a
> >> non-generic address space, and then propagates that one to 'field_type'.
> >>
> >> For a similar OpenACC example, 'omp_build_component_ref' called via GCN
> >> offloading compilation 'pass_omp_oacc_neuter_broadcast', we've got
> >> without Kwok's new code:
> >>
> >>     (gdb) call debug_tree(field_type)
> >>      <boolean_type 0x7ffff7550b28 bool public unsigned QI
> >>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
> >>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
> >>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
> >>
> >>     (gdb) call debug_tree(obj_type)
> >>      <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4 QI
> >>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
> >>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
> >>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631000
> >>         fields <field_decl 0x7ffff762e260 _52
> >>             type <boolean_type 0x7ffff7550b28 bool public unsigned QI size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
> >>                 align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7550b28 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
> >>             unsigned QI <built-in>:0:0 size <integer_cst 0x7ffff754fa80 8> unit-size <integer_cst 0x7ffff754fa98 1>
> >>             align:8 warn_if_not_align:0 offset_align 64
> >>             offset <integer_cst 0x7ffff754f9c0 constant 0>
> >>             bit-offset <integer_cst 0x7ffff754fa08 constant 0> context <record_type 0x7ffff7631000 .oacc_ws_data_s.0 address-space-4>>
> >>         pointer_to_this <pointer_type 0x7ffff7631498>>
> >>
> >> ..., and with Kwok's new code the 'address-space-4' of 'obj_type' is
> >> propagated to 'field_type':
> >>
> >>     (gdb) call debug_tree(field_type)
> >>      <boolean_type 0x7ffff7631540 bool address-space-4 unsigned QI
> >>         size <integer_cst 0x7ffff754fa80 type <integer_type 0x7ffff75500a8 bitsizetype> constant 8>
> >>         unit-size <integer_cst 0x7ffff754fa98 type <integer_type 0x7ffff7550000 sizetype> constant 1>
> >>         align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff7631540 precision:1 min <integer_cst 0x7ffff754fcd8 0> max <integer_cst 0x7ffff754fd08 1>>
> >>
> >> I'm not familiar enough with these bits to tell whether Kwok's new code
> >> is the right solution to this problem -- or if, for example, the problem
> >> is rather in the SLP vectorizer, where the ICE seems to ultimately
> >> emerge?
> >>
> >> Without (ICEs later) vs. with (works) Kwok's new code, we see the
> >> 'a.xamdgcn-amdhsa.mkoffload.175t.slp1' dump change as follows (word-diff,
> >> only additional '<address-space-4>', occasionally):
> >>
> >>     [...]
> >>       {+<address-space-4>+} vector(2) long int * vectp.58;
> >>       {+<address-space-4>+} vector(2) long int * vectp_.oacc_worker_o.57;
> >>       {+<address-space-4>+} vector(2) int * vectp.56;
> >>       {+<address-space-4>+} vector(2) int * vectp_.oacc_worker_o.55;
> >>     [...]
> >>       {+<address-space-4>+} long int * _104;
> >>     [...]
> >>       {+<address-space-4>+} long int * _108;
> >>     [...]
> >>       <address-space-4> void * _350;
> >>     [...]
> >>       _350 = __builtin_gcn_single_copy_start (&.oacc_worker_o.6);
> >>     [...]
> >>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)&.oacc_worker_o.6] = _101;
> >>       _108 = &.oacc_worker_o.6._22 + 16;
> >>       MEM <{+<address-space-4>+} vector(2) long int> [(long int *)_108] = _100;
> >>       _104 = &.oacc_worker_o.6._22 + 32;
> >>     [...]
> >>
> >> For example, with Kwok's new code, '_108' ('<address-space-4> long int *')
> >> is cast into '(long int *)' -- presumably synthesized in the SLP
> >> vectorizer?  Is that correct or shouldn't that cast also include
> >> '<address-space-4>'?
> >>
> >> I see a similar issue has been fixed a while ago: r245772 (Git commit
> >> c7d97b2846c5647a81548caa3264d77c0a595010) for PR79723
> >> "Another case of dropped gs: prefix", changing
> >> 'gcc/tree-vect-stmts.c:get_vectype_for_scalar_type_and_size' as follows:
> >>
> >>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
> >>     +     type.  */
> >>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
> >>     +    return build_qualified_type
> >>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
> >>     +
> >>        return vectype;
> >>
> >> (It looks a bit like the address space handling is quite fragile in GCC's
> >> 'tree' types/interfaces?  Do we have ideas about how to make that more
> >> robust, less "bolt-on"?)
> >
> > If in doubt always look at what RTL expansion does - it looks like
> > set_mem_attributes expects the address-space qualifier to be
> > present on the type or in case it is passed an object, on the
> > type of the base, or in case of a dereference, on the pointed-to
> > type of the pointer (and yes, that does look somewhat fragile).
> >
> > So it looks like the patch you refer to shouldn't fix anything and
> >
> >>     +  /* Re-attach the address-space qualifier if we canonicalized the scalar
> >>     +     type.  */
> >>     +  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
> >>     +    return build_qualified_type
> >>     +            (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
> >
> > looks incomplete.  What you'd need to look for is MEM_REFs built
> > by the vectorizer and the address-space information on the pointers,
> > like generated from vect_create_data_ref_ptr.  It might also be that
> > data-ref analysis / SCEV looks through address-space qualifier changing
> > casts and thus we pick up the wrong address-space in the end.
>
> Aah, more GCC pieces to learn about ;-) -- thanks for the pointers!
>
> > What's the testcase that ICEs on trunk?
>
> You'll need a GCN offloading build with the attached
> "[WIP] Reproduce GCN address space vs. SLP vectorization ICEs",
> run 'make check-target-libgomp', and observe a number of ICEs like:

Eh, OK ;)   Too much for a quick look - if you got sth that ICEs / shows
missing address-spaces and that is reproducible with a cc1 cross
to nvptx/gcn and a C testcase then I'm in to debug where the vectorizer
is at fault ;)

Richard.

>     during RTL pass: expand
>     [...]/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: In function 'main._omp_fn.0':
>     [...]/libgomp.oacc-c-c++-common/loop-red-gwv-1.c:19:9: internal compiler error: in convert_memory_address_addr_space_1, at explow.c:301
>     [...]
>     mkoffload: fatal error: build-gcc/gcc/x86_64-pc-linux-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
>
> '-O1 -ftree-slp-vectorize' would be sufficient to trigger that one.
> Run with '-save-temps -v', see the
> '[...]/build-gcc-offload-amdgcn-amdhsa/gcc/lto1' command ICE:
>
>     #0  fancy_abort (file=file@entry=0x182e418 "[...]/source-gcc/gcc/explow.c", line=line@entry=301, function=function@entry=0x182e960 <convert_memory_address_addr_space_1(scalar_int_mode, rtx_def*, unsigned char, bool, bool)::__FUNCTION__> "convert_memory_address_addr_space_1") at [...]/source-gcc/gcc/diagnostic.c:1961
>     #1  0x00000000007ef690 in convert_memory_address_addr_space_1 (to_mode=..., x=x@entry=0x7ffff764fa08, as=as@entry=0 '\000', in_const=in_const@entry=false, no_emit=no_emit@entry=false) at [...]/source-gcc/gcc/explow.c:301
>     #2  0x00000000007ef6cb in convert_memory_address_addr_space (to_mode=..., x=0x7ffff764fa08, as=as@entry=0 '\000') at [...]/source-gcc/gcc/explow.c:423
>     #3  0x0000000000812f48 in expand_expr_addr_expr (modifier=EXPAND_SUM, tmode=E_DImode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.c:8535
>     #4  expand_expr_real_1 (exp=0x7ffff764a520, target=<optimized out>, tmode=<optimized out>, modifier=EXPAND_SUM, alt_rtl=0x0, inner_reference_p=<optimized out>) at [...]/source-gcc/gcc/expr.c:11741
>     #5  0x0000000000813139 in expand_expr (modifier=EXPAND_SUM, mode=E_VOIDmode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.h:301
>     #6  expand_expr_real_1 (exp=0x7ffff7649d48, target=<optimized out>, tmode=E_VOIDmode, modifier=EXPAND_WRITE, alt_rtl=0x0, inner_reference_p=<optimized out>) at [...]/source-gcc/gcc/expr.c:10887
>     #7  0x000000000082475a in expand_expr (modifier=EXPAND_WRITE, mode=E_VOIDmode, target=0x0, exp=0x7ffff7649d48) at [...]/source-gcc/gcc/expr.h:301
>     #8  expand_assignment (to=to@entry=0x7ffff7649d48, from=from@entry=0x7ffff763a7e0, nontemporal=<optimized out>) at [...]/source-gcc/gcc/expr.c:5732
>     #9  0x00000000006c807d in expand_gimple_stmt_1 (stmt=stmt@entry=0x7ffff7646aa0) at [...]/source-gcc/gcc/cfgexpand.c:3944
>     #10 0x00000000006c95c7 in expand_gimple_stmt (stmt=stmt@entry=0x7ffff7646aa0) at [...]/source-gcc/gcc/cfgexpand.c:4040
>     #11 0x00000000006ce884 in expand_gimple_basic_block (bb=0x7ffff7635dd0, disable_tail_calls=disable_tail_calls@entry=false) at [...]/source-gcc/gcc/cfgexpand.c:6082
>     #12 0x00000000006d13de in (anonymous namespace)::pass_expand::execute (this=<optimized out>, fun=<optimized out>) at [...]/source-gcc/gcc/cfgexpand.c:6808
>     [...]
>     (gdb) up
>     #1  0x00000000007ef690 in convert_memory_address_addr_space_1 (to_mode=..., x=x@entry=0x7ffff764fa08, as=as@entry=0 '\000', in_const=in_const@entry=false, no_emit=no_emit@entry=false) at [...]/source-gcc/gcc/explow.c:301
>     301       gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
>     (gdb) list
>     296                                          rtx x, addr_space_t as ATTRIBUTE_UNUSED,
>     297                                          bool in_const ATTRIBUTE_UNUSED,
>     298                                          bool no_emit ATTRIBUTE_UNUSED)
>     299     {
>     300     #ifndef POINTERS_EXTEND_UNSIGNED
>     301       gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
>     302       return x;
>     303     #else /* defined(POINTERS_EXTEND_UNSIGNED) */
>     304       scalar_int_mode pointer_mode, address_mode, from_mode;
>     305       rtx temp;
>     (gdb) call debug_rtx(x)
>     (symbol_ref:SI (".oacc_worker_o.13.6") [flags 0x2] <var_decl 0x7ffff7637d80 .oacc_worker_o.13>)
>     (gdb) print x->mode
>     $1 = E_SImode
>     (gdb) print to_mode
>     $2 = {m_mode = E_DImode}
>     (gdb) up
>     #2  0x00000000007ef6cb in convert_memory_address_addr_space (to_mode=..., x=0x7ffff764fa08, as=as@entry=0 '\000') at [...]/source-gcc/gcc/explow.c:423
>     423       return convert_memory_address_addr_space_1 (to_mode, x, as, false, false);
>     (gdb) up
>     #3  0x0000000000812f48 in expand_expr_addr_expr (modifier=EXPAND_SUM, tmode=E_DImode, target=0x0, exp=0x7ffff764a520) at [...]/source-gcc/gcc/expr.c:8535
>     8535        result = convert_memory_address_addr_space (new_tmode, result, as);
>     (gdb) call debug_tree(exp)
>      <addr_expr 0x7ffff764a520
>         type <pointer_type 0x7ffff7557888
>             type <integer_type 0x7ffff75505e8 int public SI
>                 size <integer_cst 0x7ffff754fbd0 constant 32>
>                 unit-size <integer_cst 0x7ffff754fbe8 constant 4>
>                 align:32 warn_if_not_align:0 symtab:0 alias-set 4 canonical-type 0x7ffff75505e8 precision:32 min <integer_cst 0x7ffff754fb88 -2147483648> max <integer_cst 0x7ffff754fba0 2147483647>
>                 pointer_to_this <pointer_type 0x7ffff7557888>>
>             public unsigned DI
>             size <integer_cst 0x7ffff754f990 constant 64>
>             unit-size <integer_cst 0x7ffff754f9a8 constant 8>
>             align:64 warn_if_not_align:0 symtab:0 alias-set 1 structural-equality>
>         constant
>         arg:0 <var_decl 0x7ffff7637d80 .oacc_worker_o.13
>             type <record_type 0x7ffff76215e8 .oacc_ws_data_s.0 address-space-4 no-force-blk BLK size <integer_cst 0x7ffff754f990 64> unit-size <integer_cst 0x7ffff754f9a8 8>
>                 align:32 warn_if_not_align:0 symtab:0 alias-set 5 canonical-type 0x7ffff76215e8 fields <field_decl 0x7ffff76317b8 t>
>                 pointer_to_this <pointer_type 0x7ffff76219d8>>
>             addressable used static ignored BLK source-gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c:19:9 size <integer_cst 0x7ffff754f990 64> unit-size <integer_cst 0x7ffff754f9a8 8>
>             align:128 warn_if_not_align:0
>             (mem/c:BLK (symbol_ref:SI (".oacc_worker_o.13.6") [flags 0x2] <var_decl 0x7ffff7637d80 .oacc_worker_o.13>) [5 .oacc_worker_o.13+0 S8 A128 AS4])>>
>
> In 'arg:0' of 'exp' note 'address-space-4' (expected): 'ADDR_SPACE_LDS'
> (per 'gcc/config/gcn/gcn.h:gcn_address_spaces').
>
>
> With the attached "[WIP] [GCN] '+#define POINTERS_EXTEND_UNSIGNED 1'", we
> instead fail as follows:
>
>     ./a.xamdgcn-amdhsa.mkoffload.2.s:92:23: error: invalid modifier 'rel32@lo' (no symbols present)
>             s_add_u32       s2, s2, 32@rel32@lo+4
>                                        ^
>     ./a.xamdgcn-amdhsa.mkoffload.2.s:92:23: error: failed parsing operand.
>             s_add_u32       s2, s2, 32@rel32@lo+4
>                                        ^
>     ./a.xamdgcn-amdhsa.mkoffload.2.s:93:24: error: invalid modifier 'rel32@hi' (no symbols present)
>             s_addc_u32      s3, s3, 32@rel32@hi+4
>                                        ^
>     ./a.xamdgcn-amdhsa.mkoffload.2.s:93:24: error: failed parsing operand.
>             s_addc_u32      s3, s3, 32@rel32@hi+4
>                                        ^
>     mkoffload: fatal error: build-gcc/gcc/x86_64-pc-linux-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
>
> ..., so it's not that simple.  (I have no clue whether
> 'POINTERS_EXTEND_UNSIGNED' would make sense for GCN -- but thought it was
> worth a quick try.)
>
>
> Grüße
>  Thomas
>
>
> >> I did add a few 'assert's for non-generic address space to
> >> 'gcc/tree-vect*', but have not yet located where things may be going
> >> wrong.
> >>
> >>
> >> > I think keeping the qual addr space here is the wrong thing to do,
> >> > it should keep the other quals and clear the address space instead,
> >> > the whole struct is going to be in generic addres space, isn't it?
> >>
> >> Correct for 'omp_build_component_ref' called via host compilation
> >> 'pass_lower_omp', but in the case of 'omp_build_component_ref' called via
> >> GCN offloading compilation 'pass_omp_oacc_neuter_broadcast', 'obj_type'
> >> has a non-generic address space.
> >>
> >> However, regarding the former comment -- shouldn't we force generic
> >> address space for all 'tree' types read in via LTO streaming for
> >> offloading compilation?  I assume that (in the general case) address
> >> spaces are never compatible between host and offloading compilation?
> >> For the attached "Add 'libgomp.c/address-space-1.c'", propagating the
> >> '__seg_fs' address space across the offloading boundary (assuming I did
> >> interpret the dumps correctly) doesn't seem to cause any problems, but
> >> maybe it's problematic for other cases?  (This is, however, a separate
> >> issue from what I'm discussing here.)
> >>
> >>
> >> >> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
> >> >> +  if (TREE_THIS_VOLATILE (field))
> >> >> +    TREE_THIS_VOLATILE (ret) |= 1;
> >> >> +  if (TREE_READONLY (field))
> >> >> +    TREE_READONLY (ret) |= 1;
> >> >
> >> > When touching these two, shouldn't it be better written as
> >> > = 1; instead of |= 1; ?  For a bitfield...
> >>
> >> Yes, that was just copied from the original
> >> 'gcc/omp-general.c:omp_build_component_ref' -- but happy to simplify
> >> that, of course.
> >>
> >>
> >> Grüße
> >>  Thomas
> >>
> >>
> >> -----------------
> >> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
>
>
> -----------------
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff mbox series

Patch

From caee66cf2abd0bea3ee99b460a108ae0d69d599f Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 30 Jul 2021 16:15:25 +0200
Subject: [PATCH] Re-unify 'omp_build_component_ref' and
 'oacc_build_component_ref'

	gcc/
	* omp-general.c (omp_build_component_ref): New function,
	renamed/moved from...
	* omp-oacc-neuter-broadcast.cc (oacc_build_component_ref):
	... here.
	(build_receiver_ref, build_sender_ref): Update.
	* omp-low.c (omp_build_component_ref): Remove function.
	* omp-general.h (omp_build_component_ref): Declare function.
---
 gcc/omp-general.c                | 21 +++++++++++++++++++++
 gcc/omp-general.h                |  2 ++
 gcc/omp-low.c                    | 15 ---------------
 gcc/omp-oacc-neuter-broadcast.cc | 26 ++------------------------
 4 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index b46a537e281..67a0b752f62 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -2815,4 +2815,25 @@  oacc_get_ifn_dim_arg (const gimple *stmt)
   return (int) axis;
 }
 
+/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
+   as appropriate.  */
+
+tree
+omp_build_component_ref (tree obj, tree field)
+{
+  tree field_type = TREE_TYPE (field);
+  tree obj_type = TREE_TYPE (obj);
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
+    field_type
+      = build_qualified_type (field_type,
+			      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
+
+  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
+  if (TREE_THIS_VOLATILE (field))
+    TREE_THIS_VOLATILE (ret) |= 1;
+  if (TREE_READONLY (field))
+    TREE_READONLY (ret) |= 1;
+  return ret;
+}
+
 #include "gt-omp-general.h"
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index 5c3e0f0e205..6525175832c 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -145,4 +145,6 @@  get_openacc_privatization_dump_flags ()
   return l_dump_flags;
 }
 
+extern tree omp_build_component_ref (tree obj, tree field);
+
 #endif /* GCC_OMP_GENERAL_H */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 926087da701..1640321c445 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -613,21 +613,6 @@  omp_copy_decl_1 (tree var, omp_context *ctx)
   return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* See also 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'.  */
-
-static tree
-omp_build_component_ref (tree obj, tree field)
-{
-  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 /* Build tree nodes to access the field for VAR on the receiver side.  */
 
 static tree
diff --git a/gcc/omp-oacc-neuter-broadcast.cc b/gcc/omp-oacc-neuter-broadcast.cc
index f8555380451..720cf74f12f 100644
--- a/gcc/omp-oacc-neuter-broadcast.cc
+++ b/gcc/omp-oacc-neuter-broadcast.cc
@@ -936,28 +936,6 @@  worker_single_simple (basic_block from, basic_block to,
   update_stmt (acc_bar);
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* Adapted from 'gcc/omp-low.c:omp_build_component_ref'.  */
-
-static tree
-oacc_build_component_ref (tree obj, tree field)
-{
-  tree field_type = TREE_TYPE (field);
-  tree obj_type = TREE_TYPE (obj);
-  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
-    field_type = build_qualified_type
-			(field_type,
-			 KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
-
-  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 static tree
 build_receiver_ref (tree record_type, tree var, tree receiver_decl)
 {
@@ -965,7 +943,7 @@  build_receiver_ref (tree record_type, tree var, tree receiver_decl)
   tree x = build_simple_mem_ref (receiver_decl);
   tree field = *fields->get (var);
   TREE_THIS_NOTRAP (x) = 1;
-  x = oacc_build_component_ref (x, field);
+  x = omp_build_component_ref (x, field);
   return x;
 }
 
@@ -974,7 +952,7 @@  build_sender_ref (tree record_type, tree var, tree sender_decl)
 {
   field_map_t *fields = *field_map->get (record_type);
   tree field = *fields->get (var);
-  return oacc_build_component_ref (sender_decl, field);
+  return omp_build_component_ref (sender_decl, field);
 }
 
 static int
-- 
2.30.2