diff mbox series

Get rid of 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref' (was: Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref')

Message ID 87sfsaomzh.fsf@euler.schwinge.homeip.net
State New
Headers show
Series Get rid of 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref' (was: Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref') | expand

Commit Message

Thomas Schwinge Feb. 22, 2022, 5 p.m. UTC
Hi!

On 2021-08-09T16:16:51+0200, I wrote:
> This concerns a class of ICEs seen as of og10 branch with the
> "openacc: Middle-end worker-partitioning support" and "amdgcn:
> Enable OpenACC worker partitioning for AMD GCN" changes applied:

I've determined that as of commit 2a3f9f6532bb21d8ab6f16fbe9ee603f6b1405f2
"openacc: Shared memory layout optimisation", we're no longer running
into the vectorizer ICEs for '!ADDR_SPACE_GENERIC_P'.  I have not
researched if they've just gone latent (again), or whether that commit
really changed something to avoid those (bug fix).  Anyway: pushed to
master branch commit 54f745023276e5025e34b2cc22530c78423a93cb
"Get rid of 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'",
see attached.


Grüße
 Thomas


> On 2020-06-06T16:07:36+0100, Kwok Cheung Yeung <kwok_yeung@mentor.com> wrote:
>> On 01/06/2020 8:48 pm, Kwok Cheung Yeung wrote:
>>> On 21/05/2020 10:23 pm, Kwok Cheung Yeung wrote:
>>>> These all have the same failure mode:
>>>>
>>>> during RTL pass: expand
>>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90: In function 'MAIN__._omp_fn.1':
>>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86: internal compiler error: in convert_memory_address_addr_space_1, at explow.c:302
>>>> 0xc29f20 convert_memory_address_addr_space_1(scalar_int_mode, rtx_def*, unsigned char, bool, bool)
>>>>          [...]/gcc/explow.c:302
>>>> 0xc29f57 convert_memory_address_addr_space(scalar_int_mode, rtx_def*, unsigned char)
>>>>          [...]/gcc/explow.c:404
>>>> [...]
>
>>>> This occurs if the -ftree-slp-vectorize flag is specified (default at -O3).
>
>>> The problematic bit of Gimple code is this:
>>>
>>>    .oacc_worker_o.44._120 = gangs_min_472;
>>>    .oacc_worker_o.44._122 = workers_min_473;
>>>    .oacc_worker_o.44._124 = vectors_min_474;
>>>    .oacc_worker_o.44._126 = gangs_max_475;
>>>    .oacc_worker_o.44._128 = workers_max_476;
>>>    .oacc_worker_o.44._130 = vectors_max_477;
>>>    .oacc_worker_o.44._132 = 0;
>>>
>>> With SLP vectorization enabled, it becomes this:
>>>
>>>    _40 = {gangs_min_472, workers_min_473, vectors_min_474, gangs_max_475};
>>>    ...
>>>    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>>>    .oacc_worker_o.44._128 = workers_max_476;
>>>    .oacc_worker_o.44._130 = vectors_max_477;
>>>    .oacc_worker_o.44._132 = 0;
>>>
>>> The optimization is trying to transform 4 separate assignments into a single
>>> memory operation. The trouble is that &o.acc_worker_o is an SImode pointer in
>>> AS4 (LDS), while the memory expression appears to be in the default memory
>>> space. The 'to' expression of the assignment is:
>>>
>>>   <mem_ref 0x7ffff74c61e0
>>>      type <vector_type 0x7ffff7470498
>>>          type <integer_type 0x7ffff73195e8 int public SI
>>>              size <integer_cst 0x7ffff7318bb8 constant 32>
>>>              unit-size <integer_cst 0x7ffff7318bd0 constant 4>
>>>              align:32 warn_if_not_align:0 symtab:0 alias-set 1 canonical-type 0x7ffff73195e8 precision:32 min <integer_cst 0x7ffff7318b70 -2147483648> max <integer_cst 0x7ffff7318b88 2147483647>
>>>              pointer_to_this <pointer_type 0x7ffff73209d8> reference_to_this <reference_type 0x7ffff73d9d20>>
>>>          TI
>>>          size <integer_cst 0x7ffff7318ca8 constant 128>
>>>          unit-size <integer_cst 0x7ffff7318cc0 constant 16>
>>>          align:128 warn_if_not_align:0 symtab:0 alias-set 1 structural-equality nunits:4
>>>          pointer_to_this <pointer_type 0x7ffff7470540>>
>>>
>>>      arg:0 <addr_expr 0x7ffff74cdb80
>>>          type <pointer_type 0x7ffff73209d8 type <integer_type 0x7ffff73195e8 int>
>>>              public unsigned DI
>>>              size <integer_cst 0x7ffff7318978 constant 64>
>>>              unit-size <integer_cst 0x7ffff7318990 constant 8>
>>>              align:64 warn_if_not_align:0 symtab:0 alias-set 2 structural-equality>
>>>          constant
>>>          arg:0 <var_decl 0x7ffff7477f30 .oacc_worker_o.44 type <record_type 0x7ffff73eb888 .oacc_ws_data_s.21 address-space-4>
>>>              addressable used static ignored BLK [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86:0
>>>
>>>              size <integer_cst 0x7ffff746ce70 constant 224>
>>>              unit-size <integer_cst 0x7ffff746ce40 constant 28>
>>>              align:128 warn_if_not_align:0
>>>              (mem/c:BLK (symbol_ref:SI (".oacc_worker_o.44.14") [flags 0x2] <var_decl 0x7ffff7477f30 .oacc_worker_o.44>) [9 .oacc_worker_o.44+0 S28 A128 AS4])>>
>>>      arg:1 <integer_cst 0x7ffff73ff078 type <pointer_type 0x7ffff73209d8> constant 0>>
>>>
>>> In convert_memory_address_addr_space_1:
>>>
>>> #ifndef POINTERS_EXTEND_UNSIGNED
>>>    gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
>>>    return x;
>>> #else /* defined(POINTERS_EXTEND_UNSIGNED) */
>>>
>>> POINTERS_EXTEND_UNSIGNED is not defined, so it hits the assert. The expected
>>> to_mode is DI_mode, but x is SI_mode, so the assert fires.
>
>> I now have a fix for this.
>>
>>  >    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>>
>> The ICE occurs because the SLP vectorization pass creates the new statement
>> using the type of the expression '&.oacc_worker_o.44', which is a pointer to a
>> component ref in the default address space. The expand pass gets confused
>> because it is handed an SImode pointer (for LDS) when it is expecting a DImode
>> pointer (for flat/global space).
>>
>> The underlying problem is that although .oacc_worker_o is in the correct address
>> space, the component ref .oacc_worker_o is not. I fixed this by propagating the
>> address space of .oacc_worker_o when the component ref is created.
>
>>  static tree
>>  oacc_build_component_ref (tree obj, tree field)
>>  {
>> -  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
>> +  tree field_type = TREE_TYPE (field);
>> +  tree obj_type = TREE_TYPE (obj);
>> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
>> +    field_type = build_qualified_type
>> +                     (field_type,
>> +                      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
>> +
>> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
>>    if (TREE_THIS_VOLATILE (field))
>>      TREE_THIS_VOLATILE (ret) |= 1;
>>    if (TREE_READONLY (field))
>
> This code change has been included in the recent master branch commit
> e2a58ed6dc5293602d0d168475109caa81ad0f0d "openacc: Middle-end
> worker-partitioning support", which thus includes a
> 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref' that is
> slightly different from 'gcc/omp-low.c:omp_build_component_ref'.
>
> I'm confirming that with this reverted, we're seeing ICEs as follows:
>
>     +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
>
>     +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)
>
> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
> current set of offloading testcases, we never see a
> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
> to be necessary there (but also won't do any harm: no-op).
>
> Would it make sense to "Re-unify 'omp_build_component_ref' and
> 'oacc_build_component_ref'", see attached?
>
>
> Grüße
>  Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff mbox series

Patch

From 54f745023276e5025e34b2cc22530c78423a93cb Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 30 Jul 2021 16:15:25 +0200
Subject: [PATCH] Get rid of
 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'

Clean-up for commit e2a58ed6dc5293602d0d168475109caa81ad0f0d
"openacc: Middle-end worker-partitioning support":
as of commit 2a3f9f6532bb21d8ab6f16fbe9ee603f6b1405f2
"openacc: Shared memory layout optimisation", we're no longer
running into the vectorizer ICEs for '!ADDR_SPACE_GENERIC_P'.

	gcc/
	* omp-low.cc (omp_build_component_ref): Move function...
	* omp-general.cc (omp_build_component_ref): ... here.  Remove
	'static'.
	* omp-general.h (omp_build_component_ref): Declare function.
	* omp-oacc-neuter-broadcast.cc (oacc_build_component_ref): Remove
	function.
	(build_receiver_ref, build_sender_ref): Call
	'omp_build_component_ref' instead.
---
 gcc/omp-general.cc               | 14 ++++++++++++++
 gcc/omp-general.h                |  2 ++
 gcc/omp-low.cc                   | 15 ---------------
 gcc/omp-oacc-neuter-broadcast.cc | 26 ++------------------------
 4 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 19f40dc0b1d..a406c578f33 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -2980,4 +2980,18 @@  oacc_get_ifn_dim_arg (const gimple *stmt)
   return (int) axis;
 }
 
+/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
+   as appropriate.  */
+
+tree
+omp_build_component_ref (tree obj, tree field)
+{
+  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
+  if (TREE_THIS_VOLATILE (field))
+    TREE_THIS_VOLATILE (ret) |= 1;
+  if (TREE_READONLY (field))
+    TREE_READONLY (ret) |= 1;
+  return ret;
+}
+
 #include "gt-omp-general.h"
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index c0cf5f014cd..7a94831e8f5 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -149,4 +149,6 @@  get_openacc_privatization_dump_flags ()
   return l_dump_flags;
 }
 
+extern tree omp_build_component_ref (tree obj, tree field);
+
 #endif /* GCC_OMP_GENERAL_H */
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176efe715..2294456b27d 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -621,21 +621,6 @@  omp_copy_decl_1 (tree var, omp_context *ctx)
   return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* See also 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'.  */
-
-static tree
-omp_build_component_ref (tree obj, tree field)
-{
-  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 /* Build tree nodes to access the field for VAR on the receiver side.  */
 
 static tree
diff --git a/gcc/omp-oacc-neuter-broadcast.cc b/gcc/omp-oacc-neuter-broadcast.cc
index 314161e38f5..81e3223a94c 100644
--- a/gcc/omp-oacc-neuter-broadcast.cc
+++ b/gcc/omp-oacc-neuter-broadcast.cc
@@ -937,35 +937,13 @@  worker_single_simple (basic_block from, basic_block to,
     }
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* Adapted from 'gcc/omp-low.cc:omp_build_component_ref'.  */
-
-static tree
-oacc_build_component_ref (tree obj, tree field)
-{
-  tree field_type = TREE_TYPE (field);
-  tree obj_type = TREE_TYPE (obj);
-  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
-    field_type = build_qualified_type
-			(field_type,
-			 KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
-
-  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 static tree
 build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
 {
   tree x = build_simple_mem_ref (receiver_decl);
   tree field = *fields->get (var);
   TREE_THIS_NOTRAP (x) = 1;
-  x = oacc_build_component_ref (x, field);
+  x = omp_build_component_ref (x, field);
   return x;
 }
 
@@ -975,7 +953,7 @@  build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
   if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
     sender_decl = build_simple_mem_ref (sender_decl);
   tree field = *fields->get (var);
-  return oacc_build_component_ref (sender_decl, field);
+  return omp_build_component_ref (sender_decl, field);
 }
 
 static int
-- 
2.34.1