Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.2/patches/2233012/?format=api
{ "id": 2233012, "url": "http://patchwork.ozlabs.org/api/1.2/patches/2233012/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/20260505134929.3522938-2-aarsenovic@baylibre.com/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/1.2/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260505134929.3522938-2-aarsenovic@baylibre.com>", "list_archive_url": null, "date": "2026-05-05T13:14:57", "name": "[1/4] libgomp/gcn: parallelize initializing threads of a team", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "0d9aa9ff94e9921b101e143131ac2d3fb59006e7", "submitter": { "id": 92125, "url": "http://patchwork.ozlabs.org/api/1.2/people/92125/?format=api", "name": "Arsen Arsenović", "email": "aarsenovic@baylibre.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/20260505134929.3522938-2-aarsenovic@baylibre.com/mbox/", "series": [ { "id": 502845, "url": "http://patchwork.ozlabs.org/api/1.2/series/502845/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=502845", "date": "2026-05-05T13:14:59", "name": "GCN: Target offload overhead improvements, batch 2", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/502845/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2233012/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2233012/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=baylibre-com.20251104.gappssmtp.com\n header.i=@baylibre-com.20251104.gappssmtp.com header.a=rsa-sha256\n header.s=20251104 header.b=kNe4/4KM;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=38.145.34.32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=pass (2048-bit key,\n unprotected) header.d=baylibre-com.20251104.gappssmtp.com\n header.i=@baylibre-com.20251104.gappssmtp.com header.a=rsa-sha256\n header.s=20251104 header.b=kNe4/4KM", "sourceware.org;\n dmarc=none (p=none dis=none) header.from=baylibre.com", "sourceware.org; spf=pass smtp.mailfrom=baylibre.com", "server2.sourceware.org;\n arc=none smtp.remote-ip=2a00:1450:4864:20::630" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org [38.145.34.32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g90KN262dz1yJV\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 05 May 2026 23:51:04 +1000 (AEST)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 4A1D04BA900E\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 5 May 2026 13:51:02 +0000 (GMT)", "from mail-ej1-x630.google.com (mail-ej1-x630.google.com\n [IPv6:2a00:1450:4864:20::630])\n by sourceware.org (Postfix) with ESMTPS id 61AFD4BA79B7\n for <gcc-patches@gcc.gnu.org>; Tue, 5 May 2026 13:50:09 +0000 (GMT)", "by mail-ej1-x630.google.com with SMTP id\n a640c23a62f3a-ba60d78aff3so749182566b.2\n for <gcc-patches@gcc.gnu.org>; Tue, 05 May 2026 06:50:09 -0700 (PDT)", "from localhost ([146.70.193.12])\n by smtp.googlemail.com with ESMTPSA id\n a640c23a62f3a-bc1b97a11easm240536366b.44.2026.05.05.06.50.07\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Tue, 05 May 2026 06:50:07 -0700 (PDT)" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 4A1D04BA900E", "OpenDKIM Filter v2.11.0 sourceware.org 61AFD4BA79B7" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 61AFD4BA79B7", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 61AFD4BA79B7", "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1777989009; cv=none;\n b=v/uy4sKeUKtmD14Rkr2+tfzJCRtWxCy5QVoR0WIHKltspCp//HfPQV+PZeh/CPSPdKMnvvIZsH+r0NJdLiTlDcfr81lAdiiSve9gq1wudNGrUD7UtQtN5PzcI6MKxQM7JrY8popN5aTqiAAaUm7iayqGtBqJZ71NPw+Nj+G3nB0=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1777989009; c=relaxed/simple;\n bh=B4sBieq29FtRaqkWoaTnltoPYMTA/J9BQZiLE2mtArY=;\n h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version;\n b=Nkl3sRQ0ZLEc2lTtmedcQVXa76gkCMG0HtPx8y69FVCVLwjqCIJRYU1sHhA7ikFtED08R/OUe8F1aaUnU2kSdpKE7Bvuyub9y4sCkIH0lzgkKrXDuJISi8t1/7e7MrQXTITu9r/3vGX40eeKnKpEyyK+J4Mw6A+9AGlrTSgJ5hE=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=baylibre-com.20251104.gappssmtp.com; s=20251104; t=1777989008;\n x=1778593808;\n darn=gcc.gnu.org;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:from:to:cc:subject:date\n :message-id:reply-to;\n bh=32GgWGiO9idxYmaigETdv0n9Ac5gva+hBSsk879Nn3w=;\n b=kNe4/4KMhD4IXixASHhjBTw+t73awyPzxJGsSv+r9dlokWMzbqo7SkXET8Qic41IYL\n R/8erY0akFqCDd7pcj4671gjUSE/wfVk+k5KcqPossWx9fG+UVwp80RSoKmpeRHalzdU\n bIIT1gc5b+trN6Nu0jk29nRvznmPcJeH3RYYAqk2jARi8/k+nW98fTHoo9V9UFXj0JQi\n qY/VUlMzi3IBwXc9IgRzbhMSQR//JumIZSwb66UWxKBkfVJgZBKYoNFUIaqAmmW4R44d\n Zt3YswAiJkJHyGBm3haI6jBeQ3krAk8XBjWj0q2qZXuzNWyHTVvZ6aEc5A+j1gIFMrpa\n pXmw==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1777989008; x=1778593808;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from\n :to:cc:subject:date:message-id:reply-to;\n bh=32GgWGiO9idxYmaigETdv0n9Ac5gva+hBSsk879Nn3w=;\n b=q490vrIK7yyHBROPWen7erdooxoydX/8c2drutoV+M6nUgFkGAhOxfFEoJWLoNCFhL\n ayVwuDg7lHL4T1/wMMxhGI2x8+IkPML7eC0N/mGeIpSskNWohUCWGEXSdHDCgL/8j5FT\n jzeJkwcrz24IJEDA6LBypdGVXdsp/wYAKLv36qOfEiuimO5Kaex4cpcTXH73UisylYBU\n X1M3dvSFv+vJ5uvh9HTbPasmfESH3hjWGMBg4qsmWmJPT/H0qSfvZSjYmuEJSI9CokAL\n eIUiwFd5C7y1MeW7LRg7Ll3kCn1uSkOPSLCg9wJaWyZE4VaX6cJoqqK72CQ5bLzk2adw\n ERAQ==", "X-Gm-Message-State": "AOJu0YwpizfTTmxPriEnzPOM8dAJcGCY/c4KiQ+j4eWQ4rcDEJZEQlxZ\n 1SaOqJZWjkihya+VFJ5TUD+9Iti28NMfBYYohK5VJwl/kyFBFSBbbTuaADk+q4YThxwFK2j9fga\n kvdEC6iI=", "X-Gm-Gg": "AeBDietf5ZJkCfTWppHYukVNs6mGPZHmvnZNUmBYlLSOgBjHwp+5qeOrg5Y2TERN2k6\n +uMTaSAuIlQOlykVWOiw+fyNfD1AQvjFHi5bFH/jWpAtgoCH0rvMwf/qt6J7XufbJ7nMMrPo+Ig\n DZxA5G7uT891G330OzTDOf4WLXF3kkStcuiiyjVvN5y0oHv4xj1WIGuFipFNTdR6YjudweK238D\n Gu0qCoHbYP7hL1E6FWkVyV3HPdFb0+vNdj24WusErv1D6nwU37JeNRU+K/pLhWmp4i9IWpecRlg\n 6nXMgaGxxC1FBzS5f+IFrnhCDm4NZ9wTd/hn7YneXj93e8TkQvGT9gk6PRskxBjSsCohzJPxycD\n oBpkTOoyTwTOyAcBtUOhD6xZHVOGmJAr6E64iqnIZnYtmtuO9sErmZlq0yiSh7XMoWVJv4nrAd0\n N5ihqJdyuO1nOqIiKx+G5w7gQfNVsnqq3ODLA=", "X-Received": "by 2002:a17:907:7f90:b0:bbe:4727:a7bb with SMTP id\n a640c23a62f3a-bc40ae536b7mr173649366b.0.1777989007877;\n Tue, 05 May 2026 06:50:07 -0700 (PDT)", "From": "=?utf-8?q?Arsen_Arsenovi=C4=87?= <aarsenovic@baylibre.com>", "To": "gcc-patches@gcc.gnu.org", "Cc": "=?utf-8?q?Arsen_Arsenovi=C4=87?= <aarsenovic@baylibre.com>", "Subject": "[PATCH 1/4] libgomp/gcn: parallelize initializing threads of a team", "Date": "Tue, 5 May 2026 15:14:57 +0200", "Message-ID": "<20260505134929.3522938-2-aarsenovic@baylibre.com>", "X-Mailer": "git-send-email 2.54.0", "In-Reply-To": "<20260505134929.3522938-1-aarsenovic@baylibre.com>", "References": "<20260505134929.3522938-1-aarsenovic@baylibre.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "Currently, libgomp performs initialization of all threads in a team\nin its lead thread, and then releases all threads to do work. This\nmeans that, before reaching the release, each thread is doing nothing,\nwaiting for the lead threads to do lots of thread initialization\noperations.\n\nThis initialization is identical for each thread.\n\nWe can parallelize it by performing this initialization in each thread,\nafter releasing each. This allows the threads of a team to be released\nnear-immediately, which should cut team startup time roughly by just\nunder the number of threads.\n\nIn order to achieve this, the lead thread prepares the parameters each\nthread needs for initialization by copying them into an object each will\nbe able to read from, and only initializes each remaining thread in the\nteam with a few pointers.\n\nNo functional changes intended in this commit.\n\nlibgomp/ChangeLog:\n\n\t* libgomp.h (struct gomp_thread_start_data): New struct. Holds\n\tthread-independent parameters needed to initialize current\n\tthread.\n\t(struct gomp_team): On GCN, add thr_start_data field, that holds\n\ta gomp_thread_start_data to be used in each thread.\n\t(struct gomp_thread): Add start_data field, that points to\n\tthread initialization parameters.\n\t* config/gcn/team.c (gomp_team_start): Move thread\n\tinitialization steps into ...\n\t(gomp_prep_our_thread): this new function, such that it reads\n\tfrom a gomp_thread_start_data object.\n\t(gomp_thread_start): Call the above to initialize our thread.\n---\n libgomp/config/gcn/team.c | 121 +++++++++++++++++++++++++-------------\n libgomp/libgomp.h | 31 ++++++++++\n 2 files changed, 112 insertions(+), 40 deletions(-)", "diff": "diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c\nindex c9c2f3c24191..1ca4c6b1266a 100644\n--- a/libgomp/config/gcn/team.c\n+++ b/libgomp/config/gcn/team.c\n@@ -24,6 +24,7 @@\n <http://www.gnu.org/licenses/>. */\n \n /* This file handles maintenance of threads on AMD GCN. */\n+#include <assert.h>\n \n #include \"libgomp.h\"\n #include <stdlib.h>\n@@ -132,6 +133,33 @@ gomp_gcn_exit_kernel (void)\n team_free (gcn_thrs ());\n }\n \n+/* Populate THR from START_DATA. Assumes THR is current thread. Argument is\n+ broken out to avoid repeated calls to gomp_thread, which may be\n+ expensive. */\n+\n+static inline void\n+gomp_prep_our_thread (struct gomp_thread *thr,\n+\t\t struct gomp_thread_start_data *start_data,\n+\t\t int threadid)\n+{\n+ thr->ts.team = start_data->team;\n+ thr->ts.work_share = &start_data->team->work_shares[0];\n+ thr->ts.last_work_share = NULL;\n+ thr->ts.team_id = threadid;\n+ thr->ts.level = start_data->level;\n+ thr->ts.active_level = start_data->active_level;\n+ thr->ts.single_count = 0;\n+ thr->ts.static_trip = 0;\n+ thr->task = &start_data->team->implicit_task[threadid];\n+ gomp_init_task (thr->task, start_data->parent_task, &start_data->prev_icvs);\n+ /* TODO(arsen): This should be part of a mechanism that allows us to override\n+ nthreads-var with OMP_NUM_THREADS. But, we currently don't have access to\n+ that list on the device.\n+\n+ thr->task->icv.nthreads_var = ...; */\n+ thr->task->taskgroup = start_data->taskgroup;\n+}\n+\n /* This function contains the idle loop in which a thread waits\n to be called up to become part of a team. */\n \n@@ -162,6 +190,19 @@ gomp_thread_start (struct gomp_thread_pool *pool)\n \t abort();\n \t }\n \t}\n+\n+ /* Perform rest of task initialization. Populated from\n+\t gomp_team_start. */\n+ if (thr->start_data)\n+\t/* If !start_data, we're probably executing cleanup helpers, so we\n+\t don't really care about initializing these fields. */\n+\t{\n+\t /* On threads other than the main thread, the thread ID within a\n+\t team is always equal to dim_pos(1). */\n+\t gomp_prep_our_thread (thr, thr->start_data, __builtin_gcn_dim_pos (1));\n+\t thr->start_data = NULL;\n+\t}\n+\n thr->fn (thr->data);\n thr->fn = NULL;\n \n@@ -180,61 +221,61 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,\n \t\t struct gomp_taskgroup *taskgroup)\n {\n struct gomp_thread *thr, *nthr;\n- struct gomp_task *task;\n+ struct gomp_task *prev_task;\n struct gomp_task_icv *icv;\n struct gomp_thread_pool *pool;\n- unsigned long nthreads_var;\n \n thr = gomp_thread ();\n pool = thr->thread_pool;\n- task = thr->task;\n- icv = task ? &task->icv : &gomp_global_icv;\n+ prev_task = thr->task;\n+ icv = prev_task ? &prev_task->icv : &gomp_global_icv;\n \n /* Always save the previous state, even if this isn't a nested team.\n In particular, we should save any work share state from an outer\n orphaned work share construct. */\n team->prev_ts = thr->ts;\n \n- thr->ts.team = team;\n- thr->ts.team_id = 0;\n- ++thr->ts.level;\n- if (nthreads > 1)\n- ++thr->ts.active_level;\n- thr->ts.work_share = &team->work_shares[0];\n- thr->ts.last_work_share = NULL;\n- thr->ts.single_count = 0;\n- thr->ts.static_trip = 0;\n- thr->task = &team->implicit_task[0];\n- nthreads_var = icv->nthreads_var;\n- gomp_init_task (thr->task, task, icv);\n- team->implicit_task[0].icv.nthreads_var = nthreads_var;\n- team->implicit_task[0].taskgroup = taskgroup;\n+ /* Populate start data. */\n+ team->thr_start_data = (struct gomp_thread_start_data) {\n+ .team = team,\n+ .level = thr->ts.level + 1,\n+ .active_level = thr->ts.active_level + (nthreads > 1),\n+ .parent_task = thr->task,\n+ .prev_icvs = *icv,\n+ .taskgroup = taskgroup\n+ };\n \n- if (nthreads == 1)\n- return;\n-\n- /* Release existing idle threads. */\n- for (unsigned i = 1; i < nthreads; ++i)\n+ if (nthreads != 1)\n {\n- nthr = pool->threads[i];\n- nthr->ts.team = team;\n- nthr->ts.work_share = &team->work_shares[0];\n- nthr->ts.last_work_share = NULL;\n- nthr->ts.team_id = i;\n- nthr->ts.level = team->prev_ts.level + 1;\n- nthr->ts.active_level = thr->ts.active_level;\n- nthr->ts.single_count = 0;\n- nthr->ts.static_trip = 0;\n- nthr->task = &team->implicit_task[i];\n- gomp_init_task (nthr->task, task, icv);\n- team->implicit_task[i].icv.nthreads_var = nthreads_var;\n- team->implicit_task[i].taskgroup = taskgroup;\n- nthr->fn = fn;\n- nthr->data = data;\n- team->ordered_release[i] = &nthr->release;\n+ /* When there's more than one thread, we expect that we're operating on\n+\t thread w/ dim_pos(1) == 0, and that each of the other initialized\n+\t threads will operate with team_id == dim_pos(1). */\n+ assert (__builtin_gcn_dim_pos (1) == 0);\n+ /* We only expect one team to have more than one active thread. See\n+\t accelerator-specific logic in gomp_resolve_num_threads. */\n+ assert (!thr->ts.active_level);\n+\n+ /* Prepare other threads waiting on our barrier. Besides fn, data,\n+\t taskgroup, all the fields of those threads are initialized based on\n+\t the values initialized in our thread above (which is always the master\n+\t thread). */\n+ for (unsigned i = 1; i < nthreads; ++i)\n+\t{\n+\t nthr = pool->threads[i];\n+\n+\t nthr->start_data = &team->thr_start_data;\n+\t nthr->fn = fn;\n+\t nthr->data = data;\n+\t team->ordered_release[i] = &nthr->release;\n+\t}\n+\n+ /* Release the other threads. */\n+ gomp_simple_barrier_wait (&pool->threads_dock);\n }\n \n- gomp_simple_barrier_wait (&pool->threads_dock);\n+ /* Finish initializing our thread. The thread ID in the team of the caller\n+ is always zero, even if __builtin_gcn_dim_pos (1) != 0. */\n+ gomp_prep_our_thread (thr, &team->thr_start_data, 0);\n }\n \n #include \"../../team.c\"\ndiff --git a/libgomp/libgomp.h b/libgomp/libgomp.h\nindex 42f324392957..c51bd680713f 100644\n--- a/libgomp/libgomp.h\n+++ b/libgomp/libgomp.h\n@@ -775,6 +775,27 @@ struct gomp_target_task\n void *hostaddrs[];\n };\n \n+#ifdef __AMDGCN__\n+/* Parameters needed to kick off new threads on AMD GCN. They correspond to\n+ various fields in gomp_thread. This struct, and all its contents, should\n+ only be modified by gomp_team_start, and stay untouched until the threads\n+ of a team reach the final barrier. */\n+\n+struct gomp_thread_start_data\n+{\n+ /* Team the new thread is part of. */\n+ struct gomp_team *team;\n+ /* Active nesting level. */\n+ unsigned level, active_level;\n+ /* Parent task. */\n+ struct gomp_task *parent_task;\n+ /* Previous ICVs. */\n+ struct gomp_task_icv prev_icvs;\n+ /* Task group for the new threads implicit task. */\n+ struct gomp_taskgroup *taskgroup;\n+};\n+#endif\n+\n /* This structure describes a \"team\" of threads. These are the threads\n that are spawned by a PARALLEL constructs, as well as the work sharing\n constructs that the team encounters. */\n@@ -857,6 +878,11 @@ struct gomp_team\n /* Number of tasks waiting for their completion event to be fulfilled. */\n unsigned int task_detach_count;\n \n+#ifdef __AMDGCN__\n+ /* Used on AMD GCN to inform threads how to launch in a team. */\n+ struct gomp_thread_start_data thr_start_data;\n+#endif\n+\n /* This array contains structures for implicit tasks. */\n struct gomp_task implicit_task[];\n };\n@@ -870,6 +896,11 @@ struct gomp_thread\n void (*fn) (void *data);\n void *data;\n \n+#ifdef __AMDGCN__\n+ /* And these are the parameters it should set. */\n+ struct gomp_thread_start_data *start_data;\n+#endif\n+\n /* This is the current team state for this thread. The ts.team member\n is NULL only if the thread is idle. */\n struct gomp_team_state ts;\n", "prefixes": [ "1/4" ] }