diff mbox series

amdgcn: Add waitcnt after LDS write instructions

Message ID 20200908201947.43277-3-julian@codesourcery.com
State New
Headers show
Series amdgcn: Add waitcnt after LDS write instructions | expand

Commit Message

Julian Brown Sept. 8, 2020, 8:19 p.m. UTC
Data-share write (ds_write) instructions do not necessarily complete
the write to LDS immediately. When a write completes, LGKM_CNT is
decremented. For now, we wait until LGKM_CNT reaches zero after each
ds_write instruction.

This fixes a race condition in the case where LDS is read immediately
after being written. This can happen with broadcast operations.

This may be latent on mainline, since the broadcast machinery isn't
present there yet. Nonetheless, I will apply shortly.

Julian

2020-09-08  Julian Brown  <julian@codesourcery.com>

gcc/
	* config/gcn/gcn-valu.md (scatter<mode>_insn_1offset_ds<exec_scatter>):
	Add waitcnt.
	* config/gcn/gcn.md (*mov<mode>_insn, *movti_insn): Add waitcnt to
	ds_write alternatives.
---
 gcc/config/gcn/gcn-valu.md | 2 +-
 gcc/config/gcn/gcn.md      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 26559ff765e..e4d7f2a0f49 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -923,7 +923,7 @@ 
   {
     addr_space_t as = INTVAL (operands[3]);
     static char buf[200];
-    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
+    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
 	     (AS_GDS_P (as) ? " gds" : ""));
     return buf;
   }
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index ed98d2d2706..aeb25fbb931 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -554,7 +554,7 @@ 
   flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dword\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
-  ds_write_b32\t%A0, %1%O0
+  ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   s_mov_b32\t%0, %1
   global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
@@ -582,7 +582,7 @@ 
   flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store%s0\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
-  ds_write%b0\t%A0, %1%O0
+  ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store%s0\t%A0, %1%O0%g0"
@@ -611,7 +611,7 @@ 
   #
   flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dwordx2\t%A0, %1%O0%g0
-  ds_write_b64\t%A0, %1%O0
+  ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store_dwordx2\t%A0, %1%O0%g0"
@@ -667,7 +667,7 @@ 
   #
   global_store_dwordx4\t%A0, %1%O0%g0
   global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  ds_write_b128\t%A0, %1%O0
+  ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
   "reload_completed
    && REG_P (operands[0])