diff mbox

[Fortran] Add library support for coarray's atomic intrinsics

Message ID 53C14ED4.6000806@net-b.de
State New
Headers show

Commit Message

Tobias Burnus July 12, 2014, 3:05 p.m. UTC
This patch is relative to the still unreviewed patch 
https://gcc.gnu.org/ml/gcc-patches/2014-07/msg00864.html

With this patch, all of Fortran 2008's and TS18508's atomics should be 
supported with both -fcoarray=single and =lib (with libcaf_single). 
Still missing is the support in the MPI and GASNet multi-image 
libraries, which is supposed to get released soon. However, I think 
adding atomics support to libcaf_mpi should be very simple.

I haven't included a -fdump-tree-original test case, but in the pending 
patch is coarray/atomic_2.f90, which a run-time test which is also run 
with -fcoarray=lib and -lcaf_single.

The patch has been build and regtested on x86-64-gnu-linux.
OK for the trunk?

Tobias

Comments

Paul Richard Thomas July 12, 2014, 6:42 p.m. UTC | #1
Dear Tobias,

That's good for trunk.

Thanks for the patch.

Paul

On 12 July 2014 17:05, Tobias Burnus <burnus@net-b.de> wrote:
> This patch is relative to the still unreviewed patch
> https://gcc.gnu.org/ml/gcc-patches/2014-07/msg00864.html
>
> With this patch, all of Fortran 2008's and TS18508's atomics should be
> supported with both -fcoarray=single and =lib (with libcaf_single). Still
> missing is the support in the MPI and GASNet multi-image libraries, which is
> supposed to get released soon. However, I think adding atomics support to
> libcaf_mpi should be very simple.
>
> I haven't included a -fdump-tree-original test case, but in the pending
> patch is coarray/atomic_2.f90, which a run-time test which is also run with
> -fcoarray=lib and -lcaf_single.
>
> The patch has been build and regtested on x86-64-gnu-linux.
> OK for the trunk?
>
> Tobias
diff mbox

Patch

2014-07-10  Tobias Burnus  <burnus@net-b.de>

gcc/fortran/
	* libgfortran.h (libcaf_atomic_codes): Add.
	* trans-decl.c (gfor_fndecl_caf_atomic_def,
	gfor_fndecl_caf_atomic_ref, gfor_fndecl_caf_atomic_cas,
	gfor_fndecl_caf_atomic_op): New variables.
	(gfc_build_builtin_function_decls): Initialize them.
	* trans.h (gfor_fndecl_caf_atomic_def,
	gfor_fndecl_caf_atomic_ref, gfor_fndecl_caf_atomic_cas,
	gfor_fndecl_caf_atomic_op): New variables.
	* trans-intrinsic.c (conv_intrinsic_atomic_op,
	conv_intrinsic_atomic_ref, conv_intrinsic_atomic_cas):
	Add library calls with -fcoarray=lib.

libgfortran/
	* caf/libcaf.h (_gfortran_caf_atomic_define,
	_gfortran_caf_atomic_ref, _gfortran_caf_atomic_op,
	_gfortran_caf_atomic_cas): New prototypes.
	* caf/single.c (_gfortran_caf_atomic_define,
	_gfortran_caf_atomic_ref, _gfortran_caf_atomic_op,
	_gfortran_caf_atomic_cas): New functions.

diff --git a/gcc/fortran/libgfortran.h b/gcc/fortran/libgfortran.h
index b90dac6..df5c14f 100644
--- a/gcc/fortran/libgfortran.h
+++ b/gcc/fortran/libgfortran.h
@@ -120,6 +120,14 @@  typedef enum
 }
 libgfortran_stat_codes;
 
+typedef enum
+{
+  GFC_CAF_ATOMIC_ADD = 1,
+  GFC_CAF_ATOMIC_AND,
+  GFC_CAF_ATOMIC_OR,
+  GFC_CAF_ATOMIC_XOR
+} libcaf_atomic_codes;
+
 /* Default unit number for preconnected standard input and output.  */
 #define GFC_STDIN_UNIT_NUMBER 5
 #define GFC_STDOUT_UNIT_NUMBER 6
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 00ac010..4db10be 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -141,6 +141,10 @@  tree gfor_fndecl_caf_sync_all;
 tree gfor_fndecl_caf_sync_images;
 tree gfor_fndecl_caf_error_stop;
 tree gfor_fndecl_caf_error_stop_str;
+tree gfor_fndecl_caf_atomic_def;
+tree gfor_fndecl_caf_atomic_ref;
+tree gfor_fndecl_caf_atomic_cas;
+tree gfor_fndecl_caf_atomic_op;
 tree gfor_fndecl_co_max;
 tree gfor_fndecl_co_min;
 tree gfor_fndecl_co_sum;
@@ -3391,6 +3395,28 @@  gfc_build_builtin_function_decls (void)
       /* CAF's ERROR STOP doesn't return.  */
       TREE_THIS_VOLATILE (gfor_fndecl_caf_error_stop_str) = 1;
 
+      gfor_fndecl_caf_atomic_def = gfc_build_library_function_decl_with_spec (
+	get_identifier (PREFIX("caf_atomic_define")), "R..RW",
+	void_type_node, 7, pvoid_type_node, size_type_node, integer_type_node,
+        pvoid_type_node, pint_type, integer_type_node, integer_type_node);
+
+      gfor_fndecl_caf_atomic_ref = gfc_build_library_function_decl_with_spec (
+	get_identifier (PREFIX("caf_atomic_ref")), "R..WW",
+	void_type_node, 7, pvoid_type_node, size_type_node, integer_type_node,
+        pvoid_type_node, pint_type, integer_type_node, integer_type_node);
+
+      gfor_fndecl_caf_atomic_cas = gfc_build_library_function_decl_with_spec (
+	get_identifier (PREFIX("caf_atomic_cas")), "R..WRRW",
+	void_type_node, 9, pvoid_type_node, size_type_node, integer_type_node,
+        pvoid_type_node, pvoid_type_node, pvoid_type_node, pint_type,
+	integer_type_node, integer_type_node);
+
+      gfor_fndecl_caf_atomic_op = gfc_build_library_function_decl_with_spec (
+	get_identifier (PREFIX("caf_atomic_op")), ".R..RWW",
+	void_type_node, 9, integer_type_node, pvoid_type_node, size_type_node,
+	integer_type_node, pvoid_type_node, pvoid_type_node, pint_type,
+	integer_type_node, integer_type_node);
+
       gfor_fndecl_co_max = gfc_build_library_function_decl_with_spec (
 	get_identifier (PREFIX("caf_co_max")), "W.WW",
 	void_type_node, 6, pvoid_type_node, integer_type_node,
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index a285e9d..57b7f4d 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -7007,7 +7007,7 @@  gfc_conv_intrinsic_loc (gfc_se * se, gfc_expr * expr)
     gfc_conv_expr_reference (se, arg_expr);
   else
     gfc_conv_array_parameter (se, arg_expr, true, NULL, NULL, NULL);
-  se->expr= convert (gfc_get_int_type (gfc_index_integer_kind), se->expr);
+  se->expr = convert (gfc_get_int_type (gfc_index_integer_kind), se->expr);
    
   /* Create a temporary variable for loc return value.  Without this, 
      we get an error an ICE in gcc/expr.c(expand_expr_addr_expr_1).  */
@@ -8341,11 +8341,11 @@  conv_co_minmaxsum (gfc_code *code)
 static tree
 conv_intrinsic_atomic_op (gfc_code *code)
 {
-  gfc_se atom, value, old;
-  tree tmp;
+  gfc_se argse;
+  tree tmp, atom, value, old = NULL_TREE, stat = NULL_TREE;
   stmtblock_t block, post_block;
   gfc_expr *atom_expr = code->ext.actual->expr;
-  gfc_expr *stat;
+  gfc_expr *stat_expr;
   built_in_function fn;
 
   if (atom_expr->expr_type == EXPR_FUNCTION
@@ -8355,15 +8355,129 @@  conv_intrinsic_atomic_op (gfc_code *code)
 
   gfc_start_block (&block);
   gfc_init_block (&post_block);
-  gfc_init_se (&atom, NULL);
-  gfc_init_se (&value, NULL);
-  atom.want_pointer = 1;
-  gfc_conv_expr (&atom, atom_expr);
-  gfc_add_block_to_block (&block, &atom.pre);
-  gfc_add_block_to_block (&post_block, &atom.post);
-  gfc_conv_expr (&value, code->ext.actual->next->expr);
-  gfc_add_block_to_block (&block, &value.pre);
-  gfc_add_block_to_block (&post_block, &value.post);
+
+  gfc_init_se (&argse, NULL);
+  argse.want_pointer = 1;
+  gfc_conv_expr (&argse, atom_expr);
+  gfc_add_block_to_block (&block, &argse.pre);
+  gfc_add_block_to_block (&post_block, &argse.post);
+  atom = argse.expr;
+
+  gfc_init_se (&argse, NULL);
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB
+      && code->ext.actual->next->expr->ts.kind == atom_expr->ts.kind)
+    argse.want_pointer = 1;
+  gfc_conv_expr (&argse, code->ext.actual->next->expr);
+  gfc_add_block_to_block (&block, &argse.pre);
+  gfc_add_block_to_block (&post_block, &argse.post);
+  value = argse.expr;
+
+  switch (code->resolved_isym->id)
+    {
+    case GFC_ISYM_ATOMIC_ADD:
+    case GFC_ISYM_ATOMIC_AND:
+    case GFC_ISYM_ATOMIC_DEF:
+    case GFC_ISYM_ATOMIC_OR:
+    case GFC_ISYM_ATOMIC_XOR:
+      stat_expr = code->ext.actual->next->next->expr;
+      if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+	old = null_pointer_node;
+      break;
+    default:
+      gfc_init_se (&argse, NULL);
+      if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+	argse.want_pointer = 1;
+      gfc_conv_expr (&argse, code->ext.actual->next->next->expr);
+      gfc_add_block_to_block (&block, &argse.pre);
+      gfc_add_block_to_block (&post_block, &argse.post);
+      old = argse.expr;
+      stat_expr = code->ext.actual->next->next->next->expr;
+    }
+
+  /* STAT=  */
+  if (stat_expr != NULL)
+    {
+      gcc_assert (stat_expr->expr_type == EXPR_VARIABLE);
+      gfc_init_se (&argse, NULL);
+      if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+	argse.want_pointer = 1;
+      gfc_conv_expr_val (&argse, stat_expr);
+      gfc_add_block_to_block (&block, &argse.pre);
+      gfc_add_block_to_block (&post_block, &argse.post);
+      stat = argse.expr;
+    }
+  else if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    stat = null_pointer_node;
+
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    {
+      tree image_index, caf_decl, offset, token;
+      int op;
+
+      switch (code->resolved_isym->id)
+	{
+	case GFC_ISYM_ATOMIC_ADD:
+	case GFC_ISYM_ATOMIC_FETCH_ADD:
+	  op = (int) GFC_CAF_ATOMIC_ADD;
+	  break;
+	case GFC_ISYM_ATOMIC_AND:
+	case GFC_ISYM_ATOMIC_FETCH_AND:
+	  op = (int) GFC_CAF_ATOMIC_AND;
+	  break;
+	case GFC_ISYM_ATOMIC_OR:
+	case GFC_ISYM_ATOMIC_FETCH_OR:
+	  op = (int) GFC_CAF_ATOMIC_OR;
+	  break;
+	case GFC_ISYM_ATOMIC_XOR:
+	case GFC_ISYM_ATOMIC_FETCH_XOR:
+	  op = (int) GFC_CAF_ATOMIC_XOR;
+	  break;
+	case GFC_ISYM_ATOMIC_DEF:
+	  op = 0;  /* Unused.  */
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+
+      caf_decl = gfc_get_tree_for_caf_expr (atom_expr);
+      if (TREE_CODE (TREE_TYPE (caf_decl)) == REFERENCE_TYPE)
+	caf_decl = build_fold_indirect_ref_loc (input_location, caf_decl);
+
+      if (gfc_is_coindexed (atom_expr))
+	image_index = caf_get_image_index (&block, atom_expr, caf_decl);
+      else
+	image_index = integer_zero_node;
+
+      if (TREE_TYPE (TREE_TYPE (atom)) != TREE_TYPE (TREE_TYPE (value)))
+	{
+	  tmp = gfc_create_var (TREE_TYPE (TREE_TYPE (atom)), "value");
+	  gfc_add_modify (&block, tmp, fold_convert (TREE_TYPE (tmp), value));
+          value = gfc_build_addr_expr (NULL_TREE, tmp);
+	}
+
+      get_caf_token_offset (&token, &offset, caf_decl, atom, atom_expr);
+
+      if (code->resolved_isym->id == GFC_ISYM_ATOMIC_DEF)
+	tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_atomic_def, 7,
+				   token, offset, image_index, value, stat,
+				   build_int_cst (integer_type_node,
+						  (int) atom_expr->ts.type),
+				   build_int_cst (integer_type_node,
+						  (int) atom_expr->ts.kind));
+      else
+	tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_atomic_op, 9,
+				   build_int_cst (integer_type_node, op),
+				   token, offset, image_index, value, old, stat,
+				   build_int_cst (integer_type_node,
+						  (int) atom_expr->ts.type),
+				   build_int_cst (integer_type_node,
+						  (int) atom_expr->ts.kind));
+
+      gfc_add_expr_to_block (&block, tmp);
+      gfc_add_block_to_block (&block, &post_block);
+      return gfc_finish_block (&block);
+    }
+
 
   switch (code->resolved_isym->id)
     {
@@ -8390,12 +8504,12 @@  conv_intrinsic_atomic_op (gfc_code *code)
       gcc_unreachable ();
     }
 
-  tmp = TREE_TYPE (TREE_TYPE (atom.expr));
+  tmp = TREE_TYPE (TREE_TYPE (atom));
   fn = (built_in_function) ((int) fn
 			    + exact_log2 (tree_to_uhwi (TYPE_SIZE_UNIT (tmp)))
 			    + 1);
   tmp = builtin_decl_explicit (fn);
-  tree itype = TREE_TYPE (TREE_TYPE (atom.expr));
+  tree itype = TREE_TYPE (TREE_TYPE (atom));
   tmp = builtin_decl_explicit (fn);
 
   switch (code->resolved_isym->id)
@@ -8405,37 +8519,21 @@  conv_intrinsic_atomic_op (gfc_code *code)
     case GFC_ISYM_ATOMIC_DEF:
     case GFC_ISYM_ATOMIC_OR:
     case GFC_ISYM_ATOMIC_XOR:
-      stat = code->ext.actual->next->next->expr;
-      tmp = build_call_expr_loc (input_location, tmp, 3, atom.expr,
-				 fold_convert (itype, value.expr),
+      tmp = build_call_expr_loc (input_location, tmp, 3, atom,
+				 fold_convert (itype, value),
 				 build_int_cst (NULL, MEMMODEL_RELAXED));
       gfc_add_expr_to_block (&block, tmp);
       break;
     default:
-      stat = code->ext.actual->next->next->next->expr;
-      gfc_init_se (&old, NULL);
-      gfc_conv_expr (&old, code->ext.actual->next->next->expr);
-      gfc_add_block_to_block (&block, &old.pre);
-      gfc_add_block_to_block (&post_block, &old.post);
-      tmp = build_call_expr_loc (input_location, tmp, 3, atom.expr,
-				 fold_convert (itype, value.expr),
+      tmp = build_call_expr_loc (input_location, tmp, 3, atom,
+				 fold_convert (itype, value),
 				 build_int_cst (NULL, MEMMODEL_RELAXED));
-      gfc_add_modify (&block, old.expr,
-		      fold_convert (TREE_TYPE (old.expr), tmp));
+      gfc_add_modify (&block, old, fold_convert (TREE_TYPE (old), tmp));
       break;
     }
 
-  /* STAT=  */
-  if (stat != NULL)
-    {
-      gcc_assert (stat->expr_type == EXPR_VARIABLE);
-      gfc_init_se (&value, NULL);
-      gfc_conv_expr_val (&value, stat);
-      gfc_add_block_to_block (&block, &value.pre);
-      gfc_add_block_to_block (&post_block, &value.post);
-      gfc_add_modify (&block, value.expr,
-		      build_int_cst (TREE_TYPE (value.expr), 0));
-    }
+  if (stat != NULL_TREE)
+    gfc_add_modify (&block, stat, build_int_cst (TREE_TYPE (stat), 0));
   gfc_add_block_to_block (&block, &post_block);
   return gfc_finish_block (&block);
 }
@@ -8444,8 +8542,8 @@  conv_intrinsic_atomic_op (gfc_code *code)
 static tree
 conv_intrinsic_atomic_ref (gfc_code *code)
 {
-  gfc_se atom, value;
-  tree tmp;
+  gfc_se argse;
+  tree tmp, atom, value, stat = NULL_TREE;
   stmtblock_t block, post_block;
   built_in_function fn;
   gfc_expr *atom_expr = code->ext.actual->next->expr;
@@ -8457,39 +8555,75 @@  conv_intrinsic_atomic_ref (gfc_code *code)
 
   gfc_start_block (&block);
   gfc_init_block (&post_block);
-  gfc_init_se (&atom, NULL);
-  gfc_init_se (&value, NULL);
-  atom.want_pointer = 1;
-  gfc_conv_expr (&value, code->ext.actual->expr);
-  gfc_add_block_to_block (&block, &value.pre);
-  gfc_add_block_to_block (&post_block, &value.post);
-  gfc_conv_expr (&atom, atom_expr);
-  gfc_add_block_to_block (&block, &atom.pre);
-  gfc_add_block_to_block (&post_block, &atom.post);
-
-  tmp = TREE_TYPE (TREE_TYPE (atom.expr));
-  fn = (built_in_function) ((int) BUILT_IN_ATOMIC_LOAD_N
-			    + exact_log2 (tree_to_uhwi (TYPE_SIZE_UNIT (tmp)))
-			    + 1);
-  tmp = builtin_decl_explicit (fn);
-  tmp = build_call_expr_loc (input_location, tmp, 2, atom.expr,
-			     build_int_cst (integer_type_node,
-					    MEMMODEL_RELAXED));
-  gfc_add_modify (&block, value.expr,
-		  fold_convert (TREE_TYPE (value.expr), tmp));
-  
+  gfc_init_se (&argse, NULL);
+  argse.want_pointer = 1;
+  gfc_conv_expr (&argse, atom_expr);
+  gfc_add_block_to_block (&block, &argse.pre);
+  gfc_add_block_to_block (&post_block, &argse.post);
+  atom = argse.expr;
+
+  gfc_init_se (&argse, NULL);
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    argse.want_pointer = 1;
+  gfc_conv_expr (&argse, code->ext.actual->expr);
+  gfc_add_block_to_block (&block, &argse.pre);
+  gfc_add_block_to_block (&post_block, &argse.post);
+  value = argse.expr;
+
   /* STAT=  */
   if (code->ext.actual->next->next->expr != NULL)
     {
       gcc_assert (code->ext.actual->next->next->expr->expr_type
 		  == EXPR_VARIABLE);
-      gfc_init_se (&value, NULL);
-      gfc_conv_expr_val (&value, code->ext.actual->next->next->expr);
-      gfc_add_block_to_block (&block, &value.pre);
-      gfc_add_block_to_block (&post_block, &value.post);
-      gfc_add_modify (&block, value.expr,
-		      build_int_cst (TREE_TYPE (value.expr), 0));
+      gfc_init_se (&argse, NULL);
+      if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+	argse.want_pointer = 1;
+      gfc_conv_expr_val (&argse, code->ext.actual->next->next->expr);
+      gfc_add_block_to_block (&block, &argse.pre);
+      gfc_add_block_to_block (&post_block, &argse.post);
+      stat = argse.expr;
+    }
+  else if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    stat = null_pointer_node;
+
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    {
+      tree image_index, caf_decl, offset, token;
+
+      caf_decl = gfc_get_tree_for_caf_expr (atom_expr);
+      if (TREE_CODE (TREE_TYPE (caf_decl)) == REFERENCE_TYPE)
+	caf_decl = build_fold_indirect_ref_loc (input_location, caf_decl);
+
+      if (gfc_is_coindexed (atom_expr))
+	image_index = caf_get_image_index (&block, atom_expr, caf_decl);
+      else
+	image_index = integer_zero_node;
+
+      get_caf_token_offset (&token, &offset, caf_decl, atom, atom_expr);
+
+      tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_atomic_ref, 7,
+				 token, offset, image_index, value, stat,
+				 build_int_cst (integer_type_node,
+						(int) atom_expr->ts.type),
+				 build_int_cst (integer_type_node,
+						(int) atom_expr->ts.kind));
+      gfc_add_expr_to_block (&block, tmp);
+      gfc_add_block_to_block (&block, &post_block);
+      return gfc_finish_block (&block);
     }
+
+  tmp = TREE_TYPE (TREE_TYPE (atom));
+  fn = (built_in_function) ((int) BUILT_IN_ATOMIC_LOAD_N
+			    + exact_log2 (tree_to_uhwi (TYPE_SIZE_UNIT (tmp)))
+			    + 1);
+  tmp = builtin_decl_explicit (fn);
+  tmp = build_call_expr_loc (input_location, tmp, 2, atom,
+			     build_int_cst (integer_type_node,
+					    MEMMODEL_RELAXED));
+  gfc_add_modify (&block, value, fold_convert (TREE_TYPE (value), tmp));
+
+  if (stat != NULL_TREE)
+    gfc_add_modify (&block, stat, build_int_cst (TREE_TYPE (stat), 0));
   gfc_add_block_to_block (&block, &post_block);
   return gfc_finish_block (&block);
 }
@@ -8499,7 +8633,7 @@  static tree
 conv_intrinsic_atomic_cas (gfc_code *code)
 {
   gfc_se argse;
-  tree tmp, atom, old, new_val, comp;
+  tree tmp, atom, old, new_val, comp, stat = NULL_TREE;
   stmtblock_t block, post_block;
   built_in_function fn;
   gfc_expr *atom_expr = code->ext.actual->expr;
@@ -8517,23 +8651,89 @@  conv_intrinsic_atomic_cas (gfc_code *code)
   atom = argse.expr;
 
   gfc_init_se (&argse, NULL);
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    argse.want_pointer = 1;
   gfc_conv_expr (&argse, code->ext.actual->next->expr);
   gfc_add_block_to_block (&block, &argse.pre);
   gfc_add_block_to_block (&post_block, &argse.post);
   old = argse.expr;
 
   gfc_init_se (&argse, NULL);
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    argse.want_pointer = 1;
   gfc_conv_expr (&argse, code->ext.actual->next->next->expr);
   gfc_add_block_to_block (&block, &argse.pre);
   gfc_add_block_to_block (&post_block, &argse.post);
   comp = argse.expr;
 
   gfc_init_se (&argse, NULL);
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB
+      && code->ext.actual->next->next->next->expr->ts.kind
+	 == atom_expr->ts.kind)
+    argse.want_pointer = 1;
   gfc_conv_expr (&argse, code->ext.actual->next->next->next->expr);
   gfc_add_block_to_block (&block, &argse.pre);
   gfc_add_block_to_block (&post_block, &argse.post);
   new_val = argse.expr;
 
+  /* STAT=  */
+  if (code->ext.actual->next->next->next->next->expr != NULL)
+    {
+      gcc_assert (code->ext.actual->next->next->next->next->expr->expr_type
+		  == EXPR_VARIABLE);
+      gfc_init_se (&argse, NULL);
+      if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+	argse.want_pointer = 1;
+      gfc_conv_expr_val (&argse,
+			 code->ext.actual->next->next->next->next->expr);
+      gfc_add_block_to_block (&block, &argse.pre);
+      gfc_add_block_to_block (&post_block, &argse.post);
+      stat = argse.expr;
+    }
+  else if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    stat = null_pointer_node;
+
+  if (gfc_option.coarray == GFC_FCOARRAY_LIB)
+    {
+      tree image_index, caf_decl, offset, token;
+
+      caf_decl = gfc_get_tree_for_caf_expr (atom_expr);
+      if (TREE_CODE (TREE_TYPE (caf_decl)) == REFERENCE_TYPE)
+	caf_decl = build_fold_indirect_ref_loc (input_location, caf_decl);
+
+      if (gfc_is_coindexed (atom_expr))
+	image_index = caf_get_image_index (&block, atom_expr, caf_decl);
+      else
+	image_index = integer_zero_node;
+
+      if (TREE_TYPE (TREE_TYPE (new_val)) != TREE_TYPE (TREE_TYPE (old)))
+	{
+	  tmp = gfc_create_var (TREE_TYPE (TREE_TYPE (old)), "new");
+	  gfc_add_modify (&block, tmp, fold_convert (TREE_TYPE (tmp), new_val));
+          new_val = gfc_build_addr_expr (NULL_TREE, tmp);
+	}
+
+      /* Convert a constant to a pointer.  */
+      if (!POINTER_TYPE_P (TREE_TYPE (comp)))
+	{
+	  tmp = gfc_create_var (TREE_TYPE (TREE_TYPE (old)), "comp");
+	  gfc_add_modify (&block, tmp, fold_convert (TREE_TYPE (tmp), comp));
+          comp = gfc_build_addr_expr (NULL_TREE, tmp);
+	}
+
+      get_caf_token_offset (&token, &offset, caf_decl, atom, atom_expr);
+
+      tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_atomic_cas, 9,
+				 token, offset, image_index, old, comp, new_val,
+				 stat, build_int_cst (integer_type_node,
+						      (int) atom_expr->ts.type),
+				 build_int_cst (integer_type_node,
+						(int) atom_expr->ts.kind));
+      gfc_add_expr_to_block (&block, tmp);
+      gfc_add_block_to_block (&block, &post_block);
+      return gfc_finish_block (&block);
+    }
+
   tmp = TREE_TYPE (TREE_TYPE (atom));
   fn = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
 			    + exact_log2 (tree_to_uhwi (TYPE_SIZE_UNIT (tmp)))
@@ -8549,19 +8749,8 @@  conv_intrinsic_atomic_cas (gfc_code *code)
 			     build_int_cst (NULL, MEMMODEL_RELAXED));
   gfc_add_expr_to_block (&block, tmp);
   
-  /* STAT=  */
-  if (code->ext.actual->next->next->next->next->expr != NULL)
-    {
-      gcc_assert (code->ext.actual->next->next->next->next->expr->expr_type
-		  == EXPR_VARIABLE);
-      gfc_init_se (&argse, NULL);
-      gfc_conv_expr_val (&argse,
-			 code->ext.actual->next->next->next->next->expr);
-      gfc_add_block_to_block (&block, &argse.pre);
-      gfc_add_block_to_block (&post_block, &argse.post);
-      gfc_add_modify (&block, argse.expr,
-		      build_int_cst (TREE_TYPE (argse.expr), 0));
-    }
+  if (stat != NULL_TREE)
+    gfc_add_modify (&block, stat, build_int_cst (TREE_TYPE (stat), 0));
   gfc_add_block_to_block (&block, &post_block);
   return gfc_finish_block (&block);
 }
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index 472b841..bae51bf 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -720,6 +720,10 @@  extern GTY(()) tree gfor_fndecl_caf_sync_all;
 extern GTY(()) tree gfor_fndecl_caf_sync_images;
 extern GTY(()) tree gfor_fndecl_caf_error_stop;
 extern GTY(()) tree gfor_fndecl_caf_error_stop_str;
+extern GTY(()) tree gfor_fndecl_caf_atomic_def;
+extern GTY(()) tree gfor_fndecl_caf_atomic_ref;
+extern GTY(()) tree gfor_fndecl_caf_atomic_cas;
+extern GTY(()) tree gfor_fndecl_caf_atomic_op;
 extern GTY(()) tree gfor_fndecl_co_max;
 extern GTY(()) tree gfor_fndecl_co_min;
 extern GTY(()) tree gfor_fndecl_co_sum;
diff --git a/libgfortran/caf/libcaf.h b/libgfortran/caf/libcaf.h
index 2c97880..0ae7135 100644
--- a/libgfortran/caf/libcaf.h
+++ b/libgfortran/caf/libcaf.h
@@ -128,4 +128,13 @@  void _gfortran_caf_send (caf_token_t, size_t, int, gfc_descriptor_t *,
 void _gfortran_caf_sendget (caf_token_t, size_t, int, gfc_descriptor_t *,
 			    caf_vector_t *, caf_token_t, size_t, int,
 			    gfc_descriptor_t *, caf_vector_t *, int, int);
+
+void _gfortran_caf_atomic_define (caf_token_t, size_t, int, void *, int *,
+				  int, int);
+void _gfortran_caf_atomic_ref (caf_token_t, size_t, int, void *, int *,
+			       int, int);
+void _gfortran_caf_atomic_cas (caf_token_t, size_t, int, void *, void *,
+			       void *, int *, int, int);
+void _gfortran_caf_atomic_op (int, caf_token_t, size_t, int, void *, void *,
+			      int *, int, int);
 #endif  /* LIBCAF_H  */
diff --git a/libgfortran/caf/single.c b/libgfortran/caf/single.c
index d053c50..1f5da72 100644
--- a/libgfortran/caf/single.c
+++ b/libgfortran/caf/single.c
@@ -28,6 +28,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #include <stdlib.h> /* For exit and malloc.  */
 #include <string.h> /* For memcpy and memset.  */
 #include <stdarg.h> /* For variadic arguments.  */
+#include <assert.h>
 
 /* Define GFC_CAF_CHECK to enable run-time checking.  */
 /* #define GFC_CAF_CHECK  1  */
@@ -774,3 +775,92 @@  _gfortran_caf_sendget (caf_token_t dst_token, size_t dst_offset,
 		      src, dst_len, src_len);
   GFC_DESCRIPTOR_DATA (src) = src_base;
 }
+
+
+void
+_gfortran_caf_atomic_define (caf_token_t token, size_t offset,
+			     int image_index __attribute__ ((unused)),
+			     void *value, int *stat,
+			     int type __attribute__ ((unused)), int kind)
+{
+  assert(kind == 4);
+
+  uint32_t *atom = (uint32_t *) ((char *) TOKEN (token) + offset);
+
+  __atomic_store (atom, (uint32_t *) value, __ATOMIC_RELAXED);
+
+  if (stat)
+    *stat = 0;
+}
+
+void
+_gfortran_caf_atomic_ref (caf_token_t token, size_t offset,
+			  int image_index __attribute__ ((unused)),
+			  void *value, int *stat,
+			  int type __attribute__ ((unused)), int kind)
+{
+  assert(kind == 4);
+
+  uint32_t *atom = (uint32_t *) ((char *) TOKEN (token) + offset);
+
+  __atomic_load (atom, (uint32_t *) value, __ATOMIC_RELAXED);
+
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+_gfortran_caf_atomic_cas (caf_token_t token, size_t offset,
+			  int image_index __attribute__ ((unused)),
+			  void *old, void *compare, void *new_val, int *stat,
+			  int type __attribute__ ((unused)), int kind)
+{
+  assert(kind == 4);
+
+  uint32_t *atom = (uint32_t *) ((char *) TOKEN (token) + offset);
+
+  *(uint32_t *) old = *(uint32_t *) compare;
+  (void) __atomic_compare_exchange_n (atom, (uint32_t *) old,
+				      *(uint32_t *) new_val, false,
+				      __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+_gfortran_caf_atomic_op (int op, caf_token_t token, size_t offset,
+			 int image_index __attribute__ ((unused)),
+			 void *value, void *old, int *stat,
+			 int type __attribute__ ((unused)), int kind)
+{
+  assert(kind == 4);
+
+  uint32_t res;
+  uint32_t *atom = (uint32_t *) ((char *) TOKEN (token) + offset);
+
+  switch (op)
+    {
+    case GFC_CAF_ATOMIC_ADD:
+      res = __atomic_fetch_add (atom, *(uint32_t *) value, __ATOMIC_RELAXED);
+      break;
+    case GFC_CAF_ATOMIC_AND:
+      res = __atomic_fetch_and (atom, *(uint32_t *) value, __ATOMIC_RELAXED);
+      break;
+    case GFC_CAF_ATOMIC_OR:
+      res = __atomic_fetch_or (atom, *(uint32_t *) value, __ATOMIC_RELAXED);
+      break;
+    case GFC_CAF_ATOMIC_XOR:
+      res = __atomic_fetch_xor (atom, *(uint32_t *) value, __ATOMIC_RELAXED);
+      break;
+    default:
+      __builtin_unreachable();
+    }
+
+  if (old)
+    *(uint32_t *) old = res;
+
+  if (stat)
+    *stat = 0;
+}