Patchwork [RFC] OpenMP 3.1 atomics

login
register
mail settings
Submitter Jakub Jelinek
Date March 3, 2011, 4:54 p.m.
Message ID <20110303165432.GW30899@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/85299/
State New
Headers show

Comments

Jakub Jelinek - March 3, 2011, 4:54 p.m.
Hi!

This is a WIP patch for OpenMP 3.1 atomics, so far for the C FE only.
It handles parsing (I created 3 new tree codes: one for atomic read and
two for atomic capture, of the old and of the new value), gimplification
and omp expansion thereof, but currently both atomic read and atomic write
are always implemented rather expensively, using __sync_val_compare_and_swap.

I think we probably want to implement them as a volatile read or write
(or perhaps just a non-volatile read with some barriers around it?), but we
probably need some target hook to tell us which loads/stores aren't actually
atomic (say, on old Alpha, char/short/int loads and stores aren't atomic).

Any comments?


	Jakub
Jakub Jelinek - April 21, 2011, 1:03 p.m.
On Thu, Mar 03, 2011 at 05:54:32PM +0100, Jakub Jelinek wrote:
> This is a WIP patch for OpenMP 3.1 atomics, so far for the C FE only.
> It handles parsing (I created 3 new tree codes: one for atomic read and
> two for atomic capture, of the old and of the new value), gimplification
> and omp expansion thereof, but currently both atomic read and atomic write
> are always implemented rather expensively, using __sync_val_compare_and_swap.
> 
> I think we probably want to implement them as a volatile read or write
> (or perhaps just a non-volatile read with some barriers around it?), but we
> probably need some target hook to tell us which loads/stores aren't actually
> atomic (say, on old Alpha, char/short/int loads and stores aren't atomic).

I've now committed the patch as is to gomp-3_1-branch, so that I can
continue working on C++ and Fortran 3.1 atomics parsing.
Comments/suggestions about the omp-low.c implementation of atomic reads
and writes are welcome.

Here is the ChangeLog for the committed bits.

2011-04-21  Jakub Jelinek  <jakub@redhat.com>

	* c-parser.c (c_parser_omp_atomic): Handle parsing
	OpenMP 3.1 atomics.  Adjust c_finish_omp_atomic caller.
	* tree.def (OMP_ATOMIC_READ, OMP_ATOMIC_CAPTURE_OLD,
	OMP_ATOMIC_CAPTURE_NEW): New.
	* gimple.h (GF_OMP_ATOMIC_NEED_VALUE): New.
	(gimple_omp_atomic_need_value_p, gimple_omp_atomic_set_need_value):
	New inlines.
	* gimplify.c (gimplify_omp_atomic, gimplify_expr): Handle
	OMP_ATOMIC_READ, OMP_ATOMIC_CAPTURE_OLD and OMP_ATOMIC_CAPTURE_NEW.
	* omp-low.c (expand_omp_atomic_load, expand_omp_atomic_store): New
	functions.
	(expand_omp_atomic_fetch_op): Handle cases where old or new
	value is needed afterwards.
	(expand_omp_atomic): Call expand_omp_atomic_load resp.
	expand_omp_atomic_store.
	* tree-pretty-print.c (dump_generic_node): Handle
	OMP_ATOMIC_READ, OMP_ATOMIC_CAPTURE_OLD and OMP_ATOMIC_CAPTURE_NEW.

	* c-common.h (c_finish_omp_atomic): Adjust prototype.
	* c-omp.c (c_finish_omp_atomic): Add OPCODE, V and LHS1 arguments.
	Handle OMP_ATOMIC_READ, OMP_ATOMIC_CAPTURE_OLD and
	OMP_ATOMIC_CAPTURE_NEW in addition to OMP_ATOMIC.

	* semantics.c (finish_omp_atomic): Adjust c_finish_omp_atomic
	caller.

	* testsuite/libgomp.c/atomic-11.c: New test.
	* testsuite/libgomp.c/atomic-12.c: New test.

	Jakub

Patch

--- gcc/c-parser.c.jj	2011-02-24 14:20:36.000000000 +0100
+++ gcc/c-parser.c	2011-03-03 16:57:07.000000000 +0100
@@ -9006,20 +9006,114 @@  c_parser_omp_structured_block (c_parser 
 
   where x is an lvalue expression with scalar type.
 
+   OpenMP 3.1:
+   # pragma omp atomic read new-line
+     read-stmt
+
+   # pragma omp atomic write new-line
+     write-stmt
+
+   # pragma omp atomic update new-line
+     expression-stmt
+
+   # pragma omp atomic capture new-line
+     capture-stmt
+
+   # pragma omp atomic capture new-line
+     capture-block
+
+   read-stmt:
+     v = x
+   write-stmt:
+     x = expr
+   capture-stmt:
+     v = x binop= expr | v = x++ | v = ++x | v = x-- | v = --x
+   capture-block:
+     { v = x; x binop= expr; } | { x binop= expr; v = x; }
+
+  where x and v are lvalue expressions with scalar type.
+
   LOC is the location of the #pragma token.  */
 
 static void
 c_parser_omp_atomic (location_t loc, c_parser *parser)
 {
-  tree lhs, rhs;
-  tree stmt;
-  enum tree_code code;
+  tree lhs = NULL_TREE, rhs = NULL_TREE, v = NULL_TREE, lhs1 = NULL_TREE;
+  tree stmt, orig_lhs;
+  enum tree_code code = OMP_ATOMIC, opcode = NOP_EXPR;
   struct c_expr rhs_expr;
+  bool structured_block = false;
+
+  if (c_parser_next_token_is (parser, CPP_NAME))
+    {
+      const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
 
+      if (!strcmp (p, "read"))
+	code = OMP_ATOMIC_READ;
+      else if (!strcmp (p, "write"))
+	code = NOP_EXPR;
+      else if (!strcmp (p, "update"))
+	code = OMP_ATOMIC;
+      else if (!strcmp (p, "capture"))
+	code = OMP_ATOMIC_CAPTURE_NEW;
+      else
+	p = NULL;
+      if (p)
+	c_parser_consume_token (parser);
+    }
   c_parser_skip_to_pragma_eol (parser);
 
+  switch (code)
+    {
+    case OMP_ATOMIC_READ:
+    case NOP_EXPR: /* atomic write */
+      v = c_parser_unary_expression (parser).value;
+      v = c_fully_fold (v, false, NULL);
+      if (v == error_mark_node)
+	goto saw_error;
+      loc = c_parser_peek_token (parser)->location;
+      if (!c_parser_require (parser, CPP_EQ, "expected %<=%>"))
+	goto saw_error;
+      lhs = c_parser_unary_expression (parser).value;
+      lhs = c_fully_fold (lhs, false, NULL);
+      if (lhs == error_mark_node)
+	goto saw_error;
+      if (code == NOP_EXPR)
+	{
+	  /* atomic write is represented by OMP_ATOMIC with NOP_EXPR
+	     opcode.  */
+	  code = OMP_ATOMIC;
+	  rhs = lhs;
+	  lhs = v;
+	  v = NULL_TREE;
+	}
+      goto done;
+    case OMP_ATOMIC_CAPTURE_NEW:
+      if (c_parser_next_token_is (parser, CPP_OPEN_BRACE))
+	{
+	  c_parser_consume_token (parser);
+	  structured_block = true;
+	}
+      else
+	{
+	  v = c_parser_unary_expression (parser).value;
+	  v = c_fully_fold (v, false, NULL);
+	  if (v == error_mark_node)
+	    goto saw_error;
+	  if (!c_parser_require (parser, CPP_EQ, "expected %<=%>"))
+	    goto saw_error;
+	}
+      break;
+    default:
+      break;
+    }
+
+  /* For structured_block case we don't know yet whether
+     old or new x should be captured.  */
+restart:
   lhs = c_parser_unary_expression (parser).value;
   lhs = c_fully_fold (lhs, false, NULL);
+  orig_lhs = lhs;
   switch (TREE_CODE (lhs))
     {
     case ERROR_MARK:
@@ -9027,17 +9121,23 @@  c_parser_omp_atomic (location_t loc, c_p
       c_parser_skip_to_end_of_block_or_statement (parser);
       return;
 
-    case PREINCREMENT_EXPR:
     case POSTINCREMENT_EXPR:
+      if (code == OMP_ATOMIC_CAPTURE_NEW)
+	code = OMP_ATOMIC_CAPTURE_OLD;
+      /* FALLTHROUGH */
+    case PREINCREMENT_EXPR:
       lhs = TREE_OPERAND (lhs, 0);
-      code = PLUS_EXPR;
+      opcode = PLUS_EXPR;
       rhs = integer_one_node;
       break;
 
-    case PREDECREMENT_EXPR:
     case POSTDECREMENT_EXPR:
+      if (code == OMP_ATOMIC_CAPTURE_NEW)
+	code = OMP_ATOMIC_CAPTURE_OLD;
+      /* FALLTHROUGH */
+    case PREDECREMENT_EXPR:
       lhs = TREE_OPERAND (lhs, 0);
-      code = MINUS_EXPR;
+      opcode = MINUS_EXPR;
       rhs = integer_one_node;
       break;
 
@@ -9062,7 +9162,10 @@  c_parser_omp_atomic (location_t loc, c_p
 	      /* This is pre or post increment.  */
 	      rhs = TREE_OPERAND (lhs, 1);
 	      lhs = TREE_OPERAND (lhs, 0);
-	      code = NOP_EXPR;
+	      opcode = NOP_EXPR;
+	      if (code == OMP_ATOMIC_CAPTURE_NEW
+		  && TREE_CODE (orig_lhs) == COMPOUND_EXPR)
+		code = OMP_ATOMIC_CAPTURE_OLD;
 	      break;
 	    }
 	  if (TREE_CODE (TREE_OPERAND (lhs, 1)) == TRUTH_NOT_EXPR
@@ -9072,7 +9175,10 @@  c_parser_omp_atomic (location_t loc, c_p
 	      /* This is pre or post decrement.  */
 	      rhs = TREE_OPERAND (lhs, 1);
 	      lhs = TREE_OPERAND (lhs, 0);
-	      code = NOP_EXPR;
+	      opcode = NOP_EXPR;
+	      if (code == OMP_ATOMIC_CAPTURE_NEW
+		  && TREE_CODE (orig_lhs) == COMPOUND_EXPR)
+		code = OMP_ATOMIC_CAPTURE_OLD;
 	      break;
 	    }
 	}
@@ -9081,32 +9187,48 @@  c_parser_omp_atomic (location_t loc, c_p
       switch (c_parser_peek_token (parser)->type)
 	{
 	case CPP_MULT_EQ:
-	  code = MULT_EXPR;
+	  opcode = MULT_EXPR;
 	  break;
 	case CPP_DIV_EQ:
-	  code = TRUNC_DIV_EXPR;
+	  opcode = TRUNC_DIV_EXPR;
 	  break;
 	case CPP_PLUS_EQ:
-	  code = PLUS_EXPR;
+	  opcode = PLUS_EXPR;
 	  break;
 	case CPP_MINUS_EQ:
-	  code = MINUS_EXPR;
+	  opcode = MINUS_EXPR;
 	  break;
 	case CPP_LSHIFT_EQ:
-	  code = LSHIFT_EXPR;
+	  opcode = LSHIFT_EXPR;
 	  break;
 	case CPP_RSHIFT_EQ:
-	  code = RSHIFT_EXPR;
+	  opcode = RSHIFT_EXPR;
 	  break;
 	case CPP_AND_EQ:
-	  code = BIT_AND_EXPR;
+	  opcode = BIT_AND_EXPR;
 	  break;
 	case CPP_OR_EQ:
-	  code = BIT_IOR_EXPR;
+	  opcode = BIT_IOR_EXPR;
 	  break;
 	case CPP_XOR_EQ:
-	  code = BIT_XOR_EXPR;
+	  opcode = BIT_XOR_EXPR;
 	  break;
+	case CPP_EQ:
+	  if (structured_block && code == OMP_ATOMIC_CAPTURE_NEW)
+	    {
+	      code = OMP_ATOMIC_CAPTURE_OLD;
+	      v = lhs;
+	      lhs = NULL_TREE;
+	      c_parser_consume_token (parser);
+	      lhs1 = c_parser_unary_expression (parser).value;
+	      lhs1 = c_fully_fold (lhs1, false, NULL);
+	      if (lhs1 == error_mark_node)
+		goto saw_error;
+	      if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
+		goto saw_error;
+	      goto restart;
+	    }
+	  /* FALLTHROUGH */
 	default:
 	  c_parser_error (parser,
 			  "invalid operator for %<#pragma omp atomic%>");
@@ -9126,10 +9248,33 @@  c_parser_omp_atomic (location_t loc, c_p
       rhs = c_fully_fold (rhs, false, NULL);
       break;
     }
-  stmt = c_finish_omp_atomic (loc, code, lhs, rhs);
+  if (structured_block && code == OMP_ATOMIC_CAPTURE_NEW)
+    {
+      if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
+	goto saw_error;
+      v = c_parser_unary_expression (parser).value;
+      v = c_fully_fold (v, false, NULL);
+      if (v == error_mark_node)
+	goto saw_error;
+      if (!c_parser_require (parser, CPP_EQ, "expected %<=%>"))
+	goto saw_error;
+      lhs1 = c_parser_unary_expression (parser).value;
+      lhs1 = c_fully_fold (lhs1, false, NULL);
+      if (lhs1 == error_mark_node)
+	goto saw_error;
+    }
+  if (structured_block)
+    {
+      c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
+      c_parser_require (parser, CPP_CLOSE_BRACE, "expected %<}%>");
+    }
+done:
+  stmt = c_finish_omp_atomic (loc, code, opcode, lhs, rhs, v, lhs1);
   if (stmt != error_mark_node)
     add_stmt (stmt);
-  c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
+
+  if (!structured_block)
+    c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
 }
 
 
--- gcc/tree.def.jj	2011-02-24 14:20:36.000000000 +0100
+++ gcc/tree.def	2011-02-28 20:31:13.000000000 +0100
@@ -1040,6 +1040,22 @@  DEFTREECODE (OMP_CRITICAL, "omp_critical
 	build_fold_indirect_ref of the address.  */
 DEFTREECODE (OMP_ATOMIC, "omp_atomic", tcc_statement, 2)
 
+/* OpenMP - #pragma omp atomic read
+   Operand 0: The address at which the atomic operation is to be performed.
+	This address should be stabilized with save_expr.  */
+DEFTREECODE (OMP_ATOMIC_READ, "omp_atomic_read", tcc_statement, 1)
+
+/* OpenMP - #pragma omp atomic capture
+   Operand 0: The address at which the atomic operation is to be performed.
+	This address should be stabilized with save_expr.
+   Operand 1: The expression to evaluate.  When the old value of the object
+	at the address is used in the expression, it should appear as if
+	build_fold_indirect_ref of the address.
+   OMP_ATOMIC_CAPTURE_OLD returns the old memory content,
+   OMP_ATOMIC_CAPTURE_NEW the new value.  */
+DEFTREECODE (OMP_ATOMIC_CAPTURE_OLD, "omp_atomic_capture_old", tcc_statement, 2)
+DEFTREECODE (OMP_ATOMIC_CAPTURE_NEW, "omp_atomic_capture_new", tcc_statement, 2)
+
 /* OpenMP clauses.  */
 DEFTREECODE (OMP_CLAUSE, "omp_clause", tcc_exceptional, 0)
 
--- gcc/cp/semantics.c.jj	2011-02-24 14:18:07.000000000 +0100
+++ gcc/cp/semantics.c	2011-02-28 19:27:17.000000000 +0100
@@ -4584,7 +4584,8 @@  finish_omp_atomic (enum tree_code code, 
     }
   if (!dependent_p)
     {
-      stmt = c_finish_omp_atomic (input_location, code, lhs, rhs);
+      stmt = c_finish_omp_atomic (input_location, OMP_ATOMIC, code, lhs, rhs,
+				  NULL_TREE, NULL_TREE);
       if (stmt == error_mark_node)
 	return;
     }
--- gcc/gimple.h.jj	2011-02-24 14:20:36.000000000 +0100
+++ gcc/gimple.h	2011-02-24 14:20:36.000000000 +0100
@@ -111,6 +111,7 @@  enum gf_mask {
     GF_OMP_RETURN_NOWAIT	= 1 << 0,
 
     GF_OMP_SECTION_LAST		= 1 << 0,
+    GF_OMP_ATOMIC_NEED_VALUE	= 1 << 0,
     GF_PREDICT_TAKEN		= 1 << 15
 };
 
@@ -1618,6 +1619,29 @@  gimple_omp_parallel_set_combined_p (gimp
 }
 
 
+/* Return true if OMP atomic load/store statement G has the
+   GF_OMP_ATOMIC_NEED_VALUE flag set.  */
+
+static inline bool
+gimple_omp_atomic_need_value_p (const_gimple g)
+{
+  if (gimple_code (g) != GIMPLE_OMP_ATOMIC_LOAD)
+    GIMPLE_CHECK (g, GIMPLE_OMP_ATOMIC_STORE);
+  return (gimple_omp_subcode (g) & GF_OMP_ATOMIC_NEED_VALUE) != 0;
+}
+
+
+/* Set the GF_OMP_ATOMIC_NEED_VALUE flag on G.  */
+
+static inline void
+gimple_omp_atomic_set_need_value (gimple g)
+{
+  if (gimple_code (g) != GIMPLE_OMP_ATOMIC_LOAD)
+    GIMPLE_CHECK (g, GIMPLE_OMP_ATOMIC_STORE);
+  g->gsbase.subcode |= GF_OMP_ATOMIC_NEED_VALUE;
+}
+
+
 /* Return the number of operands for statement GS.  */
 
 static inline unsigned
--- gcc/c-family/c-common.h.jj	2011-02-24 14:11:18.000000000 +0100
+++ gcc/c-family/c-common.h	2011-02-28 19:22:50.000000000 +0100
@@ -995,7 +995,8 @@  extern tree c_finish_omp_master (locatio
 extern tree c_finish_omp_critical (location_t, tree, tree);
 extern tree c_finish_omp_ordered (location_t, tree);
 extern void c_finish_omp_barrier (location_t);
-extern tree c_finish_omp_atomic (location_t, enum tree_code, tree, tree);
+extern tree c_finish_omp_atomic (location_t, enum tree_code, enum tree_code,
+				 tree, tree, tree, tree);
 extern void c_finish_omp_flush (location_t);
 extern void c_finish_omp_taskwait (location_t);
 extern tree c_finish_omp_for (location_t, tree, tree, tree, tree, tree, tree);
--- gcc/c-family/c-omp.c.jj	2011-02-24 14:11:18.000000000 +0100
+++ gcc/c-family/c-omp.c	2011-03-01 12:42:15.000000000 +0100
@@ -96,18 +96,24 @@  c_finish_omp_taskwait (location_t loc)
 }
 
 
-/* Complete a #pragma omp atomic construct.  The expression to be
-   implemented atomically is LHS code= RHS.  LOC is the location of
-   the atomic statement.  The value returned is either error_mark_node
-   (if the construct was erroneous) or an OMP_ATOMIC node which should
-   be added to the current statement tree with add_stmt.*/
+/* Complete a #pragma omp atomic construct.  For CODE OMP_ATOMIC
+   the expression to be implemented atomically is LHS opcode= RHS. 
+   For OMP_ATOMIC_READ V = LHS, for OMP_ATOMIC_CAPTURE_{NEW,OLD} LHS
+   opcode= RHS with the new or old content of LHS returned.
+   LOC is the location of the atomic statement.  The value returned
+   is either error_mark_node (if the construct was erroneous) or an
+   OMP_ATOMIC* node which should be added to the current statement
+   tree with add_stmt.  */
 
 tree
-c_finish_omp_atomic (location_t loc, enum tree_code code, tree lhs, tree rhs)
+c_finish_omp_atomic (location_t loc, enum tree_code code,
+		     enum tree_code opcode, tree lhs, tree rhs,
+		     tree v, tree lhs1)
 {
   tree x, type, addr;
 
-  if (lhs == error_mark_node || rhs == error_mark_node)
+  if (lhs == error_mark_node || rhs == error_mark_node
+      || v == error_mark_node || lhs1 == error_mark_node)
     return error_mark_node;
 
   /* ??? According to one reading of the OpenMP spec, complex type are
@@ -143,10 +149,19 @@  c_finish_omp_atomic (location_t loc, enu
     }
   lhs = build_indirect_ref (loc, addr, RO_NULL);
 
+  if (code == OMP_ATOMIC_READ)
+    {
+      x = build1 (OMP_ATOMIC_READ, type, addr);
+      SET_EXPR_LOCATION (x, loc);
+      return build_modify_expr (loc, v, NULL_TREE, NOP_EXPR,
+			        loc, x, NULL_TREE);
+      return x;
+    }
+
   /* There are lots of warnings, errors, and conversions that need to happen
      in the course of interpreting a statement.  Use the normal mechanisms
      to do this, and then take it apart again.  */
-  x = build_modify_expr (input_location, lhs, NULL_TREE, code,
+  x = build_modify_expr (input_location, lhs, NULL_TREE, opcode,
       			 input_location, rhs, NULL_TREE);
   if (x == error_mark_node)
     return error_mark_node;
@@ -154,8 +169,40 @@  c_finish_omp_atomic (location_t loc, enu
   rhs = TREE_OPERAND (x, 1);
 
   /* Punt the actual generation of atomic operations to common code.  */
-  x = build2 (OMP_ATOMIC, void_type_node, addr, rhs);
+  if (code == OMP_ATOMIC)
+    type = void_type_node;
+  x = build2 (code, type, addr, rhs);
   SET_EXPR_LOCATION (x, loc);
+
+  if (code != OMP_ATOMIC)
+    {
+      /* Generally it is hard to prove lhs1 and lhs are the same memory
+	 location, just diagnose different variables.  */
+      if (lhs1 && TREE_CODE (lhs1) == VAR_DECL && TREE_CODE (lhs) == VAR_DECL)
+	{
+	  if (lhs1 != lhs)
+	    {
+	      error_at (loc, "%<#pragma omp atomic capture%> uses two different variables for memory");
+	      return error_mark_node;
+	    }
+	}
+      x = build_modify_expr (loc, v, NULL_TREE, NOP_EXPR,
+			     loc, x, NULL_TREE);
+      if (lhs1 && lhs1 != lhs)
+	{
+	  tree lhs1addr = build_unary_op (loc, ADDR_EXPR, lhs1, 0);
+	  if (lhs1addr == error_mark_node)
+	    return error_mark_node;
+	  if (code == OMP_ATOMIC_CAPTURE_OLD)
+	    x = omit_one_operand_loc (loc, type, x, lhs1addr);
+	  else
+	    {
+	      x = save_expr (x);
+	      x = omit_two_operands_loc (loc, type, x, lhs1addr, x);
+	    }
+	}
+    }
+
   return x;
 }
 
--- gcc/gimplify.c.jj	2011-02-24 14:20:36.000000000 +0100
+++ gcc/gimplify.c	2011-03-03 14:29:08.000000000 +0100
@@ -6456,24 +6456,45 @@  static enum gimplify_status
 gimplify_omp_atomic (tree *expr_p, gimple_seq *pre_p)
 {
   tree addr = TREE_OPERAND (*expr_p, 0);
-  tree rhs = TREE_OPERAND (*expr_p, 1);
+  tree rhs = TREE_CODE (*expr_p) == OMP_ATOMIC_READ
+	     ? NULL : TREE_OPERAND (*expr_p, 1);
   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
   tree tmp_load;
+  gimple loadstmt, storestmt;
 
-   tmp_load = create_tmp_reg (type, NULL);
-   if (goa_stabilize_expr (&rhs, pre_p, addr, tmp_load) < 0)
-     return GS_ERROR;
-
-   if (gimplify_expr (&addr, pre_p, NULL, is_gimple_val, fb_rvalue)
-       != GS_ALL_DONE)
-     return GS_ERROR;
-
-   gimplify_seq_add_stmt (pre_p, gimple_build_omp_atomic_load (tmp_load, addr));
-   if (gimplify_expr (&rhs, pre_p, NULL, is_gimple_val, fb_rvalue)
-       != GS_ALL_DONE)
-     return GS_ERROR;
-   gimplify_seq_add_stmt (pre_p, gimple_build_omp_atomic_store (rhs));
-   *expr_p = NULL;
+  tmp_load = create_tmp_reg (type, NULL);
+  if (rhs && goa_stabilize_expr (&rhs, pre_p, addr, tmp_load) < 0)
+    return GS_ERROR;
+
+  if (gimplify_expr (&addr, pre_p, NULL, is_gimple_val, fb_rvalue)
+      != GS_ALL_DONE)
+    return GS_ERROR;
+
+  loadstmt = gimple_build_omp_atomic_load (tmp_load, addr);
+  gimplify_seq_add_stmt (pre_p, loadstmt);
+  if (rhs && gimplify_expr (&rhs, pre_p, NULL, is_gimple_val, fb_rvalue)
+      != GS_ALL_DONE)
+    return GS_ERROR;
+
+  if (TREE_CODE (*expr_p) == OMP_ATOMIC_READ)
+    rhs = tmp_load;
+  storestmt = gimple_build_omp_atomic_store (rhs);
+  gimplify_seq_add_stmt (pre_p, storestmt);
+  switch (TREE_CODE (*expr_p))
+    {
+    case OMP_ATOMIC_READ:
+    case OMP_ATOMIC_CAPTURE_OLD:
+      *expr_p = tmp_load;
+      gimple_omp_atomic_set_need_value (loadstmt);
+      break;
+    case OMP_ATOMIC_CAPTURE_NEW:
+      *expr_p = rhs;
+      gimple_omp_atomic_set_need_value (storestmt);
+      break;
+    default:
+      *expr_p = NULL;
+      break;
+    }
 
    return GS_ALL_DONE;
 }
@@ -7175,6 +7196,9 @@  gimplify_expr (tree *expr_p, gimple_seq 
 	  }
 
 	case OMP_ATOMIC:
+	case OMP_ATOMIC_READ:
+	case OMP_ATOMIC_CAPTURE_OLD:
+	case OMP_ATOMIC_CAPTURE_NEW:
 	  ret = gimplify_omp_atomic (expr_p, pre_p);
 	  break;
 
--- gcc/omp-low.c.jj	2011-02-24 14:20:35.000000000 +0100
+++ gcc/omp-low.c	2011-03-03 15:44:03.000000000 +0100
@@ -4924,6 +4924,31 @@  expand_omp_synch (struct omp_region *reg
 }
 
 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
+   operation as a normal volatile load.  */
+
+static bool
+expand_omp_atomic_load (basic_block load_bb, tree addr, tree loaded_val)
+{
+  /* FIXME */
+  (void) load_bb;
+  (void) addr;
+  (void) loaded_val;
+  return false;
+}
+
+/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
+   operation as a normal volatile store.  */
+
+static bool
+expand_omp_atomic_store (basic_block load_bb, tree addr)
+{
+  /* FIXME */
+  (void) load_bb;
+  (void) addr;
+  return false;
+}
+
+/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
    operation as a __sync_fetch_and_op builtin.  INDEX is log2 of the
    size of the data type, and thus usable to find the index of the builtin
    decl.  Returns false if the expression is not of the proper form.  */
@@ -4933,14 +4958,15 @@  expand_omp_atomic_fetch_op (basic_block 
 			    tree addr, tree loaded_val,
 			    tree stored_val, int index)
 {
-  enum built_in_function base;
+  enum built_in_function oldbase, newbase;
   tree decl, itype, call;
-  direct_optab optab;
-  tree rhs;
+  direct_optab optab, oldoptab, newoptab;
+  tree lhs, rhs;
   basic_block store_bb = single_succ (load_bb);
   gimple_stmt_iterator gsi;
   gimple stmt;
   location_t loc;
+  bool need_old, need_new;
 
   /* We expect to find the following sequences:
 
@@ -4964,6 +4990,9 @@  expand_omp_atomic_fetch_op (basic_block 
   gsi_next (&gsi);
   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
     return false;
+  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
+  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
+  gcc_checking_assert (!need_old || !need_new);
 
   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
     return false;
@@ -4973,24 +5002,39 @@  expand_omp_atomic_fetch_op (basic_block 
     {
     case PLUS_EXPR:
     case POINTER_PLUS_EXPR:
-      base = BUILT_IN_FETCH_AND_ADD_N;
+      oldbase = BUILT_IN_FETCH_AND_ADD_N;
+      newbase = BUILT_IN_ADD_AND_FETCH_N;
       optab = sync_add_optab;
+      oldoptab = sync_old_add_optab;
+      newoptab = sync_new_add_optab;
       break;
     case MINUS_EXPR:
-      base = BUILT_IN_FETCH_AND_SUB_N;
+      oldbase = BUILT_IN_FETCH_AND_SUB_N;
+      newbase = BUILT_IN_SUB_AND_FETCH_N;
       optab = sync_add_optab;
+      oldoptab = sync_old_add_optab;
+      newoptab = sync_new_add_optab;
       break;
     case BIT_AND_EXPR:
-      base = BUILT_IN_FETCH_AND_AND_N;
+      oldbase = BUILT_IN_FETCH_AND_AND_N;
+      newbase = BUILT_IN_AND_AND_FETCH_N;
       optab = sync_and_optab;
+      oldoptab = sync_old_and_optab;
+      newoptab = sync_new_and_optab;
       break;
     case BIT_IOR_EXPR:
-      base = BUILT_IN_FETCH_AND_OR_N;
+      oldbase = BUILT_IN_FETCH_AND_OR_N;
+      newbase = BUILT_IN_OR_AND_FETCH_N;
       optab = sync_ior_optab;
+      oldoptab = sync_old_ior_optab;
+      newoptab = sync_new_ior_optab;
       break;
     case BIT_XOR_EXPR:
-      base = BUILT_IN_FETCH_AND_XOR_N;
+      oldbase = BUILT_IN_FETCH_AND_XOR_N;
+      newbase = BUILT_IN_XOR_AND_FETCH_N;
       optab = sync_xor_optab;
+      oldoptab = sync_old_xor_optab;
+      newoptab = sync_new_xor_optab;
       break;
     default:
       return false;
@@ -5004,18 +5048,47 @@  expand_omp_atomic_fetch_op (basic_block 
   else
     return false;
 
-  decl = built_in_decls[base + index + 1];
+  decl = built_in_decls[(need_new ? newbase : oldbase) + index + 1];
   itype = TREE_TYPE (TREE_TYPE (decl));
 
-  if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
+  if (need_new)
+    {
+      /* expand_sync_fetch_operation can always compensate when interested
+	 in the new value.  */
+      if (direct_optab_handler (newoptab, TYPE_MODE (itype))
+	  == CODE_FOR_nothing
+	  && direct_optab_handler (oldoptab, TYPE_MODE (itype))
+	     == CODE_FOR_nothing)
+	return false;
+    }
+  else if (need_old)
+    {
+      /* When interested in the old value, expand_sync_fetch_operation
+	 can compensate only if the operation is reversible.  AND and OR
+	 are not reversible.  */
+      if (direct_optab_handler (oldoptab, TYPE_MODE (itype))
+	  == CODE_FOR_nothing
+	  && (oldbase == BUILT_IN_FETCH_AND_AND_N
+	      || oldbase == BUILT_IN_FETCH_AND_OR_N
+	      || direct_optab_handler (newoptab, TYPE_MODE (itype))
+		 == CODE_FOR_nothing))
+	return false;
+    }
+  else if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
     return false;
 
   gsi = gsi_last_bb (load_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
-  call = build_call_expr_loc (loc,
-			  decl, 2, addr,
-			  fold_convert_loc (loc, itype, rhs));
-  call = fold_convert_loc (loc, void_type_node, call);
+  call = build_call_expr_loc (loc, decl, 2, addr,
+			      fold_convert_loc (loc, itype, rhs));
+  if (need_old || need_new)
+    {
+      lhs = need_old ? loaded_val : stored_val;
+      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
+      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
+    }
+  else
+    call = fold_convert_loc (loc, void_type_node, call);
   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
   gsi_remove (&gsi, true);
 
@@ -5294,6 +5367,25 @@  expand_omp_atomic (struct omp_region *re
       /* __sync builtins require strict data alignment.  */
       if (exact_log2 (align) >= index)
 	{
+	  /* Atomic load.  FIXME: have some target hook signalize what loads
+	     are actually atomic?  */
+	  if (loaded_val == stored_val
+	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
+		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
+	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
+	      && expand_omp_atomic_load (load_bb, addr, loaded_val))
+	    return;
+
+	  /* Atomic store.  FIXME: have some target hook signalize what
+	     stores are actually atomic?  */
+	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
+	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
+	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
+	      && store_bb == single_succ (load_bb)
+	      && first_stmt (store_bb) == store
+	      && expand_omp_atomic_store (load_bb, addr))
+	    return;
+
 	  /* When possible, use specialized atomic update functions.  */
 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
 	      && store_bb == single_succ (load_bb))
--- gcc/tree-pretty-print.c.jj	2011-02-24 14:20:35.000000000 +0100
+++ gcc/tree-pretty-print.c	2011-03-01 12:43:49.000000000 +0100
@@ -2165,6 +2165,24 @@  dump_generic_node (pretty_printer *buffe
       dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
       break;
 
+    case OMP_ATOMIC_READ:
+      pp_string (buffer, "#pragma omp atomic read");
+      newline_and_indent (buffer, spc + 2);
+      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_space (buffer);
+      break;
+
+    case OMP_ATOMIC_CAPTURE_OLD:
+    case OMP_ATOMIC_CAPTURE_NEW:
+      pp_string (buffer, "#pragma omp atomic capture");
+      newline_and_indent (buffer, spc + 2);
+      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_space (buffer);
+      pp_character (buffer, '=');
+      pp_space (buffer);
+      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+      break;
+
     case OMP_SINGLE:
       pp_string (buffer, "#pragma omp single");
       dump_omp_clauses (buffer, OMP_SINGLE_CLAUSES (node), spc, flags);
--- libgomp/testsuite/libgomp.c/atomic-11.c.jj	2011-03-03 16:28:27.000000000 +0100
+++ libgomp/testsuite/libgomp.c/atomic-11.c	2011-03-03 16:28:17.000000000 +0100
@@ -0,0 +1,112 @@ 
+/* { dg-do run } */
+
+extern void abort (void);
+int x = 6;
+float y;
+
+int
+main (void)
+{
+  int v;
+  float f;
+  #pragma omp atomic read
+    v = x;
+  if (v != 6)
+    abort ();
+  #pragma omp atomic write
+    x = 17;
+  #pragma omp atomic read
+  v = x;
+  if (v != 17)
+    abort ();
+  #pragma omp atomic update
+    x++;
+  #pragma omp atomic read
+    v = x;
+  if (v != 18)
+    abort ();
+  #pragma omp atomic capture
+    v = x++;
+  if (v != 18)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 19)
+    abort ();
+  #pragma omp atomic capture
+    v = ++x;
+  if (v != 20)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 20)
+    abort ();
+  #pragma omp atomic capture
+    { v = x; x *= 3; }
+  if (v != 20)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 60)
+    abort ();
+  #pragma omp atomic capture
+    {
+      x |= 2;
+      v = x;
+    }
+  if (v != 62)
+    abort ();
+  #pragma omp atomic read
+    v = x;
+  if (v != 62)
+    abort ();
+  #pragma omp atomic write
+    y = 17.5f;
+  #pragma omp atomic read
+    f = y;
+  if (f != 17.5)
+    abort ();
+  #pragma omp atomic update
+    y *= 2.0f;
+  #pragma omp atomic read
+    f = y;
+  if (y != 35.0)
+    abort ();
+  #pragma omp atomic capture
+    f = y *= 2.0f;
+  if (f != 70.0)
+    abort ();
+  #pragma omp atomic capture
+    f = y++;
+  if (f != 70.0)
+    abort ();
+  #pragma omp atomic read
+    f = y;
+  if (f != 71.0)
+    abort ();
+  #pragma omp atomic capture
+    f = --y;
+  if (f != 70.0)
+    abort ();
+  #pragma omp atomic read
+    f = y;
+  if (f != 70.0)
+    abort ();
+  #pragma omp atomic capture
+    { f = y; y /= 2.0f; }
+  if (f != 70.0)
+    abort ();
+  #pragma omp atomic read
+    f = y;
+  if (f != 35.0)
+    abort ();
+  #pragma omp atomic capture
+    { y /= 2.0f; f = y; }
+  if (f != 17.5)
+    abort ();
+  #pragma omp atomic read
+    f = y;
+  if (f != 17.5)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/atomic-12.c.jj	2011-03-03 17:01:19.000000000 +0100
+++ libgomp/testsuite/libgomp.c/atomic-12.c	2011-03-03 17:00:11.000000000 +0100
@@ -0,0 +1,44 @@ 
+/* { dg-do run } */
+
+extern void abort (void);
+_Bool v, x1, x2, x3, x4, x5, x6, x7, x8;
+
+void
+foo (void)
+{
+  #pragma omp atomic capture
+  v = ++x1;
+  if (!v)
+    abort ();
+  #pragma omp atomic capture
+  v = x2++;
+  if (v)
+    abort ();
+  #pragma omp atomic capture
+  v = --x3;
+  if (v)
+    abort ();
+  #pragma omp atomic capture
+  v = x4--;
+  if (!v)
+    abort ();
+  #pragma omp atomic capture
+  { v = x5; x5 |= 1; }
+  if (v)
+    abort ();
+  #pragma omp atomic capture
+  { x6 |= 1; v = x6; }
+  if (!v)
+    abort ();
+}
+
+int
+main ()
+{
+  #pragma omp atomic write
+  x3 = 1;
+  #pragma omp atomic write
+  x4 = 1;
+  foo ();
+  return 0;
+}