--- gcc/omp-low.c.jj	2013-01-11 09:02:35.000000000 +0100
+++ gcc/omp-low.c	2013-02-06 10:42:19.016346866 +0100
@@ -757,12 +757,20 @@ use_pointer_for_field (tree decl, omp_co
       if (TREE_ADDRESSABLE (decl))
 	return true;
 
+      /* lower_send_shared_vars only uses copy-in, but not copy-out
+	 for these.  */
+      if (TREE_READONLY (decl)
+	  || ((TREE_CODE (decl) == RESULT_DECL
+	       || TREE_CODE (decl) == PARM_DECL)
+	      && DECL_BY_REFERENCE (decl)))
+	return false;
+
       /* Disallow copy-in/out in nested parallel if
 	 decl is shared in outer parallel, otherwise
 	 each thread could store the shared variable
 	 in its own copy-in location, making the
 	 variable no longer really shared.  */
-      if (!TREE_READONLY (decl) && shared_ctx->is_nested)
+      if (shared_ctx->is_nested)
 	{
 	  omp_context *up;
 
@@ -785,11 +793,10 @@ use_pointer_for_field (tree decl, omp_co
 	    }
 	}
 
-      /* For tasks avoid using copy-in/out, unless they are readonly
-	 (in which case just copy-in is used).  As tasks can be
+      /* For tasks avoid using copy-in/out.  As tasks can be
 	 deferred or executed in different thread, when GOMP_task
 	 returns, the task hasn't necessarily terminated.  */
-      if (!TREE_READONLY (decl) && is_task_ctx (shared_ctx))
+      if (is_task_ctx (shared_ctx))
 	{
 	  tree outer;
 	maybe_mark_addressable_and_ret:
--- gcc/testsuite/g++.dg/gomp/pr56217.C.jj	2013-02-06 10:44:19.001660809 +0100
+++ gcc/testsuite/g++.dg/gomp/pr56217.C	2013-02-06 10:45:56.991115914 +0100
@@ -0,0 +1,14 @@
+// PR middle-end/56217
+// { dg-do compile }
+// { dg-options "-fopenmp" }
+
+struct S { int *p; S (); S (S &); };
+
+S
+foo ()
+{
+  S s;
+  #pragma omp task shared (s)
+    s.p = 0;
+  return s;
+}
--- libgomp/testsuite/libgomp.c++/pr56217.C.jj	2013-02-06 10:48:10.125404318 +0100
+++ libgomp/testsuite/libgomp.c++/pr56217.C	2013-02-06 11:16:30.000000000 +0100
@@ -0,0 +1,36 @@
+// PR middle-end/56217
+// { dg-do run }
+// { dg-options "-std=c++0x" }
+
+extern "C" void abort ();
+
+template <typename T>
+struct ptr {
+  T *p;
+  ptr () : p () {}
+  ptr (ptr &) = delete;
+  ptr (ptr &&o) : p(o) {}
+  operator T * () { return p; }
+};
+
+int a[6] = { 100, 101, 102, 103, 104, 105 };
+
+static ptr<int>
+f ()
+{
+  ptr<int> pt;
+  #pragma omp task shared (pt)
+    pt.p = a + 2;
+  #pragma omp taskwait
+  return pt;
+}
+
+int
+main ()
+{
+  ptr<int> pt;
+  #pragma omp parallel
+  #pragma omp single
+  if (f () != a + 2 || *f () != 102)
+    abort ();
+}
