diff -ur gcc/builtins.c gcc.new/builtins.c
--- gcc/builtins.c	2012-08-07 16:04:45.054348099 +0200
+++ gcc.new/builtins.c	2012-08-07 15:44:01.304349225 +0200
@@ -5376,6 +5376,7 @@
 
   expect = expand_normal (CALL_EXPR_ARG (exp, 1));
   expect = convert_memory_address (Pmode, expect);
+  expect = gen_rtx_MEM (mode, expect);
   desired = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode);
 
   weak = CALL_EXPR_ARG (exp, 3);
@@ -5383,14 +5384,15 @@
   if (host_integerp (weak, 0) && tree_low_cst (weak, 0) != 0)
     is_weak = true;
 
-  oldval = copy_to_reg (gen_rtx_MEM (mode, expect));
-
+  oldval = expect;
   if (!expand_atomic_compare_and_swap ((target == const0_rtx ? NULL : &target),
 				       &oldval, mem, oldval, desired,
 				       is_weak, success, failure))
     return NULL_RTX;
 
-  emit_move_insn (gen_rtx_MEM (mode, expect), oldval);
+  if (oldval != expect)
+    emit_move_insn (expect, oldval);
+
   return target;
 }
 
diff -ur gcc/config/s390/s390.md gcc.new/config/s390/s390.md
--- gcc/config/s390/s390.md	2012-08-07 16:04:54.204348621 +0200
+++ gcc.new/config/s390/s390.md	2012-08-07 16:00:21.934348628 +0200
@@ -8870,7 +8870,7 @@
 
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "register_operand")	;; bool success output
-   (match_operand:DGPR 1 "register_operand")	;; oldval output
+   (match_operand:DGPR 1 "nonimmediate_operand");; oldval output
    (match_operand:DGPR 2 "memory_operand")	;; memory
    (match_operand:DGPR 3 "register_operand")	;; expected intput
    (match_operand:DGPR 4 "register_operand")	;; newval intput
@@ -8879,9 +8879,17 @@
    (match_operand:SI 7 "const_int_operand")]	;; failure model
   ""
 {
-  rtx cc, cmp;
+  rtx cc, cmp, output = operands[1];
+
+  if (!register_operand (output, <MODE>mode))
+    output = gen_reg_rtx (<MODE>mode);
+
   emit_insn (gen_atomic_compare_and_swap<mode>_internal
-	     (operands[1], operands[2], operands[3], operands[4]));
+	     (output, operands[2], operands[3], operands[4]));
+
+  if (output != operands[1])
+    emit_move_insn (operands[1], output);
+
   cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
   cmp = gen_rtx_EQ (SImode, cc, const0_rtx);
   emit_insn (gen_cstorecc4 (operands[0], cmp, cc, const0_rtx));
