diff mbox series

[COMMITTED] Implementation of asm goto outputs

Message ID d4dd70d9-8f20-7770-c591-ae97d343010c@redhat.com
State New
Headers show
Series [COMMITTED] Implementation of asm goto outputs | expand

Commit Message

Vladimir Makarov Nov. 13, 2020, 6:08 p.m. UTC
The original patch has been modified according to the reviewers comments 
and the following patch has been committed.
diff mbox series

Patch

commit e3b3b59683c1e7d31a9d313dd97394abebf644be
Author: Vladimir N. Makarov <vmakarov@redhat.com>
Date:   Fri Nov 13 12:45:59 2020 -0500

    [PATCH] Implementation of asm goto outputs
    
    gcc/
            * cfgexpand.c (expand_asm_stmt): Output asm goto with outputs too.
            Place insns after asm goto on edges.
            * doc/extend.texi: Reflect the changes in asm goto documentation.
            * gimple.c (gimple_build_asm_1): Remove an assert checking output
            absence for asm goto.
            * gimple.h (gimple_asm_label_op, gimple_asm_set_label_op): Take
            possible asm goto outputs into account.
            * ira.c (ira): Remove critical edges for potential asm goto output
            reloads.
            (ira_nullify_asm_goto): New function.
            * ira.h (ira_nullify_asm_goto): New prototype.
            * lra-assigns.c (lra_split_hard_reg_for): Use ira_nullify_asm_goto.
            Check that splitting is done inside a basic block.
            * lra-constraints.c (curr_insn_transform): Permit output reloads
            for any jump insn.
            * lra-spills.c (lra_final_code_change): Remove USEs added in ira
            for asm gotos.
            * lra.c (lra_process_new_insns): Place output reload insns after
            jumps in the beginning of destination BBs.
            * reload.c (find_reloads): Report error for asm gotos with
            outputs.  Modify them to keep CFG consistency to avoid crashes.
            * tree-into-ssa.c (rewrite_stmt): Don't put debug stmt after asm
            goto.
    
    gcc/c/
            * c-parser.c (c_parser_asm_statement): Parse outputs for asm
            goto too.
            * c-typeck.c (build_asm_expr): Remove an assert checking output
            absence for asm goto.
    
    gcc/cp
            * parser.c (cp_parser_asm_definition): Parse outputs for asm
            goto too.
    
    gcc/testsuite/
            * c-c++-common/asmgoto-2.c: Permit output in asm goto.
            * gcc.c-torture/compile/asmgoto-2.c: New.
            * gcc.c-torture/compile/asmgoto-3.c: New.
            * gcc.c-torture/compile/asmgoto-4.c: New.
            * gcc.c-torture/compile/asmgoto-5.c: New.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index f4c4cf7bf8f..7540a15d65d 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -7144,10 +7144,7 @@  c_parser_asm_statement (c_parser *parser)
 	switch (section)
 	  {
 	  case 0:
-	    /* For asm goto, we don't allow output operands, but reserve
-	       the slot for a future extension that does allow them.  */
-	    if (!is_goto)
-	      outputs = c_parser_asm_operands (parser);
+	    outputs = c_parser_asm_operands (parser);
 	    break;
 	  case 1:
 	    inputs = c_parser_asm_operands (parser);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 26a5f7128d2..413109c916c 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -10666,10 +10666,6 @@  build_asm_expr (location_t loc, tree string, tree outputs, tree inputs,
       TREE_VALUE (tail) = input;
     }
 
-  /* ASMs with labels cannot have outputs.  This should have been
-     enforced by the parser.  */
-  gcc_assert (outputs == NULL || labels == NULL);
-
   args = build_stmt (loc, ASM_EXPR, string, outputs, inputs, clobbers, labels);
 
   /* asm statements without outputs, including simple ones, are treated
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 1b7bdbc15be..1df6f4bc55a 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3371,20 +3371,21 @@  expand_asm_stmt (gasm *stmt)
 			       ARGVEC CONSTRAINTS OPNAMES))
      If there is more than one, put them inside a PARALLEL.  */
 
-  if (nlabels > 0 && nclobbers == 0)
-    {
-      gcc_assert (noutputs == 0);
-      emit_jump_insn (body);
-    }
-  else if (noutputs == 0 && nclobbers == 0)
+  if (noutputs == 0 && nclobbers == 0)
     {
       /* No output operands: put in a raw ASM_OPERANDS rtx.  */
-      emit_insn (body);
+      if (nlabels > 0)
+	emit_jump_insn (body);
+      else
+	emit_insn (body);
     }
   else if (noutputs == 1 && nclobbers == 0)
     {
       ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0];
-      emit_insn (gen_rtx_SET (output_rvec[0], body));
+      if (nlabels > 0)
+	emit_jump_insn (gen_rtx_SET (output_rvec[0], body));
+      else 
+	emit_insn (gen_rtx_SET (output_rvec[0], body));
     }
   else
     {
@@ -3461,7 +3462,27 @@  expand_asm_stmt (gasm *stmt)
   if (after_md_seq)
     emit_insn (after_md_seq);
   if (after_rtl_seq)
-    emit_insn (after_rtl_seq);
+    {
+      if (nlabels == 0)
+	emit_insn (after_rtl_seq);
+      else
+	{
+	  edge e;
+	  edge_iterator ei;
+	  
+	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
+	    {
+	      start_sequence ();
+	      for (rtx_insn *curr = after_rtl_seq;
+		   curr != NULL_RTX;
+		   curr = NEXT_INSN (curr))
+		emit_insn (copy_insn (PATTERN (curr)));
+	      rtx_insn *copy = get_insns ();
+	      end_sequence ();
+	      insert_insn_on_edge (copy, e);
+	    }
+	}
+    }
 
   free_temp_slots ();
   crtl->has_asm_statement = 1;
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 6b0447ea496..68f1cfa23f2 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -20534,8 +20534,7 @@  cp_parser_asm_definition (cp_parser* parser)
 	      && cp_lexer_next_token_is_not (parser->lexer,
 					     CPP_SCOPE)
 	      && cp_lexer_next_token_is_not (parser->lexer,
-					     CPP_CLOSE_PAREN)
-	      && !goto_p)
+					     CPP_CLOSE_PAREN))
             {
               outputs = cp_parser_asm_operand_list (parser);
               if (outputs == error_mark_node)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 4ddbf80a229..af25f66c8b4 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9566,7 +9566,7 @@  asm @var{asm-qualifiers} ( @var{AssemblerTemplate}
                  @r{[} : @var{Clobbers} @r{]} @r{]})
 
 asm @var{asm-qualifiers} ( @var{AssemblerTemplate} 
-                      : 
+                      : @var{OutputOperands}
                       : @var{InputOperands}
                       : @var{Clobbers}
                       : @var{GotoLabels})
@@ -9673,7 +9673,7 @@  there is no need for the output variables. Also, the optimizers may move
 code out of loops if they believe that the code will always return the same 
 result (i.e.@: none of its input values change between calls). Using the 
 @code{volatile} qualifier disables these optimizations. @code{asm} statements 
-that have no output operands, including @code{asm goto} statements, 
+that have no output operands and @code{asm goto} statements, 
 are implicitly volatile.
 
 This i386 code demonstrates a case that does not use (or require) the 
@@ -10532,9 +10532,6 @@  case, consider using the @code{__builtin_unreachable} intrinsic after the
 using the @code{hot} and @code{cold} label attributes (@pxref{Label 
 Attributes}).
 
-An @code{asm goto} statement cannot have outputs.
-This is due to an internal restriction of 
-the compiler: control transfer instructions cannot have outputs. 
 If the assembler code does modify anything, use the @code{"memory"} clobber 
 to force the 
 optimizers to flush all register values to memory and reload them if 
@@ -10543,6 +10540,13 @@  necessary after the @code{asm} statement.
 Also note that an @code{asm goto} statement is always implicitly
 considered volatile.
 
+Be careful when you set output operands inside @code{asm goto} only on
+some possible control flow paths.  If you don't set up the output on
+given path and never use it on this path, it is okay.  Otherwise, you
+should use @samp{+} constraint modifier meaning that the operand is
+input and output one.  With this modifier you will have the correct
+values on all possible paths from the @code{asm goto}.
+
 To reference a label in the assembler template,
 prefix it with @samp{%l} (lowercase @samp{L}) followed 
 by its (zero-based) position in @var{GotoLabels} plus the number of input 
@@ -10588,6 +10592,41 @@  error:
 @}
 @end example
 
+The following example shows an @code{asm goto} that uses an output.
+
+@example
+int foo(int count)
+@{
+  asm goto ("dec %0; jb %l[stop]"
+            : "+r" (count)
+            :
+            :
+            : stop);
+  return count;
+stop:
+  return 0;
+@}
+@end example
+
+The following artificial example shows an @code{asm goto} that sets
+up an output only on one path inside the @code{asm goto}.  Usage of
+constraint modifier @code{=} instead of @code{+} would be wrong as
+@code{factor} is used on all paths from the @code{asm goto}.
+
+@example
+int foo(int inp)
+@{
+  int factor = 0;
+  asm goto ("cmp %1, 10; jb %l[lab]; mov 2, %0"
+            : "+r" (factor)
+            : "r" (inp)
+            :
+            : lab);
+lab:
+  return inp * factor; /* return 2 * inp or 0 if inp < 10 */
+@}
+@end example
+
 @anchor{x86Operandmodifiers}
 @subsubsection x86 Operand Modifiers
 
diff --git a/gcc/gimple.c b/gcc/gimple.c
index 1afed88e1f1..60afc54e794 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -611,10 +611,6 @@  gimple_build_asm_1 (const char *string, unsigned ninputs, unsigned noutputs,
   gasm *p;
   int size = strlen (string);
 
-  /* ASMs with labels cannot have outputs.  This should have been
-     enforced by the front end.  */
-  gcc_assert (nlabels == 0 || noutputs == 0);
-
   p = as_a <gasm *> (
         gimple_build_with_ops (GIMPLE_ASM, ERROR_MARK,
 			       ninputs + noutputs + nclobbers + nlabels));
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 62b5a8a6124..d359f7827b1 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -4025,7 +4025,7 @@  static inline tree
 gimple_asm_label_op (const gasm *asm_stmt, unsigned index)
 {
   gcc_gimple_checking_assert (index < asm_stmt->nl);
-  return asm_stmt->op[index + asm_stmt->ni + asm_stmt->nc];
+  return asm_stmt->op[index + asm_stmt->no + asm_stmt->ni + asm_stmt->nc];
 }
 
 /* Set LABEL_OP to be label operand INDEX in GIMPLE_ASM ASM_STMT.  */
@@ -4035,7 +4035,7 @@  gimple_asm_set_label_op (gasm *asm_stmt, unsigned index, tree label_op)
 {
   gcc_gimple_checking_assert (index < asm_stmt->nl
 			      && TREE_CODE (label_op) == TREE_LIST);
-  asm_stmt->op[index + asm_stmt->ni + asm_stmt->nc] = label_op;
+  asm_stmt->op[index + asm_stmt->no + asm_stmt->ni + asm_stmt->nc] = label_op;
 }
 
 /* Return the string representing the assembly instruction in
diff --git a/gcc/ira.c b/gcc/ira.c
index 5443031674e..3c824e9be39 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -5401,6 +5401,48 @@  ira (FILE *f)
   int ira_max_point_before_emit;
   bool saved_flag_caller_saves = flag_caller_saves;
   enum ira_region saved_flag_ira_region = flag_ira_region;
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  bool output_jump_reload_p = false;
+  
+  if (ira_use_lra_p)
+    {
+      /* First put potential jump output reloads on the output edges
+	 as USE which will be removed at the end of LRA.  The major
+	 goal is actually to create BBs for critical edges for LRA and
+	 populate them later by live info.  In LRA it will be
+	 difficult to do this. */
+      FOR_EACH_BB_FN (bb, cfun)
+	{
+	  rtx_insn *end = BB_END (bb);
+	  if (!JUMP_P (end))
+	    continue;
+	  extract_insn (end);
+	  for (int i = 0; i < recog_data.n_operands; i++)
+	    if (recog_data.operand_type[i] != OP_IN)
+	      {
+		output_jump_reload_p = true;
+		FOR_EACH_EDGE (e, ei, bb->succs)
+		  if (EDGE_CRITICAL_P (e)
+		      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
+		    {
+		      ira_assert (!(e->flags & EDGE_ABNORMAL));
+		      start_sequence ();
+		      /* We need to put some no-op insn here.  We can
+			 not put a note as commit_edges insertion will
+			 fail.  */
+		      emit_insn (gen_rtx_USE (VOIDmode, const1_rtx));
+		      rtx_insn *insns = get_insns ();
+		      end_sequence ();
+		      insert_insn_on_edge (insns, e);
+		    }
+		break;
+	      }
+	}
+      if (output_jump_reload_p)
+	commit_edge_insertions ();
+    }
 
   if (flag_ira_verbose < 10)
     {
@@ -5709,6 +5751,21 @@  ira (FILE *f)
     }
 }
 
+/* Modify asm goto to avoid further trouble with this insn.  We can
+   not replace the insn by USE as in other asm insns as we still
+   need to keep CFG consistency.  */
+void
+ira_nullify_asm_goto (rtx_insn *insn)
+{
+  ira_assert (JUMP_P (insn) && INSN_CODE (insn) < 0);
+  rtx tmp = extract_asm_operands (PATTERN (insn));
+  PATTERN (insn) = gen_rtx_ASM_OPERANDS (VOIDmode, ggc_strdup (""), "", 0,
+					 rtvec_alloc (0),
+					 rtvec_alloc (0),
+					 ASM_OPERANDS_LABEL_VEC (tmp),
+					 ASM_OPERANDS_SOURCE_LOCATION(tmp));
+}
+
 static void
 do_reload (void)
 {
diff --git a/gcc/ira.h b/gcc/ira.h
index c30f36aecca..0da06ee846b 100644
--- a/gcc/ira.h
+++ b/gcc/ira.h
@@ -213,6 +213,7 @@  extern void ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode);
 extern bool ira_remove_insn_scratches (rtx_insn *insn, bool all_p, FILE *dump_file,
 				       rtx (*get_reg) (rtx original));
 extern void ira_restore_scratches (FILE *dump_file);
+extern void ira_nullify_asm_goto (rtx_insn *insn);
 
 /* ira-costs.c */
 extern void ira_costs_c_finalize (void);
diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c
index 40e323c2a64..b040f7f6f33 100644
--- a/gcc/lra-assigns.c
+++ b/gcc/lra-assigns.c
@@ -1715,8 +1715,8 @@  find_reload_regno_insns (int regno, rtx_insn * &start, rtx_insn * &finish)
 	start_insn = lra_insn_recog_data[uid]->insn;
       n++;
     }
-  /* For reload pseudo we should have at most 3 insns referring for it:
-     input/output reload insns and the original insn.  */
+  /* For reload pseudo we should have at most 3 insns referring for
+     it: input/output reload insns and the original insn.  */
   if (n > 3)
     return false;
   if (n > 1)
@@ -1792,7 +1792,8 @@  lra_split_hard_reg_for (void)
       {
 	if (! find_reload_regno_insns (i, first, last))
 	  continue;
-	if (spill_hard_reg_in_range (i, rclass, first, last))
+	if (BLOCK_FOR_INSN (first) == BLOCK_FOR_INSN (last)
+	    && spill_hard_reg_in_range (i, rclass, first, last))
 	  {
 	    bitmap_clear (&failed_reload_pseudos);
 	    return true;
@@ -1817,16 +1818,10 @@  lra_split_hard_reg_for (void)
 	  lra_asm_error_p = asm_p = true;
 	  error_for_asm (insn,
 			 "%<asm%> operand has impossible constraints");
-	  /* Avoid further trouble with this insn.
-	     For asm goto, instead of fixing up all the edges
-	     just clear the template and clear input operands
-	     (asm goto doesn't have any output operands).  */
+	  /* Avoid further trouble with this insn.  */
 	  if (JUMP_P (insn))
 	    {
-	      rtx asm_op = extract_asm_operands (PATTERN (insn));
-	      ASM_OPERANDS_TEMPLATE (asm_op) = ggc_strdup ("");
-	      ASM_OPERANDS_INPUT_VEC (asm_op) = rtvec_alloc (0);
-	      ASM_OPERANDS_INPUT_CONSTRAINT_VEC (asm_op) = rtvec_alloc (0);
+	      ira_nullify_asm_goto (insn);
 	      lra_update_insn_regno_info (insn);
 	    }
 	  else
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index fbc47baa998..f034c7749e9 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -3954,10 +3954,10 @@  curr_insn_transform (bool check_only_p)
   no_input_reloads_p = no_output_reloads_p = false;
   goal_alt_number = -1;
   change_p = sec_mem_p = false;
-  /* JUMP_INSNs and CALL_INSNs are not allowed to have any output
-     reloads; neither are insns that SET cc0.  Insns that use CC0 are
-     not allowed to have any input reloads.  */
-  if (JUMP_P (curr_insn) || CALL_P (curr_insn))
+  /* CALL_INSNs are not allowed to have any output reloads; neither
+     are insns that SET cc0.  Insns that use CC0 are not allowed to
+     have any input reloads.  */
+  if (CALL_P (curr_insn))
     no_output_reloads_p = true;
 
   if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (curr_insn)))
diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 8082a5b489f..85c36647be0 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -788,6 +788,14 @@  lra_final_code_change (void)
 	{
 	  rtx pat = PATTERN (insn);
 
+	  if (GET_CODE (pat) == USE && XEXP (pat, 0) == const1_rtx)
+	    {
+	      /* Remove markers to eliminate critical edges for jump insn
+		 output reloads (see code in ira.c::ira).  */
+	      lra_invalidate_insn_data (insn);
+	      delete_insn (insn);
+	      continue;
+	    }
 	  if (GET_CODE (pat) == CLOBBER && LRA_TEMP_CLOBBER_P (pat))
 	    {
 	      /* Remove clobbers temporarily created in LRA.  We don't
diff --git a/gcc/lra.c b/gcc/lra.c
index 664f1b5e5da..673554d0a4b 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -1852,8 +1852,6 @@  void
 lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
 		       const char *title)
 {
-  rtx_insn *last;
-
   if (before == NULL_RTX && after == NULL_RTX)
     return;
   if (lra_dump_file != NULL)
@@ -1864,12 +1862,6 @@  lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
 	  fprintf (lra_dump_file,"    %s before:\n", title);
 	  dump_rtl_slim (lra_dump_file, before, NULL, -1, 0);
 	}
-      if (after != NULL_RTX)
-	{
-	  fprintf (lra_dump_file, "    %s after:\n", title);
-	  dump_rtl_slim (lra_dump_file, after, NULL, -1, 0);
-	}
-      fprintf (lra_dump_file, "\n");
     }
   if (before != NULL_RTX)
     {
@@ -1883,12 +1875,63 @@  lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
     {
       if (cfun->can_throw_non_call_exceptions)
 	copy_reg_eh_region_note_forward (insn, after, NULL);
-      for (last = after; NEXT_INSN (last) != NULL_RTX; last = NEXT_INSN (last))
-	;
-      emit_insn_after (after, insn);
-      push_insns (last, insn);
-      setup_sp_offset (after, last);
+      if (! JUMP_P (insn))
+	{
+	  rtx_insn *last;
+	  
+	  if (lra_dump_file != NULL)
+	    {
+	      fprintf (lra_dump_file, "    %s after:\n", title);
+	      dump_rtl_slim (lra_dump_file, after, NULL, -1, 0);
+	    }
+	  for (last = after;
+	       NEXT_INSN (last) != NULL_RTX;
+	       last = NEXT_INSN (last))
+	    ;
+	  emit_insn_after (after, insn);
+	  push_insns (last, insn);
+	  setup_sp_offset (after, last);
+	}
+      else
+	{
+	  /* Put output reload insns on successor BBs: */
+	  edge_iterator ei;
+	  edge e;
+	  
+	  FOR_EACH_EDGE (e, ei, BLOCK_FOR_INSN (insn)->succs)
+	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
+	      {
+		/* We already made the edge no-critical in ira.c::ira */
+		lra_assert (!EDGE_CRITICAL_P (e));
+		rtx_insn *tmp = BB_HEAD (e->dest);
+		if (LABEL_P (tmp))
+		  tmp = NEXT_INSN (tmp);
+		if (NOTE_INSN_BASIC_BLOCK_P (tmp))
+		  tmp = NEXT_INSN (tmp);
+		start_sequence ();
+		for (rtx_insn *curr = after;
+		     curr != NULL_RTX;
+		     curr = NEXT_INSN (curr))
+		  emit_insn (copy_insn (PATTERN (curr)));
+		rtx_insn *copy = get_insns (), *last = get_last_insn ();
+		end_sequence ();
+		if (lra_dump_file != NULL)
+		  {
+		    fprintf (lra_dump_file, "    %s after in bb%d:\n", title,
+			     e->dest->index);
+		    dump_rtl_slim (lra_dump_file, copy, NULL, -1, 0);
+		  }
+		emit_insn_before (copy, tmp);
+		push_insns (last, PREV_INSN (copy));
+		setup_sp_offset (copy, last);
+		/* We can ignore BB live info here as it and reg notes
+		   will be updated before the next assignment
+		   sub-pass. */
+	      }
+	}
     }
+  if (lra_dump_file != NULL)
+    fprintf (lra_dump_file, "\n");
   if (cfun->can_throw_non_call_exceptions)
     {
       rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
diff --git a/gcc/reload.c b/gcc/reload.c
index 78b4049f465..445f9bdca43 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -2656,6 +2656,22 @@  find_reloads (rtx_insn *insn, int replace, int ind_levels, int live_known,
   hard_regs_live_known = live_known;
   static_reload_reg_p = reload_reg_p;
 
+  if (JUMP_P (insn) && INSN_CODE (insn) < 0)
+    {
+      extract_insn (insn);
+      for (i = 0; i < recog_data.n_operands; i++)
+	if (recog_data.operand_type[i] != OP_IN)
+	  break;
+      if (i < recog_data.n_operands)
+	{
+	  error_for_asm (insn,
+			 "the target does not support asm goto "
+			 "with outputs in %<asm%>");
+	  ira_nullify_asm_goto (insn);
+	  return 0;
+	}
+    }
+
   /* JUMP_INSNs and CALL_INSNs are not allowed to have any output reloads;
      neither are insns that SET cc0.  Insns that use CC0 are not allowed
      to have any input reloads.  */
diff --git a/gcc/testsuite/c-c++-common/asmgoto-2.c b/gcc/testsuite/c-c++-common/asmgoto-2.c
index 5bf45725d80..fb81cdec21d 100644
--- a/gcc/testsuite/c-c++-common/asmgoto-2.c
+++ b/gcc/testsuite/c-c++-common/asmgoto-2.c
@@ -7,7 +7,7 @@  foo (void)
   __label__ lab;
   int i = 0;
   asm goto ("" : : : : lab);
-  asm goto ("" : "=r" (i) : : : lab);	/* { dg-error "expected" } */
+  asm goto ("" : "=r" (i) : : : lab);
   asm goto ("" : : : : );	/* { dg-error "expected" } */
   asm goto ("" : : : "memory");	/* { dg-error "expected" } */
   asm goto ("" : : : );		/* { dg-error "expected" } */
diff --git a/gcc/testsuite/gcc.c-torture/compile/asmgoto-2.c b/gcc/testsuite/gcc.c-torture/compile/asmgoto-2.c
new file mode 100644
index 00000000000..f1b30c02884
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/asmgoto-2.c
@@ -0,0 +1,65 @@ 
+/* This test should be switched off for a new target with less than 4 allocatable registers */
+/* { dg-do compile } */
+int
+foo (void)
+{
+  int x, y, z, v;
+  
+  asm goto ("": "=r" (x), "=r" (y), "=r" (z), "=r" (v) : : : lab, lab2, lab3, lab4);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+}
+
+int
+foo2 (void)
+{
+  int x = 0, y = 1, z = 2, v = 3;
+  
+  asm goto ("": "+r" (x), "+r" (y), "+r" (z), "+r" (v) : : : lab, lab2, lab3, lab4);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+}
+
+int
+foo3 (void)
+{
+  int x, y, z, v;
+  
+  asm goto ("": "=rm" (x), "=mr" (y), "=rm" (z), "=mr" (v) : : : lab, lab2, lab3, lab4);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+}
+
+int
+foo4 (void)
+{
+  int x, y, z, v;
+  
+  asm goto ("": "=r,m" (x), "=m,r" (y), "=r,m" (z), "=m,r" (v) : : : lab, lab2, lab3, lab4);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/asmgoto-3.c b/gcc/testsuite/gcc.c-torture/compile/asmgoto-3.c
new file mode 100644
index 00000000000..842b73e772d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/asmgoto-3.c
@@ -0,0 +1,89 @@ 
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+
+int
+foo (void)
+{
+  int x;
+  
+  asm goto ("": "=a" (x) : : : lab);
+ lab:
+  return x;
+}
+
+int
+foo2 (void)
+{
+  int x, y;
+  
+  asm goto ("": "=a" (x), "=d" (y) : : : lab, lab2);
+ lab:
+  return x;
+ lab2:
+  return y;
+}
+
+int
+foo3 (void)
+{
+  int x, y, z;
+  
+  asm goto ("": "=a" (x), "=d" (y), "=c" (z) : : : lab, lab2, lab3);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+}
+
+int
+foo4 (void)
+{
+  int x, y, z, v;
+  
+  asm goto ("": "=a" (x), "=d" (y), "=c" (z) , "=b" (v) : : : lab, lab2, lab3, lab4);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+}
+
+int
+foo5 (void)
+{
+  int x, y, z, v, w;
+  
+  asm goto ("": "=a" (x), "=d" (y), "=c" (z), "=b" (v), "=S" (w) : : : lab, lab2, lab3, lab4, lab5);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+ lab5:
+  return w;
+}
+
+int
+foo6 (void)
+{
+  int x = 0, y = 1, z = 2, v = 3, w = 4;
+  
+  asm goto ("": "+a" (x), "+d" (y), "+c" (z), "+b" (v), "+S" (w) : : : lab, lab2, lab3, lab4, lab5);
+ lab:
+  return x;
+ lab2:
+  return y;
+ lab3:
+  return z;
+ lab4:
+  return v;
+ lab5:
+  return w;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/asmgoto-4.c b/gcc/testsuite/gcc.c-torture/compile/asmgoto-4.c
new file mode 100644
index 00000000000..844157e48fc
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/asmgoto-4.c
@@ -0,0 +1,14 @@ 
+/* Check that LRA really puts output reloads for p4 in two successors blocks */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } */
+/* { dg-options "-O0 -fdump-rtl-reload" } */
+
+int f (int *p1, int *p2, int *p3, int *p4) {
+  asm volatile goto (
+            ""
+            : "=r" (*p2), "=a" (p4)
+            : "r" (*p2), "r" (p2)
+            : "r8", "r9" : lab, lab2);
+ lab: return p2 - p4;
+ lab2: return p3 - p4;
+}
+/* { dg-final { scan-rtl-dump-times "Inserting insn reload after in bb" 2 "reload" } } */
diff --git a/gcc/testsuite/gcc.c-torture/compile/asmgoto-5.c b/gcc/testsuite/gcc.c-torture/compile/asmgoto-5.c
new file mode 100644
index 00000000000..94c14dd4005
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/asmgoto-5.c
@@ -0,0 +1,56 @@ 
+/* Test to generate output reload in asm goto on x86_64.  */
+/* { dg-do compile } */
+/* { dg-skip-if "no O0" { { i?86-*-* x86_64-*-* } && { ! ia32 } } { "-O0" } { "" } } */
+
+#if defined __x86_64__
+#define ASM(s) asm (s)
+#else
+#define ASM(s)
+#endif
+
+int
+foo (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8,
+     int a9, int a10, int a11, int a12, int a13, int a14, int a15, int a16)
+{
+  register int v0 ASM ("rax") = a3;
+  register int v1 ASM ("rbx") = a4;
+  register int v2 ASM ("rcx") = a5;
+  register int v3 ASM ("rdx") = a6;
+  register int v4 ASM ("rsi") = a7;
+  register int v5 ASM ("rdi") = a8;
+  register int v6 ASM ("r8") = a9;
+  register int v7 ASM ("r9") = a10;
+  register int v8 ASM ("r10") = a11;
+  register int v9 ASM ("r11") = a12;
+  register int v10 ASM ("r12") = a13;
+  register int v11 ASM ("r13") = a14;
+  register int v12 ASM ("r14") = a15;
+  register int v13 ASM ("r15") = a16;
+  int x;
+  
+  v0 += a0;
+  v1 += a1;
+  v2 += a2;
+  v0 |= a0;
+  v1 |= a1;
+  v2 |= a2;
+  v0 ^= a0;
+  v1 ^= a1;
+  v2 ^= a2;
+  v0 &= a0;
+  v1 &= a1;
+  v2 &= a2;
+  asm goto ("": "=r" (x) : : : lab);
+  a1 ^= a0;
+  a2 = a1;
+  a0 |= a2;
+  a0 |= x;
+ lab:
+  v0 += x + a0 + a1 + a2;
+  v1 -= a0 - a1 - a2;
+  v2 |= a0 | a1 | a2;
+  v3 |= a0 & a1 & a2;
+  v4 ^= a0 ^ a1 ^ a2;
+  return  v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + v10 + v11 + v12 + v13 + a0 + a1 + a2;
+}
+
diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index 1493b323956..51c619d3280 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -1412,6 +1412,10 @@  rewrite_stmt (gimple_stmt_iterator *si)
 	SET_DEF (def_p, name);
 	register_new_def (DEF_FROM_PTR (def_p), var);
 
+	/* Do not insert debug stmts if the stmt ends the BB.  */
+	if (stmt_ends_bb_p (stmt))
+	  continue;
+	
 	tracked_var = target_for_debug_bind (var);
 	if (tracked_var)
 	  {