diff mbox

#pragma GCC unroll support

Message ID 50C46965-AAA5-4F52-B4B8-2C12EA8D3864@comcast.net
State New
Headers show

Commit Message

Mike Stump March 5, 2015, 11:12 p.m. UTC
On Jan 30, 2015, at 8:27 AM, Mike Stump <mikestump@comcast.net> wrote:
> On Jan 30, 2015, at 7:49 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>> Use error_at, and %u directly in the format.
> 
> Done.

Ping?

Comments

Sandra Loosemore March 6, 2015, 1:31 a.m. UTC | #1
On 03/05/2015 04:12 PM, Mike Stump wrote:
>
> Ping?
>

Just commenting on the documentation part:

> Index: doc/extend.texi
> ===================================================================
> --- doc/extend.texi	(revision 220084)
> +++ doc/extend.texi	(working copy)
> @@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
>  @}
>  @end smallexample
>
> +@table @code
> +@item #pragma GCC unroll @var{n}
> +@cindex pragma GCC unroll @var{n}
> +
> +With this pragma, the programmer informs the optimizer how many times
> +a loop should be unrolled.  A 0 or 1 informs the compiler to not
> +perform any loop unrolling.  The pragma must be immediately before
> +@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
> +and applies only to the loop that follows.  @var{n} is an
> +assignment-expression that evaluates to an integer constant.
> +
> +@end table
>
>  @node Unnamed Fields
>  @section Unnamed struct/union fields within structs/unions

User documentation shouldn't refer to the reader as "the programmer"; 
either use the second person "you" or the imperative.  I'd also 
rearrange the paragraph slightly to put the two sentences about the 
parameter together, something like:

Use this pragma to inform the compiler how many times a loop should be 
unrolled.  The pragma must be immediately before
@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
and applies only to the loop that follows.  @var{n} is an
assignment-expression that evaluates to an integer constant.
A 0 or 1 informs the compiler to not perform any loop unrolling.

-Sandra
Bernhard Reutner-Fischer March 6, 2015, 12:37 p.m. UTC | #2
On 6 March 2015 at 02:31, Sandra Loosemore <sandra@codesourcery.com> wrote:
> On 03/05/2015 04:12 PM, Mike Stump wrote:
>>
>>
>> Ping?
>>
>
> Just commenting on the documentation part:
[]

and a few coding style nits:

+++ b/gcc/c-family/c-pragma.c
@@ -1459,6 +1459,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep",
PRAGMA_IVDEP, false,
                                  false);

+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll",
PRAGMA_UNROLL, false,
+                                 false);
+

overlong line (also for the IVDEP above)

+++ b/gcc/c/c-parser.c
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);

Since we're now a C++ app, I would have given the unsigned short unroll
parameter a default argument of 0.
Same for
finish_while_stmt_cond, finish_do_stmt, finish_for_cond et al.

In cp_parser_range_for(), I take it you remember there is a // TODO.
I am attaching an unroll-5.C which might show that this does not seem
to be implemented yet, IIUC.

gcc/loop-unroll.c::decide_unrolling()
I'd put the "if (loop->unroll == 1) {continue}" earlier in the
FOR_EACH_LOOP body (we're C++ nowadays) but maybe our optimizers are
good enough to do that anyway (but I fear we're not up to that?).

I did not see c/c++ tests for both !DIR$ UNROLL and !DIR$ IVDEP, fwiw.
You seem to handle both placements properly, though.
cheers,
Joseph Myers March 7, 2015, 1:16 a.m. UTC | #3
On Thu, 5 Mar 2015, Mike Stump wrote:

> On Jan 30, 2015, at 8:27 AM, Mike Stump <mikestump@comcast.net> wrote:
> > On Jan 30, 2015, at 7:49 AM, Joseph Myers <joseph@codesourcery.com> wrote:
> >> Use error_at, and %u directly in the format.
> > 
> > Done.
> 
> Ping?

I don't see any sign of 
<https://gcc.gnu.org/ml/gcc-patches/2015-01/msg02735.html> having been 
addressed.
diff mbox

Patch

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 220084)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -7870,17 +7870,20 @@  gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 NULL_TREE);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c	(revision 220084)
+++ c/c-parser.c	(working copy)
@@ -1217,9 +1217,9 @@  static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@  c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@  c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@  c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@  c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@  c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@  c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@  c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@  c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   NULL_TREE);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,45 @@  c_parser_objc_at_dynamic_declaration (c_
 }
 
 
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  location_t location = c_parser_peek_token (parser)->location;
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9778,46 @@  c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-	  && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-	  && !c_parser_next_token_is_keyword (parser, RID_DO))
-	{
-	  c_parser_error (parser, "for, while or do statement expected");
-	  return false;
-	}
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
-      else
-	c_parser_do_statement (parser, true);
+      {
+	bool ivdep = c_parse_pragma_ivdep (parser);
+	unsigned short unroll = 0;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+	  unroll = c_parser_pragma_unroll (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = c_parser_pragma_unroll (parser);
+	bool ivdep = false;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+	  ivdep = c_parse_pragma_ivdep (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c	(revision 220084)
+++ c-family/c-pragma.c	(working copy)
@@ -1456,6 +1456,10 @@  init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h	(revision 220084)
+++ c-family/c-pragma.h	(working copy)
@@ -69,6 +69,7 @@  typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 220084)
+++ cfgloop.h	(working copy)
@@ -189,6 +189,11 @@  struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  0 means no information
+     was given, so do what we always do.  A value of 1 means don't
+     unroll the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c	(revision 220084)
+++ cfgloopmanip.c	(working copy)
@@ -1038,6 +1038,7 @@  copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c	(revision 220084)
+++ cp/cp-array-notation.c	(working copy)
@@ -81,7 +81,7 @@  create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h	(revision 220084)
+++ cp/cp-tree.h	(working copy)
@@ -5644,7 +5644,7 @@  extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@  extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, unsigned short);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, unsigned short);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, unsigned short);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c	(revision 220084)
+++ cp/init.c	(working copy)
@@ -3693,7 +3693,7 @@  build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c	(revision 220084)
+++ cp/parser.c	(working copy)
@@ -2044,15 +2044,15 @@  static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@  cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@  cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@  cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+		 unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@  cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@  cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@  cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@  cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@  do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@  cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@  cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+			       unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@  cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@  cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10866,7 +10876,7 @@  cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,41 @@  cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_require_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33113,39 @@  cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+	unsigned short unroll = 0;
 	cp_token *tok;
 	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_UNROLL)
+	  {
+	    unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, ivdep, unroll);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	bool ivdep = false;
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_IVDEP)
+	  {
+	    ivdep = cp_parser_pragma_ivdep (parser, tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
 	if (tok->type != CPP_KEYWORD
 	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
 		&& tok->keyword != RID_DO))
@@ -33078,7 +33153,7 @@  cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, ivdep, unroll);
 	return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c	(revision 220084)
+++ cp/pt.c	(working copy)
@@ -13886,7 +13886,7 @@  tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@  tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@  tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@  tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@  tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp,
+			  RECUR (TREE_OPERAND (t, 1)),
+			  RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 220084)
+++ cp/semantics.c	(working copy)
@@ -802,7 +802,8 @@  begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+			unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@  finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@  finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@  finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@  finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@  finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 220084)
+++ doc/extend.texi	(working copy)
@@ -17881,6 +17881,18 @@  void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{n}
+@cindex pragma GCC unroll @var{n}
+
+Use this pragma to inform the compiler how many times a loop should be
+unrolled.  The pragma must be immediately before
+@samp{#pragma GCC ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.  @var{n} is an
+assignment-expression that evaluates to an integer constant.
+A 0 or 1 informs the compiler to not perform any loop unrolling.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(revision 220084)
+++ fortran/trans-stmt.c	(working copy)
@@ -2876,9 +2876,10 @@  gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: function.h
===================================================================
--- function.h	(revision 220084)
+++ function.h	(working copy)
@@ -670,6 +670,10 @@  struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma GCC unroll has been used in the body.  Used by
+     RTL unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gimple-low.c
===================================================================
--- gimple-low.c	(revision 220084)
+++ gimple-low.c	(working copy)
@@ -347,7 +347,7 @@  lower_stmt (gimple_stmt_iterator *gsi, s
 	for (i = 0; i < gimple_call_num_args (stmt); i++)
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
-	    if (EXPR_P (arg))
+	    if (arg && EXPR_P (arg))
 	      TREE_SET_BLOCK (arg, data->block);
 	  }
 
Index: gimple-walk.c
===================================================================
--- gimple-walk.c	(revision 220084)
+++ gimple-walk.c	(working copy)
@@ -261,7 +261,7 @@  walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
 	{
-	  if (wi)
+	  if (wi && gimple_call_arg (stmt, i))
 	    wi->val_only
 	      = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
 	  ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 220084)
+++ gimplify.c	(working copy)
@@ -2908,6 +2908,9 @@  gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
 	{
+	case annot_expr_unroll_kind:
+	  cfun->has_unroll = 1;
+	  /* fall-through */
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@  gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7957,7 +7961,7 @@  gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: loop-init.c
===================================================================
--- loop-init.c	(revision 220084)
+++ loop-init.c	(working copy)
@@ -375,6 +375,7 @@  pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
 	  || flag_unswitch_loops
 	  || flag_unroll_loops
+	  || cfun->has_unroll
 #ifdef HAVE_doloop_end
 	  || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@  public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+	      || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 220084)
+++ loop-unroll.c	(working copy)
@@ -243,16 +243,26 @@  report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+	flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+	{
+	  dump_printf_loc (TDF_RTL, locus,
+			   "not unrolling loop, user didn't want it unrolled\n");
+	  continue;
+	}
+
       if (dump_enabled_p ())
 	dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@  decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@  decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@  decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@  decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be a power of 2, as code-gen
+     requires it and we are unable to cope with overflows in the
+     computation of the number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@  compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@  decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 220084)
+++ lto-streamer-in.c	(working copy)
@@ -751,6 +751,7 @@  input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 220084)
+++ lto-streamer-out.c	(working copy)
@@ -1884,6 +1884,7 @@  output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: testsuite/c-c++-common/unroll-1.c
===================================================================
--- testsuite/c-c++-common/unroll-1.c	(revision 0)
+++ testsuite/c-c++-common/unroll-1.c	(working copy)
@@ -0,0 +1,40 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+
+  #pragma GCC unroll 4+4
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-2.c
===================================================================
--- testsuite/c-c++-common/unroll-2.c	(revision 0)
+++ testsuite/c-c++-common/unroll-2.c	(working copy)
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-3.c
===================================================================
--- testsuite/c-c++-common/unroll-3.c	(revision 0)
+++ testsuite/c-c++-common/unroll-3.c	(working copy)
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-4.c
===================================================================
--- testsuite/c-c++-common/unroll-4.c	(revision 0)
+++ testsuite/c-c++-common/unroll-4.c	(working copy)
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll" } */
+
+void bar(int);
+
+int j;
+
+void test1() {
+  unsigned long m = j;
+  unsigned long i;
+
+  #pragma GCC unroll 20000000000	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll n	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "declared" "" { target *-*-* } 21 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll 1+i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4,4		/* { dg-error "expected end of line before" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4.2	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 220084)
+++ tree-cfg.c	(working copy)
@@ -316,6 +316,10 @@  replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+									2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -365,6 +369,7 @@  replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -3385,6 +3390,8 @@  verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+	continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
 	   && !is_gimple_val (arg))
 	  || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@  print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 220084)
+++ tree-core.h	(working copy)
@@ -725,6 +725,7 @@  enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c	(revision 220084)
+++ tree-pretty-print.c	(working copy)
@@ -2313,6 +2313,10 @@  dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (pp, ", unroll %d",
+		     (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 220084)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -686,8 +686,7 @@  try_unroll_loop_completely (struct loop
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@  try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@  try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If they wanted to unroll more than we want, don't unroll
+	     it completely.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      dump_printf_loc (report_flags, locus,
+	        "not unrolling loop, "
+		"user didn't want it unrolled completely.\n");
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+		  "Not unrolling loop %d: "
+		  "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
 	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  large = tree_estimate_loop_size
+	            (loop, exit, edge_to_cancel, &size,
+		     PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complette unrolling is major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otherwise.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 " number of branches on hot path in the unrolled sequence"
+			 " reach --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "(--param max-completely-peeled-insns limit reached).\n",
+			 loop->num);
+	      return false;
+	    }
 	}
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@  try_unroll_loop_completely (struct loop
       else
 	gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path.  Doing so may remove outer loop and
+	 confuse bookkeeping code in
+	 tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@  try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@  try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@  try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@  try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@  tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@  public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: tree.def
===================================================================
--- tree.def	(revision 220084)
+++ tree.def	(working copy)
@@ -1365,8 +1365,9 @@  DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */