From patchwork Fri Jan 30 16:27:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mike Stump X-Patchwork-Id: 434955 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 380661402AE for ; Sat, 31 Jan 2015 03:29:15 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :content-type:mime-version:subject:from:in-reply-to:date:cc :message-id:references:to; q=dns; s=default; b=DIVMbVsSkMck0yrjJ Vdq1isuvvJQvkqhGYOCCcGJ243b9RjyyRkUH8Z0jvndQq+VfeN5gkB7wbws5OOHN fGjfZJtnMw6Lb+plzQMRDbCuGRTxLqazNAkRzlT655DVdunHtpSRvHwjRKu5zbo3 hYp3wxyGKTb6XGAOhN4Wd1SHxA= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :content-type:mime-version:subject:from:in-reply-to:date:cc :message-id:references:to; s=default; bh=x4swcHzYbrWX+2N+9OoCqoA ItTI=; b=czF5sWmBJpIYgyQ6febpZADEIEN80mc+2aLRi/0Diis8774OUv2Mrks wsYIL7d57kREvUS5OxlOmF8VNciZh+Ydi77nfz/9QOhJ6uju+6clH81SzGE93Ik4 7lfb6zQqjd20chpSeT13kMBiKjt77NxTCke4UPJ5LKUP4GqBK/JM= Received: (qmail 23165 invoked by alias); 30 Jan 2015 16:28:37 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 23035 invoked by uid 89); 30 Jan 2015 16:28:34 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, SPF_PASS, T_RP_MATCHES_RCVD autolearn=ham 
version=3.3.2 X-HELO: resqmta-po-09v.sys.comcast.net Received: from resqmta-po-09v.sys.comcast.net (HELO resqmta-po-09v.sys.comcast.net) (96.114.154.168) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Fri, 30 Jan 2015 16:28:24 +0000 Received: from resomta-po-08v.sys.comcast.net ([96.114.154.232]) by resqmta-po-09v.sys.comcast.net with comcast id mGU21p002516pyw01GUMus; Fri, 30 Jan 2015 16:28:21 +0000 Received: from [IPv6:2001:558:6045:a4:40c6:7199:cd03:b02d] ([IPv6:2001:558:6045:a4:40c6:7199:cd03:b02d]) by resomta-po-08v.sys.comcast.net with comcast id mGUK1p0072ztT3H01GUKkk; Fri, 30 Jan 2015 16:28:20 +0000 Mime-Version: 1.0 (Mac OS X Mail 7.3 \(1878.6\)) Subject: Re: #pragma GCC unroll support From: Mike Stump In-Reply-To: Date: Fri, 30 Jan 2015 08:27:06 -0800 Cc: Richard Biener , gcc-patches Patches , Jason Merrill Message-Id: References: <0596944B-9DDC-4299-8CBC-9B6EB06BEF68@comcast.net> <9FE5AF27-EB8A-4CC3-A345-A69C2BFD8F30@comcast.net> To: Joseph Myers X-IsSubscribed: yes On Jan 30, 2015, at 7:49 AM, Joseph Myers wrote: > Use error_at, and %u directly in the format. Done. Index: ada/gcc-interface/trans.c =================================================================== --- ada/gcc-interface/trans.c (revision 220084) +++ ada/gcc-interface/trans.c (working copy) @@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p) { /* Deal with the optimization hints. 
*/ if (LOOP_STMT_IVDEP (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + NULL_TREE); if (LOOP_STMT_NO_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_no_vector_kind)); + annot_expr_no_vector_kind), + NULL_TREE); if (LOOP_STMT_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_vector_kind)); + annot_expr_vector_kind), + NULL_TREE); gnu_cond = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE, Index: c/c-parser.c =================================================================== --- c/c-parser.c (revision 220084) +++ c/c-parser.c (working copy) @@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser static void c_parser_statement_after_labels (c_parser *); static void c_parser_if_statement (c_parser *); static void c_parser_switch_statement (c_parser *); -static void c_parser_while_statement (c_parser *, bool); -static void c_parser_do_statement (c_parser *, bool); -static void c_parser_for_statement (c_parser *, bool); +static void c_parser_while_statement (c_parser *, bool, unsigned short); +static void c_parser_do_statement (c_parser *, bool, unsigned short); +static void c_parser_for_statement (c_parser *, bool, unsigned short); static tree c_parser_asm_statement (c_parser *); static tree c_parser_asm_operands (c_parser *); static tree c_parser_asm_goto_operands (c_parser *); @@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse c_parser_switch_statement (parser); break; case RID_WHILE: - c_parser_while_statement (parser, false); + c_parser_while_statement (parser, 
false, 0); break; case RID_DO: - c_parser_do_statement (parser, false); + c_parser_do_statement (parser, false, 0); break; case RID_FOR: - c_parser_for_statement (parser, false); + c_parser_for_statement (parser, false, 0); break; case RID_CILK_FOR: if (!flag_cilkplus) @@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par */ static void -c_parser_while_statement (c_parser *parser, bool ivdep) +c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll) { tree block, cond, body, save_break, save_cont; location_t loc; @@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars "%<_Cilk_spawn%> statement cannot be used as a condition for while statement")) cond = error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); save_break = c_break_label; c_break_label = NULL_TREE; save_cont = c_cont_label; @@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars */ static void -c_parser_do_statement (c_parser *parser, bool ivdep) +c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll) { tree block, cond, body, save_break, save_cont, new_break, new_cont; location_t loc; @@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser, "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement")) cond = error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + cond = 
build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_unroll_kind), + build_int_cst (integer_type_node, + unroll)); if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>")) c_parser_skip_to_end_of_block_or_statement (parser); c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false); @@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser, */ static void -c_parser_for_statement (c_parser *parser, bool ivdep) +c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll) { tree block, cond, incr, save_break, save_cont, body; /* The following are only used when parsing an ObjC foreach statement. */ @@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser "%<GCC ivdep%> pragma"); cond = error_mark_node; } + else if (unroll) + { + c_parser_error (parser, "missing loop condition in loop with " + "%<GCC unroll%> pragma"); + cond = error_mark_node; + } else { c_parser_consume_token (parser); @@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser "expected %<;%>"); } if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, + annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); } /* Parse the increment expression (the third expression in a for-statement). 
In the case of a foreach-statement, this is @@ -9592,6 +9617,45 @@ c_parser_objc_at_dynamic_declaration (c_ } +static bool +c_parse_pragma_ivdep (c_parser *parser) +{ + c_parser_consume_pragma (parser); + c_parser_skip_to_pragma_eol (parser); + return true; +} + +static unsigned short +c_parser_pragma_unroll (c_parser *parser) +{ + unsigned short unroll; + c_parser_consume_pragma (parser); + location_t location = c_parser_peek_token (parser)->location; + tree expr = c_parser_expr_no_commas (parser, NULL).value; + mark_exp_read (expr); + expr = c_fully_fold (expr, false, NULL); + HOST_WIDE_INT lunroll = 0; + if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) + || TREE_CODE (expr) != INTEGER_CST + || (lunroll = tree_to_shwi (expr)) < 0 + || lunroll > USHRT_MAX) + { + error_at (location, "%<#pragma GCC unroll%> requires an" + " assignment-expression that evaluates to a non-negative" + " integral constant less than or equal to %u", USHRT_MAX); + unroll = 0; + } + else + { + unroll = (unsigned short) lunroll; + if (unroll == 0) + unroll = 1; + } + + c_parser_skip_to_pragma_eol (parser); + return unroll; +} + /* Handle pragmas. Some OpenMP pragmas are associated with, and therefore should be considered, statements. ALLOW_STMT is true if we're within the context of a function and such pragmas are to be allowed. 
Returns @@ -9714,21 +9778,46 @@ c_parser_pragma (c_parser *parser, enum c_parser_omp_declare (parser, context); return false; case PRAGMA_IVDEP: - c_parser_consume_pragma (parser); - c_parser_skip_to_pragma_eol (parser); - if (!c_parser_next_token_is_keyword (parser, RID_FOR) - && !c_parser_next_token_is_keyword (parser, RID_WHILE) - && !c_parser_next_token_is_keyword (parser, RID_DO)) - { - c_parser_error (parser, "for, while or do statement expected"); - return false; - } - if (c_parser_next_token_is_keyword (parser, RID_FOR)) - c_parser_for_statement (parser, true); - else if (c_parser_next_token_is_keyword (parser, RID_WHILE)) - c_parser_while_statement (parser, true); - else - c_parser_do_statement (parser, true); + { + bool ivdep = c_parse_pragma_ivdep (parser); + unsigned short unroll = 0; + if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL) + unroll = c_parser_pragma_unroll (parser); + if (!c_parser_next_token_is_keyword (parser, RID_FOR) + && !c_parser_next_token_is_keyword (parser, RID_WHILE) + && !c_parser_next_token_is_keyword (parser, RID_DO)) + { + c_parser_error (parser, "for, while or do statement expected"); + return false; + } + if (c_parser_next_token_is_keyword (parser, RID_FOR)) + c_parser_for_statement (parser, ivdep, unroll); + else if (c_parser_next_token_is_keyword (parser, RID_WHILE)) + c_parser_while_statement (parser, ivdep, unroll); + else + c_parser_do_statement (parser, ivdep, unroll); + } + return false; + case PRAGMA_UNROLL: + { + unsigned short unroll = c_parser_pragma_unroll (parser); + bool ivdep = false; + if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP) + ivdep = c_parse_pragma_ivdep (parser); + if (!c_parser_next_token_is_keyword (parser, RID_FOR) + && !c_parser_next_token_is_keyword (parser, RID_WHILE) + && !c_parser_next_token_is_keyword (parser, RID_DO)) + { + c_parser_error (parser, "for, while or do statement expected"); + return false; + } + if (c_parser_next_token_is_keyword (parser, RID_FOR)) 
+ c_parser_for_statement (parser, ivdep, unroll); + else if (c_parser_next_token_is_keyword (parser, RID_WHILE)) + c_parser_while_statement (parser, ivdep, unroll); + else + c_parser_do_statement (parser, ivdep, unroll); + } return false; case PRAGMA_GCC_PCH_PREPROCESS: Index: c-family/c-pragma.c =================================================================== --- c-family/c-pragma.c (revision 220084) +++ c-family/c-pragma.c (working copy) @@ -1456,6 +1456,10 @@ init_pragma (void) cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false, false); + if (!flag_preprocess_only) + cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false, + false); + if (flag_cilkplus && !flag_preprocess_only) cpp_register_deferred_pragma (parse_in, "cilk", "grainsize", PRAGMA_CILK_GRAINSIZE, true, false); Index: c-family/c-pragma.h =================================================================== --- c-family/c-pragma.h (revision 220084) +++ c-family/c-pragma.h (working copy) @@ -69,6 +69,7 @@ typedef enum pragma_kind { PRAGMA_GCC_PCH_PREPROCESS, PRAGMA_IVDEP, + PRAGMA_UNROLL, PRAGMA_FIRST_EXTERNAL } pragma_kind; Index: cfgloop.h =================================================================== --- cfgloop.h (revision 220084) +++ cfgloop.h (working copy) @@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo of the loop can be safely evaluated concurrently. */ int safelen; + /* The number of times to unroll the loop. 0 means no information + given, just do what we always do. A value of 1 means don't unroll + the loop. */ + unsigned short unroll; + /* True if this loop should never be vectorized. 
*/ bool dont_vectorize; Index: cfgloopmanip.c =================================================================== --- cfgloopmanip.c (revision 220084) +++ cfgloopmanip.c (working copy) @@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc target->estimate_state = loop->estimate_state; target->warned_aggressive_loop_optimizations |= loop->warned_aggressive_loop_optimizations; + target->unroll = loop->unroll; } /* Copies copy of LOOP as subloop of TARGET loop, placing newly Index: cp/cp-array-notation.c =================================================================== --- cp/cp-array-notation.c (revision 220084) +++ cp/cp-array-notation.c (working copy) @@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr finish_expr_stmt (init); for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE); finish_for_init_stmt (for_stmt); - finish_for_cond (cond, for_stmt, false); + finish_for_cond (cond, for_stmt, false, 0); finish_for_expr (incr, for_stmt); finish_expr_stmt (body); finish_for_stmt (for_stmt); Index: cp/cp-tree.h =================================================================== --- cp/cp-tree.h (revision 220084) +++ cp/cp-tree.h (working copy) @@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn extern bool maybe_clone_body (tree); /* In parser.c */ -extern tree cp_convert_range_for (tree, tree, tree, bool); +extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short); extern bool parsing_nsdmi (void); extern void inject_this_parameter (tree, cp_cv_quals); @@ -5880,16 +5880,16 @@ extern void begin_else_clause (tree); extern void finish_else_clause (tree); extern void finish_if_stmt (tree); extern tree begin_while_stmt (void); -extern void finish_while_stmt_cond (tree, tree, bool); +extern void finish_while_stmt_cond (tree, tree, bool, unsigned short); extern void finish_while_stmt (tree); extern tree begin_do_stmt (void); extern void finish_do_body (tree); -extern void finish_do_stmt (tree, tree, bool); +extern void finish_do_stmt (tree, 
tree, bool, unsigned short); extern tree finish_return_stmt (tree); extern tree begin_for_scope (tree *); extern tree begin_for_stmt (tree, tree); extern void finish_for_init_stmt (tree); -extern void finish_for_cond (tree, tree, bool); +extern void finish_for_cond (tree, tree, bool, unsigned short); extern void finish_for_expr (tree, tree); extern void finish_for_stmt (tree); extern tree begin_range_for_stmt (tree, tree); Index: cp/init.c =================================================================== --- cp/init.c (revision 220084) +++ cp/init.c (working copy) @@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex finish_for_init_stmt (for_stmt); finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator, build_int_cst (TREE_TYPE (iterator), -1)), - for_stmt, false); + for_stmt, false, 0); elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0, complain); if (elt_init == error_mark_node) Index: cp/parser.c =================================================================== --- cp/parser.c (revision 220084) +++ cp/parser.c (working copy) @@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen static tree cp_parser_condition (cp_parser *); static tree cp_parser_iteration_statement - (cp_parser *, bool); + (cp_parser *, bool, unsigned short); static bool cp_parser_for_init_statement (cp_parser *, tree *decl); static tree cp_parser_for - (cp_parser *, bool); + (cp_parser *, bool, unsigned short); static tree cp_parser_c_for - (cp_parser *, tree, tree, bool); + (cp_parser *, tree, tree, bool, unsigned short); static tree cp_parser_range_for - (cp_parser *, tree, tree, tree, bool); + (cp_parser *, tree, tree, tree, bool, unsigned short); static void do_range_for_auto_deduction (tree, tree); static tree cp_parser_perform_range_for_lookup @@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser, case RID_WHILE: case RID_DO: case RID_FOR: - statement = cp_parser_iteration_statement (parser, false); + statement = 
cp_parser_iteration_statement (parser, false, 0); break; case RID_CILK_FOR: @@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser) not included. */ static tree -cp_parser_for (cp_parser *parser, bool ivdep) +cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll) { tree init, scope, decl; bool is_range_for; @@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i is_range_for = cp_parser_for_init_statement (parser, &decl); if (is_range_for) - return cp_parser_range_for (parser, scope, init, decl, ivdep); + return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll); else - return cp_parser_c_for (parser, scope, init, ivdep); + return cp_parser_c_for (parser, scope, init, ivdep, unroll); } static tree -cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep) +cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep, + unsigned short unroll) { /* Normal for loop */ tree condition = NULL_TREE; @@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree "%<GCC ivdep%> pragma"); condition = error_mark_node; } - finish_for_cond (condition, stmt, ivdep); + else if (unroll) + { + cp_parser_error (parser, "missing loop condition in loop with " + "%<GCC unroll%> pragma"); + condition = error_mark_node; + } + finish_for_cond (condition, stmt, ivdep, unroll); /* Look for the `;'. */ cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON); @@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree static tree cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl, - bool ivdep) + bool ivdep, unsigned short unroll) { tree stmt, range_expr; @@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser, stmt = begin_range_for_stmt (scope, init); if (ivdep) RANGE_FOR_IVDEP (stmt) = 1; + if (unroll) + /* TODO */(void)0; finish_range_for_decl (stmt, range_decl, range_expr); if (!type_dependent_expression_p (range_expr) /* do_auto_deduction doesn't mess with template init-lists. 
*/ @@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser, else { stmt = begin_for_stmt (scope, init); - stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep); + stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll); } return stmt; } @@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl, tree cp_convert_range_for (tree statement, tree range_decl, tree range_expr, - bool ivdep) + bool ivdep, unsigned short unroll) { tree begin, end; tree iter_type, begin_expr, end_expr; @@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr begin, ERROR_MARK, end, ERROR_MARK, NULL, tf_warning_or_error); - finish_for_cond (condition, statement, ivdep); + finish_for_cond (condition, statement, ivdep, unroll); /* The new increment expression. */ expression = finish_unary_op_expr (input_location, @@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT. */ static tree -cp_parser_iteration_statement (cp_parser* parser, bool ivdep) +cp_parser_iteration_statement (cp_parser* parser, bool ivdep, + unsigned short unroll) { cp_token *token; enum rid keyword; @@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN); /* Parse the condition. */ condition = cp_parser_condition (parser); - finish_while_stmt_cond (condition, statement, ivdep); + finish_while_stmt_cond (condition, statement, ivdep, unroll); /* Look for the `)'. */ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); /* Parse the dependent statement. */ @@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser /* Parse the expression. */ expression = cp_parser_expression (parser); /* We're done with the do-statement. */ - finish_do_stmt (expression, statement, ivdep); + finish_do_stmt (expression, statement, ivdep, unroll); /* Look for the `)'. 
*/ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); /* Look for the `;'. */ @@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser /* Look for the `('. */ cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN); - statement = cp_parser_for (parser, ivdep); + statement = cp_parser_for (parser, ivdep, unroll); /* Look for the `)'. */ cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN); @@ -32901,6 +32911,41 @@ cp_parser_cilk_grainsize (cp_parser *par cp_parser_skip_to_pragma_eol (parser, pragma_tok); } +static bool +cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok) +{ + cp_parser_skip_to_pragma_eol (parser, pragma_tok); + return true; +} + +static unsigned short +cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok) +{ + location_t location = cp_lexer_peek_token (parser->lexer)->location; + tree expr = cp_parser_constant_expression (parser); + unsigned short unroll; + expr = maybe_constant_value (expr); + cp_parser_require_pragma_eol (parser, pragma_tok); + HOST_WIDE_INT lunroll = 0; + if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) + || TREE_CODE (expr) != INTEGER_CST + || (lunroll = tree_to_shwi (expr)) < 0 + || lunroll > USHRT_MAX) + { + error_at (location, "%<#pragma GCC unroll%> requires an" + " assignment-expression that evaluates to a non-negative" + " integral constant less than or equal to %u", USHRT_MAX); + unroll = 0; + } + else + { + unroll = (unsigned short) lunroll; + if (unroll == 0) + unroll = 1; + } + return unroll; +} + /* Normal parsing of a pragma token. Here we can (and must) use the regular lexer. 
*/ @@ -33068,9 +33113,39 @@ cp_parser_pragma (cp_parser *parser, enu case PRAGMA_IVDEP: { - cp_parser_skip_to_pragma_eol (parser, pragma_tok); + bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok); + unsigned short unroll = 0; cp_token *tok; tok = cp_lexer_peek_token (the_parser->lexer); + if (tok->type == CPP_PRAGMA && + tok->pragma_kind == PRAGMA_UNROLL) + { + unroll = cp_parser_pragma_unroll (parser, pragma_tok); + tok = cp_lexer_peek_token (the_parser->lexer); + } + if (tok->type != CPP_KEYWORD + || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE + && tok->keyword != RID_DO)) + { + cp_parser_error (parser, "for, while or do statement expected"); + return false; + } + cp_parser_iteration_statement (parser, ivdep, unroll); + return true; + } + + case PRAGMA_UNROLL: + { + unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok); + bool ivdep = false; + cp_token *tok; + tok = cp_lexer_peek_token (the_parser->lexer); + if (tok->type == CPP_PRAGMA && + tok->pragma_kind == PRAGMA_IVDEP) + { + ivdep = cp_parser_pragma_ivdep (parser, tok); + tok = cp_lexer_peek_token (the_parser->lexer); + } if (tok->type != CPP_KEYWORD || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE && tok->keyword != RID_DO)) @@ -33078,7 +33153,7 @@ cp_parser_pragma (cp_parser *parser, enu cp_parser_error (parser, "for, while or do statement expected"); return false; } - cp_parser_iteration_statement (parser, true); + cp_parser_iteration_statement (parser, ivdep, unroll); return true; } Index: cp/pt.c =================================================================== --- cp/pt.c (revision 220084) +++ cp/pt.c (working copy) @@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f RECUR (FOR_INIT_STMT (t)); finish_for_init_stmt (stmt); tmp = RECUR (FOR_COND (t)); - finish_for_cond (tmp, stmt, false); + finish_for_cond (tmp, stmt, false, 0); tmp = RECUR (FOR_EXPR (t)); finish_for_expr (tmp, stmt); RECUR (FOR_BODY (t)); @@ -13901,7 +13901,7 @@ tsubst_expr (tree t, 
tree args, tsubst_f decl = tsubst (decl, args, complain, in_decl); maybe_push_decl (decl); expr = RECUR (RANGE_FOR_EXPR (t)); - stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t)); + stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0); RECUR (RANGE_FOR_BODY (t)); finish_for_stmt (stmt); } @@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f case WHILE_STMT: stmt = begin_while_stmt (); tmp = RECUR (WHILE_COND (t)); - finish_while_stmt_cond (tmp, stmt, false); + finish_while_stmt_cond (tmp, stmt, false, 0); RECUR (WHILE_BODY (t)); finish_while_stmt (stmt); break; @@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f RECUR (DO_BODY (t)); finish_do_body (stmt); tmp = RECUR (DO_COND (t)); - finish_do_stmt (tmp, stmt, false); + finish_do_stmt (tmp, stmt, false, 0); break; case IF_STMT: @@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f case ANNOTATE_EXPR: tmp = RECUR (TREE_OPERAND (t, 0)); - RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, - TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1)))); + RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, + TREE_TYPE (tmp), tmp, + RECUR (TREE_OPERAND (t, 1)), + RECUR (TREE_OPERAND (t, 2)))); default: gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t))); Index: cp/semantics.c =================================================================== --- cp/semantics.c (revision 220084) +++ cp/semantics.c (working copy) @@ -802,7 +802,8 @@ begin_while_stmt (void) WHILE_STMT. 
*/ void -finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep) +finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep, + unsigned short unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used as a condition for while statement", @@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree finish_cond (&WHILE_COND (while_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR, + WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (WHILE_COND (while_stmt)), WHILE_COND (while_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR, + TREE_TYPE (WHILE_COND (while_stmt)), + WHILE_COND (while_stmt), + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt)); } @@ -861,7 +870,7 @@ finish_do_body (tree do_stmt) COND is as indicated. 
*/ void -finish_do_stmt (tree cond, tree do_stmt, bool ivdep) +finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used as a condition for a do-while statement", @@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt, cond = maybe_convert_cond (cond); end_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, - build_int_cst (integer_type_node, annot_expr_ivdep_kind)); + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, annot_expr_unroll_kind), + build_int_cst (integer_type_node, unroll)); DO_COND (do_stmt) = cond; } @@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt) FOR_STMT. */ void -finish_for_cond (tree cond, tree for_stmt, bool ivdep) +finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll) { if (check_no_cilk (cond, "Cilk array notation cannot be used in a condition for a for-loop", @@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm finish_cond (&FOR_COND (for_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR, + FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (FOR_COND (for_stmt)), FOR_COND (for_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + NULL_TREE); + if (unroll && cond != error_mark_node) + FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR, + TREE_TYPE (FOR_COND (for_stmt)), + FOR_COND (for_stmt), + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (integer_type_node, + unroll)); simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt)); } Index: doc/extend.texi 
=================================================================== --- doc/extend.texi (revision 220084) +++ doc/extend.texi (working copy) @@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int @} @end smallexample +@table @code +@item #pragma GCC unroll @var{n} +@cindex pragma GCC unroll @var{n} + +With this pragma, the programmer informs the optimizer how many times +a loop should be unrolled. A value of 0 or 1 tells the compiler not to +perform any loop unrolling. The pragma must be immediately before +@samp{#pragma GCC ivdep} or a @code{for}, @code{while} or @code{do} loop +and applies only to the loop that follows. @var{n} is an +assignment-expression that evaluates to a non-negative integer constant. + +@end table @node Unnamed Fields @section Unnamed struct/union fields within structs/unions Index: fortran/trans-stmt.c =================================================================== --- fortran/trans-stmt.c (revision 220084) +++ fortran/trans-stmt.c (working copy) @@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node, count, build_int_cst (TREE_TYPE (count), 0)); if (forall_tmp->do_concurrent) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + NULL_TREE); tmp = build1_v (GOTO_EXPR, exit_label); tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, Index: function.h =================================================================== --- function.h (revision 220084) +++ function.h (working copy) @@ -670,6 +670,10 @@ struct GTY(()) function { /* Set when the tail call has been identified. */ unsigned int tail_call_marked : 1; + + /* Set when #pragma GCC unroll has been used in the body. Used by rtl + unrolling to know when to perform unrolling in the function. 
*/ + unsigned int has_unroll : 1; }; /* Add the decl D to the local_decls list of FUN. */ Index: gimple-low.c =================================================================== --- gimple-low.c (revision 220084) +++ gimple-low.c (working copy) @@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s for (i = 0; i < gimple_call_num_args (stmt); i++) { tree arg = gimple_call_arg (stmt, i); - if (EXPR_P (arg)) + if (arg && EXPR_P (arg)) TREE_SET_BLOCK (arg, data->block); } Index: gimple-walk.c =================================================================== --- gimple-walk.c (revision 220084) +++ gimple-walk.c (working copy) @@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f for (i = 0; i < gimple_call_num_args (stmt); i++) { - if (wi) + if (wi && gimple_call_arg (stmt, i)) wi->val_only = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i))); ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi, Index: gimplify.c =================================================================== --- gimplify.c (revision 220084) +++ gimplify.c (working copy) @@ -2908,6 +2908,9 @@ gimple_boolify (tree expr) case ANNOTATE_EXPR: switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1))) { + case annot_expr_unroll_kind: + cfun->has_unroll = 1; + /* fall-through */ case annot_expr_ivdep_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: @@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq { tree cond = TREE_OPERAND (*expr_p, 0); tree kind = TREE_OPERAND (*expr_p, 1); + tree data = TREE_OPERAND (*expr_p, 2); tree type = TREE_TYPE (cond); if (!INTEGRAL_TYPE_P (type)) { @@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq tree tmp = create_tmp_var (type); gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p)); gcall *call - = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind); + = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data); gimple_call_set_lhs (call, tmp); gimplify_seq_add_stmt (pre_p, 
call); *expr_p = tmp; Index: loop-init.c =================================================================== --- loop-init.c (revision 220084) +++ loop-init.c (working copy) @@ -375,6 +375,7 @@ pass_loop2::gate (function *fun) && (flag_move_loop_invariants || flag_unswitch_loops || flag_unroll_loops + || cfun->has_unroll #ifdef HAVE_doloop_end || (flag_branch_on_count_reg && HAVE_doloop_end) #endif @@ -576,7 +577,8 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops); + return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops + || cfun->has_unroll); } virtual unsigned int execute (function *); Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 220084) +++ loop-unroll.c (working copy) @@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati /* Decide whether unroll loops and how much. */ static void -decide_unrolling (int flags) +decide_unrolling (int base_flags) { struct loop *loop; /* Scan the loops, inner ones first. */ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) { + int flags = base_flags; + if (loop->unroll > 1) + flags = UAP_UNROLL | UAP_UNROLL_ALL; loop->lpt_decision.decision = LPT_NONE; location_t locus = get_loop_location (loop); + if (loop->unroll == 1) + { + dump_printf_loc (TDF_RTL, locus, + "not unrolling loop, user didn't want it unrolled\n"); + continue; + } + if (dump_enabled_p ()) dump_printf_loc (TDF_RTL, locus, ";; *** Considering loop %d at BB %d for " @@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc return; } + if (loop->unroll) + { + loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; + loop->lpt_decision.times = loop->unroll - 1; + if (loop->lpt_decision.times > desc->niter - 2) + { + /* They won't do this for us. 
*/ + loop->lpt_decision.decision = LPT_NONE; + loop->lpt_decision.times = desc->niter - 2; + } + return; + } + /* Check whether the loop rolls enough to consider. Consult also loop bounds and profile; in the case the loop has more than one exit it may well loop less than determined maximal number @@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc best_copies = 2 * nunroll + 10; i = 2 * nunroll + 2; - if (i - 1 >= desc->niter) + if (i > desc->niter - 2) i = desc->niter - 2; for (; i >= nunroll - 1; i--) @@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll) + nunroll = loop->unroll; + /* Skip big loops. */ if (nunroll <= 1) { @@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct return; } - /* Success; now force nunroll to be power of 2, as we are unable to - cope with overflows in computation of number of iterations. */ + /* Success; now force nunroll to be a power of 2: code generation + requires it, because we are unable to cope with overflows in the + computation of the number of iterations. */ for (i = 1; 2 * i <= nunroll; i *= 2) continue; @@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1, return seq; } -/* Unroll LOOP for which we are able to count number of iterations in runtime - LOOP->LPT_DECISION.TIMES times. The transformation does this (with some - extra care for case n < 0): +/* Unroll LOOP for which we are able to count number of iterations in + runtime LOOP->LPT_DECISION.TIMES times. TIMES + 1 must be a + power of two. The transformation does this (with some extra care + for case n < 0): for (i = 0; i < n; i++) body; @@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop, if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll) + nunroll = loop->unroll; + /* Skip big loops.
*/ if (nunroll <= 1) { Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 220084) +++ lto-streamer-in.c (working copy) @@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s /* Read OMP SIMD related info. */ loop->safelen = streamer_read_hwi (ib); + loop->unroll = streamer_read_hwi (ib); loop->dont_vectorize = streamer_read_hwi (ib); loop->force_vectorize = streamer_read_hwi (ib); loop->simduid = stream_read_tree (ib, data_in); Index: lto-streamer-out.c =================================================================== --- lto-streamer-out.c (revision 220084) +++ lto-streamer-out.c (working copy) @@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str /* Write OMP SIMD related info. */ streamer_write_hwi (ob, loop->safelen); + streamer_write_hwi (ob, loop->unroll); streamer_write_hwi (ob, loop->dont_vectorize); streamer_write_hwi (ob, loop->force_vectorize); stream_write_tree (ob, loop->simduid, true); Index: testsuite/c-c++-common/unroll-1.c =================================================================== --- testsuite/c-c++-common/unroll-1.c (revision 0) +++ testsuite/c-c++-common/unroll-1.c (working copy) @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */ + +void bar(int); + +int j; + +void test1() +{ + unsigned long m = j; + unsigned long i; + + /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */ + #pragma GCC unroll 8 + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */ + #pragma GCC unroll 8 + for (unsigned long i = 1; i <= j; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */ + #pragma GCC unroll 7 + for (unsigned long i = 1; i <= j; ++i) + bar(i); + + /* { dg-final { 
scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */ + i = 0; + #pragma GCC unroll 3 + do { + bar(i); + } while (++i < 9); + + #pragma GCC unroll 4+4 + for (unsigned long i = 1; i <= 8; ++i) + bar(i); +} + +/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */ Index: testsuite/c-c++-common/unroll-2.c =================================================================== --- testsuite/c-c++-common/unroll-2.c (revision 0) +++ testsuite/c-c++-common/unroll-2.c (working copy) @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */ + +void bar(int); + +int j; + +void test1() +{ + unsigned long m = j; + unsigned long i; + + /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */ + #pragma GCC unroll 8 + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */ + #pragma GCC unroll 8 + for (unsigned long i = 1; i <= j; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */ + #pragma GCC unroll 7 + for (unsigned long i = 1; i <= j; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */ + i = 0; + #pragma GCC unroll 3 + do { + bar(i); + } while (++i < 9); +} + +void test2 () { + unsigned long m = j; + unsigned long i; + + /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */ + #pragma GCC unroll 8 + for (unsigned long i = 1; i <= 7; ++i) + bar(i); + + /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */ + #pragma GCC unroll 9 + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */ + #pragma GCC 
unroll 4 + for (unsigned long i = 1; i <= 15; ++i) + bar(i); + + /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */ + #pragma GCC unroll 709 + for (unsigned long i = 1; i <= 709; ++i) + bar(i); + + /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */ + #pragma GCC unroll 0 + for (unsigned long i = 1; i <= 3; ++i) + bar(i); +} + + +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ +/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */ Index: testsuite/c-c++-common/unroll-3.c =================================================================== --- testsuite/c-c++-common/unroll-3.c (revision 0) +++ testsuite/c-c++-common/unroll-3.c (working copy) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */ + +void bar(int); + +int j; + +void test1() +{ + unsigned long m = j; + unsigned long i; + + /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */ + /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */ + #pragma GCC unroll 0 + for (unsigned long i = 1; i <= 3; ++i) + bar(i); + + /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */ + #pragma GCC unroll 0 + for (unsigned long i = 1; i <= m; ++i) + bar(i); +} + +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ +/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */ Index: testsuite/c-c++-common/unroll-4.c =================================================================== --- testsuite/c-c++-common/unroll-4.c (revision 0) +++ testsuite/c-c++-common/unroll-4.c (working copy) @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdisable-tree-cunroll" } */ + +void 
bar(int); + +int j; + +void test1() { + unsigned long m = j; + unsigned long i; + + #pragma GCC unroll 20000000000 /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + #pragma GCC unroll i /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */ + /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + #pragma GCC unroll n /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */ + /* { dg-error "declared" "" { target *-*-* } 21 } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + #pragma GCC unroll 1+i /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */ + /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + #pragma GCC unroll 4,4 /* { dg-error "expected end of line before" } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); + + #pragma GCC unroll 4.2 /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */ + for (unsigned long i = 1; i <= 8; ++i) + bar(i); +} Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 220084) +++ tree-cfg.c (working copy) @@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl loop->force_vectorize = true; cfun->has_force_vectorize_loops = true; break; + case annot_expr_unroll_kind: + loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt, + 2)); + break; default: gcc_unreachable (); } @@ -365,6 +369,7 @@ 
replace_loop_annotate (void) case annot_expr_ivdep_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: + case annot_expr_unroll_kind: break; default: gcc_unreachable (); @@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt) for (i = 0; i < gimple_call_num_args (stmt); ++i) { tree arg = gimple_call_arg (stmt, i); + if (! arg) + continue; if ((is_gimple_reg_type (TREE_TYPE (arg)) && !is_gimple_val (arg)) || (!is_gimple_reg_type (TREE_TYPE (arg)) @@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo fprintf (file, ", estimate = "); print_decu (loop->nb_iterations_estimate, file); } + if (loop->unroll) + fprintf (file, ", unroll = %d", loop->unroll); fprintf (file, ")\n"); /* Print loop's body. */ Index: tree-core.h =================================================================== --- tree-core.h (revision 220084) +++ tree-core.h (working copy) @@ -725,6 +725,7 @@ enum annot_expr_kind { annot_expr_ivdep_kind, annot_expr_no_vector_kind, annot_expr_vector_kind, + annot_expr_unroll_kind, annot_expr_kind_last }; Index: tree-pretty-print.c =================================================================== --- tree-pretty-print.c (revision 220084) +++ tree-pretty-print.c (working copy) @@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t case annot_expr_vector_kind: pp_string (pp, ", vector"); break; + case annot_expr_unroll_kind: + pp_printf (pp, ", unroll %d", + (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2))); + break; default: gcc_unreachable (); } Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 220084) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop HOST_WIDE_INT maxiter, location_t locus) { - unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; - struct loop_size size; + unsigned HOST_WIDE_INT n_unroll = 0; bool n_unroll_found = false; edge edge_to_cancel = NULL; int report_flags = 
MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS; @@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop if (!n_unroll_found) return false; - if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) + if (loop->unroll == 0 && + n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling loop %d " @@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop if (ul == UL_SINGLE_ITER) return false; - large = tree_estimate_loop_size - (loop, exit, edge_to_cancel, &size, - PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); - ninsns = size.overall; - if (large) + if (loop->unroll) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", - loop->num); - return false; + /* If they wanted to unroll more than we want, don't unroll + it completely. */ + if (n_unroll > (unsigned)loop->unroll) + { + dump_printf_loc (report_flags, locus, + "not unrolling loop, " + "user didn't want it unrolled completely.\n"); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not unrolling loop %d: " + "user didn't want it unrolled completely.\n", + loop->num); + return false; + } } - - unr_insns = estimated_unrolled_size (&size, n_unroll); - if (dump_file && (dump_flags & TDF_DETAILS)) + else { - fprintf (dump_file, " Loop size: %d\n", (int) ninsns); - fprintf (dump_file, " Estimated size after unrolling: %d\n", - (int) unr_insns); - } + struct loop_size size; + large = tree_estimate_loop_size + (loop, exit, edge_to_cancel, &size, + PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); + unsigned HOST_WIDE_INT ninsns = size.overall; + if (large) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", + loop->num); + return false; + } - /* If the code is going to shrink, we don't need to be extra cautious - on guessing if the unrolling is going to 
be profitable. */ - if (unr_insns - /* If there is IV variable that will become constant, we save - one instruction in the loop prologue we do not account - otherwise. */ - <= ninsns + (size.constant_iv != false)) - ; - /* We unroll only inner loops, because we do not consider it profitable - otheriwse. We still can cancel loopback edge of not rolling loop; - this is always a good idea. */ - else if (ul == UL_NO_GROWTH) - { + unsigned HOST_WIDE_INT unr_insns + = estimated_unrolled_size (&size, n_unroll); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", - loop->num); - return false; - } - /* Outer loops tend to be less interesting candidates for complete - unrolling unless we can do a lot of propagation into the inner loop - body. For now we disable outer loop unrolling when the code would - grow. */ - else if (loop->inner) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "it is not innermost and code would grow.\n", - loop->num); - return false; - } - /* If there is call on a hot path through the loop, then - there is most probably not much to optimize. */ - else if (size.num_non_pure_calls_on_hot_path) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains call and code would grow.\n", - loop->num); - return false; - } - /* If there is pure/const call in the function, then we - can still optimize the unrolled loop body if it contains - some other interesting code than the calls and code - storing or cumulating the return value. */ - else if (size.num_pure_calls_on_hot_path - /* One IV increment, one test, one ivtmp store - and one useful stmt. That is about minimal loop - doing pure call. 
*/ - && (size.non_call_stmts_on_hot_path - <= 3 + size.num_pure_calls_on_hot_path)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains just pure calls and code would grow.\n", - loop->num); - return false; - } - /* Complette unrolling is major win when control flow is removed and - one big basic block is created. If the loop contains control flow - the optimization may still be a win because of eliminating the loop - overhead but it also may blow the branch predictor tables. - Limit number of branches on the hot path through the peeled - sequence. */ - else if (size.num_branches_on_hot_path * (int)n_unroll - > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - " number of branches on hot path in the unrolled sequence" - " reach --param max-peel-branches limit.\n", - loop->num); - return false; - } - else if (unr_insns - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "(--param max-completely-peeled-insns limit reached).\n", - loop->num); - return false; + { + fprintf (dump_file, " Loop size: %d\n", (int) ninsns); + fprintf (dump_file, " Estimated size after unrolling: %d\n", + (int) unr_insns); + } + + /* If the code is going to shrink, we don't need to be extra + cautious on guessing if the unrolling is going to be + profitable. */ + if (unr_insns + /* If there is IV variable that will become constant, we + save one instruction in the loop prologue we do not + account otherwise. */ + <= ninsns + (size.constant_iv != false)) + ; + /* We unroll only inner loops, because we do not consider it + profitable otherwise. We still can cancel loopback edge + of not rolling loop; this is always a good idea. 
*/ + else if (ul == UL_NO_GROWTH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + loop->num); + return false; + } + /* Outer loops tend to be less interesting candidates for + complete unrolling unless we can do a lot of propagation + into the inner loop body. For now we disable outer loop + unrolling when the code would grow. */ + else if (loop->inner) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "it is not innermost and code would grow.\n", + loop->num); + return false; + } + /* If there is call on a hot path through the loop, then + there is most probably not much to optimize. */ + else if (size.num_non_pure_calls_on_hot_path) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains call and code would grow.\n", + loop->num); + return false; + } + /* If there is pure/const call in the function, then we can + still optimize the unrolled loop body if it contains some + other interesting code than the calls and code storing or + cumulating the return value. */ + else if (size.num_pure_calls_on_hot_path + /* One IV increment, one test, one ivtmp store and + one useful stmt. That is about minimal loop + doing pure call. */ + && (size.non_call_stmts_on_hot_path + <= 3 + size.num_pure_calls_on_hot_path)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains just pure calls and code would grow.\n", + loop->num); + return false; + } + /* Complete unrolling is major win when control flow is + removed and one big basic block is created. If the loop + contains control flow the optimization may still be a win + because of eliminating the loop overhead but it also may + blow the branch predictor tables. Limit number of + branches on the hot path through the peeled sequence. 
*/ + else if (size.num_branches_on_hot_path * (int)n_unroll + > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + " number of branches on hot path in the unrolled sequence" + " reach --param max-peel-branches limit.\n", + loop->num); + return false; + } + else if (unr_insns + > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "(--param max-completely-peeled-insns limit reached).\n", + loop->num); + return false; + } } dump_printf_loc (report_flags, locus, "loop turned into non-loop; it never loops.\n"); @@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop else gimple_cond_make_true (cond); update_stmt (cond); - /* Do not remove the path. Doing so may remove outer loop - and confuse bookkeeping code in tree_unroll_loops_completelly. */ + /* Do not remove the path. Doing so may remove outer loop and + confuse bookkeeping code in + tree_unroll_loops_completelly. */ } /* Store the loop for later unlooping and exit removal. */ @@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop, if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) return false; + /* We don't peel loops that will be unrolled as this can duplicate a + loop more times than the user requested. */ + if (loop->unroll) + { + if (dump_file) + fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); + return false; + } + /* Peel only innermost loops. */ if (loop->inner) { if (dump_file) - fprintf (dump_file, "Not peeling: outer loop\n"); + fprintf (dump_file, "Not peeling: outer loop\n"); return false; } if (!optimize_loop_for_speed_p (loop)) { if (dump_file) - fprintf (dump_file, "Not peeling: cold loop\n"); + fprintf (dump_file, "Not peeling: cold loop\n"); return false; } /* Check if there is an estimate on the number of iterations. 
*/ npeel = estimated_loop_iterations_int (loop); + if (npeel < 0) { if (dump_file) @@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop, "estimated\n"); return false; } + if (maxiter >= 0 && maxiter <= npeel) { if (dump_file) - fprintf (dump_file, "Not peeling: upper bound is known so can " + fprintf (dump_file, "Not peeling: upper bound is known so can " "unroll completely\n"); return false; } @@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop, if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) { if (dump_file) - fprintf (dump_file, "Not peeling: rolls too much " + fprintf (dump_file, "Not peeling: rolls too much " "(%i + 1 > --param max-peel-times)\n", npeel); return false; } @@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop, > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) { if (dump_file) - fprintf (dump_file, "Not peeling: peeled sequence size is too large " + fprintf (dump_file, "Not peeling: peeled sequence size is too large " "(%i insns > --param max-peel-insns)", peeled_size); return false; } @@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may if (!loop_father) return false; - if (may_increase_size && optimize_loop_nest_for_speed_p (loop) + if (loop->unroll > 1) + ul = UL_ALL; + else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do not cause code growth. */ && (unroll_outer || loop_outer (loop_father))) @@ -1539,7 +1576,9 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return optimize >= 2; } + virtual bool gate (function *) { + return optimize >= 2 || cfun->has_unroll; + } virtual unsigned int execute (function *); }; // class pass_complete_unrolli Index: tree.def =================================================================== --- tree.def (revision 220084) +++ tree.def (working copy) @@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target /* ANNOTATE_EXPR. Operand 0 is the expression to be annotated. 
- Operand 1 is the annotation kind. */ -DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2) + Operand 1 is the annotation kind. + Operand 2 is kind-specific data, or NULL_TREE if none. */ +DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3) /* Cilk spawn statement Operand 0 is the CALL_EXPR. */