From patchwork Wed Oct 12 15:54:18 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jakub Jelinek X-Patchwork-Id: 119242 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 23389B6F62 for ; Thu, 13 Oct 2011 02:54:58 +1100 (EST) Received: (qmail 19434 invoked by alias); 12 Oct 2011 15:54:50 -0000 Received: (qmail 19423 invoked by uid 22791); 12 Oct 2011 15:54:45 -0000 X-SWARE-Spam-Status: No, hits=-6.8 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, SPF_HELO_PASS, TW_TM X-Spam-Check-By: sourceware.org Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 12 Oct 2011 15:54:23 +0000 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id p9CFsLvw021787 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Wed, 12 Oct 2011 11:54:21 -0400 Received: from tyan-ft48-01.lab.bos.redhat.com (tyan-ft48-01.lab.bos.redhat.com [10.16.42.4]) by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id p9CFsKOl002301 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Wed, 12 Oct 2011 11:54:21 -0400 Received: from tyan-ft48-01.lab.bos.redhat.com (tyan-ft48-01.lab.bos.redhat.com [127.0.0.1]) by tyan-ft48-01.lab.bos.redhat.com (8.14.4/8.14.4) with ESMTP id p9CFsJR7007992; Wed, 12 Oct 2011 17:54:20 +0200 Received: (from jakub@localhost) by tyan-ft48-01.lab.bos.redhat.com (8.14.4/8.14.4/Submit) id p9CFsIKr007989; Wed, 12 Oct 2011 17:54:18 +0200 Date: Wed, 12 Oct 2011 17:54:18 +0200 From: Jakub Jelinek To: Ira Rosen , Richard Guenther Cc: gcc-patches@gcc.gnu.org Subject: [PATCH] Optimize some loops using bool types (PR 
tree-optimization/50596) Message-ID: <20111012155418.GB2210@tyan-ft48-01.lab.bos.redhat.com> Reply-To: Jakub Jelinek MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hi! This patch allows vectorization of some loops that use bool (which is especially important now that we use bool more often even for stmts that weren't originally using bool in the sources), in particular when bool is cast to an integer type and the bool rhs has def stmts within the loop that are either BIT_{AND,IOR,XOR}_EXPR, plain SSA_NAME assigns, bool -> another bool casts, or comparisons (tested recursively). In that case the pattern recognizer transforms the comparisons into COND_EXPRs using a suitable integer type (the same width as the comparison operands) and other bools to suitable integer types with casts added where needed. The patch doesn't yet handle vectorization of storing into a bool array; I'll work on that later. Bootstrapped/regtested on x86_64-linux and i686-linux. Ok for trunk? 2011-10-12 Jakub Jelinek PR tree-optimization/50596 * tree-vectorizer.h (NUM_PATTERNS): Increase to 7. * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add vect_recog_bool_pattern. (check_bool_pattern, adjust_bool_pattern_cast, adjust_bool_pattern, vect_recog_bool_pattern): New functions. * gcc.dg/vect/vect-cond-9.c: New test. Jakub --- gcc/tree-vectorizer.h.jj 2011-10-10 09:41:29.000000000 +0200 +++ gcc/tree-vectorizer.h 2011-10-10 10:12:03.000000000 +0200 @@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic Additional pattern recognition functions can (and will) be added in the future. 
*/ typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); -#define NUM_PATTERNS 6 +#define NUM_PATTERNS 7 void vect_pattern_recog (loop_vec_info); /* In tree-vectorizer.c. */ --- gcc/tree-vect-patterns.c.jj 2011-10-10 09:41:29.000000000 +0200 +++ gcc/tree-vect-patterns.c 2011-10-10 18:23:41.000000000 +0200 @@ -51,13 +51,15 @@ static gimple vect_recog_over_widening_p tree *); static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, tree *, tree *); +static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *); static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, vect_recog_dot_prod_pattern, vect_recog_pow_pattern, vect_recog_over_widening_pattern, - vect_recog_mixed_size_cond_pattern}; + vect_recog_mixed_size_cond_pattern, + vect_recog_bool_pattern}; /* Function widened_name_p @@ -1068,10 +1070,8 @@ vect_operation_fits_smaller_type (gimple constants. Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either be 'type' or some intermediate type. For now, we expect S5 to be a type - demotion operation. We also check that S3 and S4 have only one use. -. + demotion operation. We also check that S3 and S4 have only one use. */ -*/ static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) @@ -1333,6 +1333,356 @@ vect_recog_mixed_size_cond_pattern (VEC } +/* Helper function of vect_recog_bool_pattern. Called recursively, return + true if bool VAR can be optimized that way. 
*/ + +static bool +check_bool_pattern (tree var, loop_vec_info loop_vinfo) +{ + gimple def_stmt; + enum vect_def_type dt; + tree def, rhs1; + enum tree_code rhs_code; + + /* Look up the statement defining VAR; give up if VAR is not a simple use. */ if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt)) + return false; + + /* Only definitions classified as vect_internal_def are handled. */ if (dt != vect_internal_def) + return false; + + /* The definition has to be an assignment so that its rhs can be inspected. */ if (!is_gimple_assign (def_stmt)) + return false; + + /* Reject values for which has_single_use fails. */ if (!has_single_use (def)) + return false; + + /* Decide by the code of the defining assignment. */ rhs1 = gimple_assign_rhs1 (def_stmt); + rhs_code = gimple_assign_rhs_code (def_stmt); + switch (rhs_code) + { + case SSA_NAME: + /* Plain copy: the answer is that of the copied operand. */ return check_bool_pattern (rhs1, loop_vinfo); + + CASE_CONVERT: + /* A conversion qualifies only if its source is a BOOLEAN_TYPE or an unsigned type of precision 1. */ if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 + || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) + && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE) + return false; + return check_bool_pattern (rhs1, loop_vinfo); + + case BIT_NOT_EXPR: + /* Negation: the answer is that of the negated operand. */ return check_bool_pattern (rhs1, loop_vinfo); + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + /* Binary logical operations: both operands must qualify. */ if (!check_bool_pattern (rhs1, loop_vinfo)) + return false; + return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo); + + default: + /* The only other accepted definition is a comparison. */ if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) + { + tree vecitype, comp_vectype; + + /* The compared operands need a vector type. */ comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); + if (comp_vectype == NULL_TREE) + return false; + + /* For operands that are not INTEGER_TYPE, pair the comparison vector type with the vector type of an integer type as wide as the operand mode; that vector type must exist too. */ if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) + { + enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); + tree itype + = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); + vecitype = get_vectype_for_scalar_type (itype); + if (vecitype == NULL_TREE) + return false; + } + else + vecitype = comp_vectype; + /* The final answer comes from expand_vec_cond_expr_p on that pair of vector types. */ return expand_vec_cond_expr_p (vecitype, comp_vectype); + } + /* Any other defining statement is not supported. */ return false; + } +} + + +/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous + stmt (SSA_NAME_DEF_STMT of VAR), but moving the COND_EXPR from RELATED_STMT + to PATTERN_DEF_STMT and adding a cast as RELATED_STMT. 
*/ + +static tree +adjust_bool_pattern_cast (tree type, tree var) +{ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var)); + gimple cast_stmt, pattern_stmt; + + /* The statement defining VAR must not have a pattern def stmt yet, because that slot is about to be used. */ gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo)); + /* Move the current related stmt into the pattern def stmt slot. */ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt; + /* Build a NOP_EXPR converting the lhs of that stmt to TYPE, install it as the new related stmt and return its lhs. */ cast_stmt + = gimple_build_assign_with_ops (NOP_EXPR, + vect_recog_temp_ssa_var (type, NULL), + gimple_assign_lhs (pattern_stmt), + NULL_TREE); + STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt; + return gimple_assign_lhs (cast_stmt); +} + + +/* Helper function of vect_recog_bool_pattern. Do the actual transformations, + recursively. VAR is an SSA_NAME that should be transformed from bool + to a wider integer type, OUT_TYPE is the desired final integer type of + the whole pattern, TRUEVAL should be NULL unless optimizing + BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands + in the then_clause, STMTS is where statements with added pattern stmts + should be pushed to. 
*/ + +static tree +adjust_bool_pattern (tree var, tree out_type, tree trueval, + VEC (gimple, heap) **stmts) +{ + gimple stmt = SSA_NAME_DEF_STMT (var); + enum tree_code rhs_code, def_rhs_code; + tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; + location_t loc; + gimple pattern_stmt, def_stmt; + + /* Take apart the assignment defining VAR; the caller visible in this patch runs check_bool_pattern on VAR first. */ rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + rhs_code = gimple_assign_rhs_code (stmt); + loc = gimple_location (stmt); + switch (rhs_code) + { + case SSA_NAME: + CASE_CONVERT: + /* Copies and conversions: transform the operand and copy the result, keeping its integer type. */ irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); + itype = TREE_TYPE (irhs1); + pattern_stmt + = gimple_build_assign_with_ops (SSA_NAME, + vect_recog_temp_ssa_var (itype, NULL), + irhs1, NULL_TREE); + break; + + case BIT_NOT_EXPR: + /* Negation: transform the operand and XOR the result with 1. */ irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); + itype = TREE_TYPE (irhs1); + pattern_stmt + = gimple_build_assign_with_ops (BIT_XOR_EXPR, + vect_recog_temp_ssa_var (itype, NULL), + irhs1, build_int_cst (itype, 1)); + break; + + case BIT_AND_EXPR: + /* Try to optimize x = y & (a < b ? 1 : 0); into + x = (a < b ? 
y : 0); */ + /* First candidate: the second operand is defined by a comparison. */ def_stmt = SSA_NAME_DEF_STMT (rhs2); + def_rhs_code = gimple_assign_rhs_code (def_stmt); + if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) + { + tree def_rhs1 = gimple_assign_rhs1 (def_stmt); + irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); + /* The fold is only done when the transformed first operand has as many bits as the mode of the comparison operands. */ if (TYPE_PRECISION (TREE_TYPE (irhs1)) + == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) + { + gimple tstmt; + stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); + /* Transform the comparison with the other operand as its true value. */ irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts); + /* The recursive call is expected to have pushed DEF_STMT last; replace it on STMTS by STMT and move its pattern stmt over to STMT. */ tstmt = VEC_pop (gimple, *stmts); + gcc_assert (tstmt == def_stmt); + VEC_quick_push (gimple, *stmts, stmt); + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) + = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); + gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo)); + STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; + return irhs2; + } + else + /* Precisions differ: transform the comparison on its own and emit a plain AND at and_ior_xor. */ irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); + goto and_ior_xor; + } + /* Second candidate: the same steps with the roles of the two operands swapped. */ def_stmt = SSA_NAME_DEF_STMT (rhs1); + def_rhs_code = gimple_assign_rhs_code (def_stmt); + if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) + { + tree def_rhs1 = gimple_assign_rhs1 (def_stmt); + irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); + if (TYPE_PRECISION (TREE_TYPE (irhs2)) + == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) + { + gimple tstmt; + stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); + irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts); + tstmt = VEC_pop (gimple, *stmts); + gcc_assert (tstmt == def_stmt); + VEC_quick_push (gimple, *stmts, stmt); + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) + = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); + gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo)); + STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; + return irhs1; + } + else + irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); + goto and_ior_xor; + } + /* FALLTHRU */ + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + irhs1 = adjust_bool_pattern (rhs1, 
out_type, NULL_TREE, stmts); + irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); + and_ior_xor: + /* When the transformed operands differ in precision, cast the one whose precision is farther from that of OUT_TYPE to the type of the other; on a tie cast both to OUT_TYPE. */ if (TYPE_PRECISION (TREE_TYPE (irhs1)) + != TYPE_PRECISION (TREE_TYPE (irhs2))) + { + int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); + int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); + int out_prec = TYPE_PRECISION (out_type); + if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2)) + irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2); + else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) + irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1); + else + { + irhs1 = adjust_bool_pattern_cast (out_type, rhs1); + irhs2 = adjust_bool_pattern_cast (out_type, rhs2); + } + } + /* Emit the original logical operation on the two transformed operands. */ itype = TREE_TYPE (irhs1); + pattern_stmt + = gimple_build_assign_with_ops (rhs_code, + vect_recog_temp_ssa_var (itype, NULL), + irhs1, irhs2); + break; + + default: + /* Anything else has to be a comparison; it becomes rhs1 CMP rhs2 ? TRUEVAL : 0 computed in ITYPE. */ gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); + /* ITYPE is the type of rhs1 when that is an INTEGER_TYPE that is not TYPE_UNSIGNED; otherwise an integer type as wide as the mode of rhs1 is built (second argument 0). */ if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE + || TYPE_UNSIGNED (TREE_TYPE (rhs1))) + { + enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); + itype + = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); + } + else + itype = TREE_TYPE (rhs1); + cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); + /* Without a caller-supplied TRUEVAL the true value is the constant 1; a supplied one is asserted (in checking builds) to have a type compatible with ITYPE. */ if (trueval == NULL_TREE) + trueval = build_int_cst (itype, 1); + else + gcc_checking_assert (useless_type_conversion_p (itype, + TREE_TYPE (trueval))); + pattern_stmt + = gimple_build_assign_with_ops3 (COND_EXPR, + vect_recog_temp_ssa_var (itype, NULL), + cond_expr, trueval, + build_int_cst (itype, 0)); + break; + } + + /* Common exit for the cases that break: queue STMT on STMTS, attach the new pattern stmt to it as related stmt and return the lhs of that pattern stmt. */ VEC_safe_push (gimple, heap, *stmts, stmt); + gimple_set_location (pattern_stmt, loc); + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; + return gimple_assign_lhs (pattern_stmt); +} + + +/* Function vect_recog_bool_pattern + + Try to find pattern like following: + + bool a_b, b_b, c_b, d_b, e_b; + TYPE f_T; + loop: + S1 a_b = x1 CMP1 y1; + S2 b_b = x2 CMP2 y2; + S3 c_b = a_b & 
b_b; + S4 d_b = x3 CMP3 y3; + S5 e_b = c_b | d_b; + S6 f_T = (TYPE) e_b; + + where type 'TYPE' is an integral type. + + Input: + + * LAST_STMT: A stmt at the end from which the pattern + search begins, i.e. cast of a bool to + an integer type. + + Output: + + * TYPE_IN: The type of the input arguments to the pattern. + + * TYPE_OUT: The type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the pattern. + + Assuming size of TYPE is the same as size of all comparisons + (otherwise some casts would be added where needed), for the above + sequence we create related pattern stmts: + S1' a_T = x1 CMP1 y1 ? 1 : 0; + S3' c_T = x2 CMP2 y2 ? a_T : 0; + S4' d_T = x3 CMP3 y3 ? 1 : 0; + S5' e_T = c_T | d_T; + S6' f_T = e_T; + + Instead of the above S3' we could emit: + S2' b_T = x2 CMP2 y2 ? 1 : 0; + S3' c_T = a_T & b_T; + but the above is more efficient. */ + +static gimple +vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in, + tree *type_out) +{ + gimple last_stmt = VEC_pop (gimple, *stmts); + enum tree_code rhs_code; + tree var, lhs, rhs, vectype; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + gimple pattern_stmt; + + if (!is_gimple_assign (last_stmt)) + return NULL; + + var = gimple_assign_rhs1 (last_stmt); + lhs = gimple_assign_lhs (last_stmt); + + if ((TYPE_PRECISION (TREE_TYPE (var)) != 1 + || !TYPE_UNSIGNED (TREE_TYPE (var))) + && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE) + return NULL; + + rhs_code = gimple_assign_rhs_code (last_stmt); + if (CONVERT_EXPR_CODE_P (rhs_code)) + { + if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE) + return NULL; + vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); + if (vectype == NULL_TREE) + return NULL; + + if (!check_bool_pattern (var, loop_vinfo)) + return NULL; + + rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + if 
(useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + pattern_stmt + = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); + else + pattern_stmt + = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE); + /* Input and output vector types are both the vector type of the cast result. */ *type_out = vectype; + *type_in = vectype; + /* Put LAST_STMT, popped on entry, back on STMTS after the stmts queued by adjust_bool_pattern. */ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } + else + return NULL; +} + + /* Mark statements that are involved in a pattern. */ static inline void --- gcc/testsuite/gcc.dg/vect/vect-cond-9.c.jj 2011-10-12 14:39:13.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-cond-9.c 2011-10-12 14:56:17.000000000 +0200 @@ -0,0 +1,200 @@ +/* { dg-require-effective-target vect_cond_mixed } */ + +#include "tree-vect.h" + +#define N 1024 +float a[N], b[N], c[N], d[N]; +int j[N]; +unsigned char k[N]; + +/* f1: AND of two float comparisons, each materialized as an all-ones or zero unsigned mask, reduced to 0 or 1 by a shift and stored to j. */ __attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + unsigned int x = a[i] < b[i] ? -1 : 0; + unsigned int y = c[i] < d[i] ? -1 : 0; + j[i] = (x & y) >> 31; + } +} + +/* f2: AND of the two comparison results held in int temporaries, stored to j. */ __attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + j[i] = x & y; + } +} + +/* f3: AND of the two comparisons written as a single expression, stored to j. */ __attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < N; ++i) + j[i] = (a[i] < b[i]) & (c[i] < d[i]); +} + +/* f4: like f2 but the result is stored to the unsigned char array k. */ __attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + k[i] = x & y; + } +} + +/* f5: like f3 but the result is stored to the unsigned char array k. */ __attribute__((noinline, noclone)) void +f5 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = (a[i] < b[i]) & (c[i] < d[i]); +} + +/* f6: OR counterpart of f1. */ __attribute__((noinline, noclone)) void +f6 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + unsigned int x = a[i] < b[i] ? -1 : 0; + unsigned int y = c[i] < d[i] ? 
-1 : 0; + j[i] = (x | y) >> 31; + } +} + +/* f7: OR of the two comparison results held in int temporaries, stored to j. */ __attribute__((noinline, noclone)) void +f7 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + j[i] = x | y; + } +} + +/* f8: OR of the two comparisons written as a single expression, stored to j. */ __attribute__((noinline, noclone)) void +f8 (void) +{ + int i; + for (i = 0; i < N; ++i) + j[i] = (a[i] < b[i]) | (c[i] < d[i]); +} + +/* f9: like f7 but the result is stored to the unsigned char array k. */ __attribute__((noinline, noclone)) void +f9 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + k[i] = x | y; + } +} + +/* f10: like f8 but the result is stored to the unsigned char array k. */ __attribute__((noinline, noclone)) void +f10 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = (a[i] < b[i]) | (c[i] < d[i]); +} + +/* Driver: fills the inputs, runs f1 to f10 and aborts on the first element that differs from the expected predicate. */ int +main () +{ + int i; + + check_vect (); + + /* Fill a and b so that a[i] < b[i] holds exactly when i % 3 == 0. */ for (i = 0; i < N; i++) + { + switch (i % 9) + { + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; + case 1: a[i] = 0; b[i] = 0; break; + case 2: a[i] = i + 1; b[i] = - i - 1; break; + case 3: a[i] = i; b[i] = i + 7; break; + case 4: a[i] = i; b[i] = i; break; + case 5: a[i] = i + 16; b[i] = i + 3; break; + case 6: a[i] = - i - 5; b[i] = - i; break; + case 7: a[i] = - i; b[i] = - i; break; + case 8: a[i] = - i; b[i] = - i - 7; break; + } + } + /* Fill c and d from entries of a and b chosen so that c[i] < d[i] holds exactly when (i / 9) % 3 == 0. */ for (i = 0; i < N; i++) + { + switch ((i / 9) % 3) + { + case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break; + case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break; + case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break; + } + } + /* AND variants: run each one, compare every element with the conjunction of the two predicates, then overwrite the output array before the next run. */ f1 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f2 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f3 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f4 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, -6, sizeof (k)); + f5 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 && 
((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, -6, sizeof (k)); + /* OR variants: same procedure, compared with the disjunction of the two predicates. */ f6 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f7 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f8 (); + for (i = 0; i < N; i++) + if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (j, -6, sizeof (j)); + f9 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, -6, sizeof (k)); + f10 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, -6, sizeof (k)); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 10 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */