diff mbox series

[committed] Add OpenMP 5 exclusive scan support for simd constructs

Message ID 20190621065729.GW815@tucnak
State New
Headers show
Series [committed] Add OpenMP 5 exclusive scan support for simd constructs | expand

Commit Message

Jakub Jelinek June 21, 2019, 6:57 a.m. UTC
Hi!

The following patch adds exclusive scan support for simd, it is similar to
the inclusive scan, just we need to swap the input and scan phases and
use slightly different pattern at the start of the scan phase, so that it
computes what we need.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-21  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
	create another "omp scan inscan exclusive" array if
	!ctx->scan_inclusive.
	(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
	(lower_omp_scan): Likewise.
	* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
	2-bit bitfield for simd_lane_access_p member.
	* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
	aux == (void *)-4 as simd lane access.
	* tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
	comment with permutations to show the canonical permutation order.
	(vectorizable_scan_store): Handle exclusive scan.
	(vectorizable_store): Call vectorizable_scan_store even for
	STMT_VINFO_SIMD_LANE_ACCESS_P > 3.

	* gcc.dg/vect/vect-simd-12.c: New test.
	* gcc.dg/vect/vect-simd-13.c: New test.
	* gcc.dg/vect/vect-simd-14.c: New test.
	* gcc.dg/vect/vect-simd-15.c: New test.
	* gcc.target/i386/sse2-vect-simd-12.c: New test.
	* gcc.target/i386/sse2-vect-simd-13.c: New test.
	* gcc.target/i386/sse2-vect-simd-14.c: New test.
	* gcc.target/i386/sse2-vect-simd-15.c: New test.
	* gcc.target/i386/avx2-vect-simd-12.c: New test.
	* gcc.target/i386/avx2-vect-simd-13.c: New test.
	* gcc.target/i386/avx2-vect-simd-14.c: New test.
	* gcc.target/i386/avx2-vect-simd-15.c: New test.
	* gcc.target/i386/avx512f-vect-simd-12.c: New test.
	* gcc.target/i386/avx512f-vect-simd-13.c: New test.
	* gcc.target/i386/avx512f-vect-simd-14.c: New test.
	* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
	* g++.dg/vect/simd-6.cc: New test.
	* g++.dg/vect/simd-7.cc: New test.
	* g++.dg/vect/simd-8.cc: New test.
	* g++.dg/vect/simd-9.cc: New test.
	* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.


	Jakub

Comments

Christophe Lyon June 24, 2019, 11:43 a.m. UTC | #1
On Fri, 21 Jun 2019 at 08:57, Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following patch adds exclusive scan support for simd, it is similar to
> the inclusive scan, just we need to swap the input and scan phases and
> use slightly different pattern at the start of the scan phase, so that it
> computes what we need.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
>
> 2019-06-21  Jakub Jelinek  <jakub@redhat.com>
>
>         * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
>         create another "omp scan inscan exclusive" array if
>         !ctx->scan_inclusive.
>         (lower_rec_input_clauses): Handle exclusive scan inscan reductions.
>         (lower_omp_scan): Likewise.
>         * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
>         2-bit bitfield for simd_lane_access_p member.
>         * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
>         aux == (void *)-4 as simd lane access.
>         * tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
>         comment with permutations to show the canonical permutation order.
>         (vectorizable_scan_store): Handle exclusive scan.
>         (vectorizable_store): Call vectorizable_scan_store even for
>         STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
>
>         * gcc.dg/vect/vect-simd-12.c: New test.
>         * gcc.dg/vect/vect-simd-13.c: New test.
>         * gcc.dg/vect/vect-simd-14.c: New test.
>         * gcc.dg/vect/vect-simd-15.c: New test.
>         * gcc.target/i386/sse2-vect-simd-12.c: New test.
>         * gcc.target/i386/sse2-vect-simd-13.c: New test.
>         * gcc.target/i386/sse2-vect-simd-14.c: New test.
>         * gcc.target/i386/sse2-vect-simd-15.c: New test.
>         * gcc.target/i386/avx2-vect-simd-12.c: New test.
>         * gcc.target/i386/avx2-vect-simd-13.c: New test.
>         * gcc.target/i386/avx2-vect-simd-14.c: New test.
>         * gcc.target/i386/avx2-vect-simd-15.c: New test.
>         * gcc.target/i386/avx512f-vect-simd-12.c: New test.
>         * gcc.target/i386/avx512f-vect-simd-13.c: New test.
>         * gcc.target/i386/avx512f-vect-simd-14.c: New test.
>         * gcc.target/i386/avx512bw-vect-simd-15.c: New test.
>         * g++.dg/vect/simd-6.cc: New test.
>         * g++.dg/vect/simd-7.cc: New test.
>         * g++.dg/vect/simd-8.cc: New test.
>         * g++.dg/vect/simd-9.cc: New test.
>         * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
>
> --- gcc/omp-low.c.jj    2019-06-20 13:26:29.085150770 +0200
> +++ gcc/omp-low.c       2019-06-20 15:46:25.964253058 +0200
> @@ -3692,7 +3692,8 @@ struct omplow_simd_context {
>  static bool
>  lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
>                               omplow_simd_context *sctx, tree &ivar,
> -                             tree &lvar, tree *rvar = NULL)
> +                             tree &lvar, tree *rvar = NULL,
> +                             tree *rvar2 = NULL)
>  {
>    if (known_eq (sctx->max_vf, 0U))
>      {
> @@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_v
>           *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
>                           sctx->lastlane, NULL_TREE, NULL_TREE);
>           TREE_THIS_NOTRAP (*rvar) = 1;
> +
> +         if (!ctx->scan_inclusive)
> +           {
> +             /* And for exclusive scan yet another one, which will
> +                hold the value during the scan phase.  */
> +             tree savar = create_tmp_var_raw (atype);
> +             if (TREE_ADDRESSABLE (new_var))
> +               TREE_ADDRESSABLE (savar) = 1;
> +             DECL_ATTRIBUTES (savar)
> +               = tree_cons (get_identifier ("omp simd array"), NULL,
> +                            tree_cons (get_identifier ("omp simd inscan "
> +                                                       "exclusive"), NULL,
> +                                       DECL_ATTRIBUTES (savar)));
> +             gimple_add_tmp_var (savar);
> +             ctx->cb.decl_map->put (iavar, savar);
> +             *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
> +                              sctx->idx, NULL_TREE, NULL_TREE);
> +             TREE_THIS_NOTRAP (*rvar2) = 1;
> +           }
>         }
>        ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
>                      NULL_TREE, NULL_TREE);
> @@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, g
>                       new_vard = TREE_OPERAND (new_var, 0);
>                       gcc_assert (DECL_P (new_vard));
>                     }
> -                 tree rvar = NULL_TREE, *rvarp = NULL;
> +                 tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
>                   if (is_simd
>                       && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
>                       && OMP_CLAUSE_REDUCTION_INSCAN (c))
>                     rvarp = &rvar;
>                   if (is_simd
>                       && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
> -                                                      ivar, lvar, rvarp))
> +                                                      ivar, lvar, rvarp,
> +                                                      &rvar2))
>                     {
>                       if (new_vard == new_var)
>                         {
> @@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, g
>                                     (c, ivar2, build_outer_var_ref (var, ctx));
>                               gimplify_and_add (x, &llist[0]);
>
> +                             if (rvar2)
> +                               {
> +                                 x = lang_hooks.decls.omp_clause_default_ctor
> +                                       (c, unshare_expr (rvar2),
> +                                        build_outer_var_ref (var, ctx));
> +                                 gimplify_and_add (x, &llist[0]);
> +                               }
> +
>                               /* For types that need construction, add another
>                                  private var which will be default constructed
>                                  and optionally initialized with
> @@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, g
>                                  iteration.  */
>                               tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
>                               gimple_add_tmp_var (nv);
> -                             ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
> +                             ctx->cb.decl_map->put (TREE_OPERAND (rvar2
> +                                                                  ? rvar2
> +                                                                  : ivar, 0),
>                                                      nv);
>                               x = lang_hooks.decls.omp_clause_default_ctor
>                                     (c, nv, build_outer_var_ref (var, ctx));
> @@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, g
>                               gimplify_stmt (&dtor, &tseq);
>                               gimple_seq_add_seq (&llist[1], tseq);
>                             }
> +
> +                         if (rvar2)
> +                           {
> +                             x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
> +                             if (x)
> +                               {
> +                                 tseq = NULL;
> +                                 dtor = x;
> +                                 gimplify_stmt (&dtor, &tseq);
> +                                 gimple_seq_add_seq (&llist[1], tseq);
> +                               }
> +                           }
>                           break;
>                         }
>                       if (x)
> @@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, g
>                               gimple_seq_add_seq (ilist, tseq);
>                             }
>                           OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> +                         if (!ctx->scan_inclusive)
> +                           {
> +                             tree nv2
> +                               = create_tmp_var_raw (TREE_TYPE (new_var));
> +                             gimple_add_tmp_var (nv2);
> +                             ctx->cb.decl_map->put (nv, nv2);
> +                             x = lang_hooks.decls.omp_clause_default_ctor
> +                                   (c, nv2, build_outer_var_ref (var, ctx));
> +                             gimplify_and_add (x, ilist);
> +                             x = lang_hooks.decls.omp_clause_dtor (c, nv2);
> +                             if (x)
> +                               {
> +                                 tseq = NULL;
> +                                 dtor = x;
> +                                 gimplify_stmt (&dtor, &tseq);
> +                                 gimple_seq_add_seq (dlist, tseq);
> +                               }
> +                           }
>                           x = lang_hooks.decls.omp_clause_dtor (c, nv);
>                           if (x)
>                             {
> @@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, g
>                               gimple_seq_add_seq (dlist, tseq);
>                             }
>                         }
> +                     else if (!ctx->scan_inclusive
> +                              && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
> +                       {
> +                         tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
> +                         gimple_add_tmp_var (nv2);
> +                         ctx->cb.decl_map->put (new_vard, nv2);
> +                         x = lang_hooks.decls.omp_clause_dtor (c, nv2);
> +                         if (x)
> +                           {
> +                             tseq = NULL;
> +                             dtor = x;
> +                             gimplify_stmt (&dtor, &tseq);
> +                             gimple_seq_add_seq (dlist, tseq);
> +                           }
> +                       }
>                       DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
>                       goto do_dtor;
>                     }
> @@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, g
>                       new_vard = TREE_OPERAND (new_var, 0);
>                       gcc_assert (DECL_P (new_vard));
>                     }
> -                 tree rvar = NULL_TREE, *rvarp = NULL;
> +                 tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
>                   if (is_simd
>                       && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
>                       && OMP_CLAUSE_REDUCTION_INSCAN (c))
>                     rvarp = &rvar;
>                   if (is_simd
>                       && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
> -                                                      ivar, lvar, rvarp))
> +                                                      ivar, lvar, rvarp,
> +                                                      &rvar2))
>                     {
>                       if (new_vard != new_var)
>                         {
> @@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gs
>    gimple_seq before = NULL;
>    omp_context *octx = ctx->outer;
>    gcc_assert (octx);
> +  if (!octx->scan_inclusive && !has_clauses)
> +    {
> +      gimple_stmt_iterator gsi2 = *gsi_p;
> +      gsi_next (&gsi2);
> +      gimple *stmt2 = gsi_stmt (gsi2);
> +      /* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses
> +        with following GIMPLE_OMP_SCAN with clauses, so that input_phase,
> +        the one with exclusive clause(s), comes first.  */
> +      if (stmt2
> +         && gimple_code (stmt2) == GIMPLE_OMP_SCAN
> +         && gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
> +       {
> +         gsi_remove (gsi_p, false);
> +         gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
> +         ctx = maybe_lookup_ctx (stmt2);
> +         gcc_assert (ctx);
> +         lower_omp_scan (gsi_p, ctx);
> +         return;
> +       }
> +    }
> +
>    bool input_phase = has_clauses ^ octx->scan_inclusive;
>    if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
>        && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
> -      && !gimple_omp_for_combined_into_p (octx->stmt)
> -      && octx->scan_inclusive)
> +      && !gimple_omp_for_combined_into_p (octx->stmt))
>      {
>        if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
>                                     OMP_CLAUSE__SIMDUID_))
>         {
>           tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
>           lane = create_tmp_var (unsigned_type_node);
> -         tree t = build_int_cst (integer_type_node, 1 + !input_phase);
> +         tree t = build_int_cst (integer_type_node,
> +                                 input_phase ? 1
> +                                 : octx->scan_inclusive ? 2 : 3);
>           gimple *g
>             = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
>           gimple_call_set_lhs (g, lane);
> @@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gs
>             tree val = new_var;
>             tree var2 = NULL_TREE;
>             tree var3 = NULL_TREE;
> +           tree var4 = NULL_TREE;
> +           tree lane0 = NULL_TREE;
>             tree new_vard = new_var;
>             if (omp_is_reference (var))
>               {
> @@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                                           DECL_ATTRIBUTES (v)))
>                       {
>                         val = unshare_expr (val);
> +                       lane0 = TREE_OPERAND (val, 1);
>                         TREE_OPERAND (val, 1) = lane;
>                         var2 = lookup_decl (v, octx);
> +                       if (!octx->scan_inclusive)
> +                         var4 = lookup_decl (var2, octx);
>                         if (input_phase
>                             && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> -                         var3 = maybe_lookup_decl (var2, octx);
> +                         var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
>                         if (!input_phase)
>                           {
>                             var2 = build4 (ARRAY_REF, TREE_TYPE (val),
>                                            var2, lane, NULL_TREE, NULL_TREE);
>                             TREE_THIS_NOTRAP (var2) = 1;
> +                           if (!octx->scan_inclusive)
> +                             {
> +                               var4 = build4 (ARRAY_REF, TREE_TYPE (val),
> +                                              var4, lane, NULL_TREE,
> +                                              NULL_TREE);
> +                               TREE_THIS_NOTRAP (var4) = 1;
> +                             }
>                           }
>                         else
>                           var2 = val;
> @@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gs
>             else
>               {
>                 var2 = build_outer_var_ref (var, octx);
> -               if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> +               if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
>                   {
>                     var3 = maybe_lookup_decl (new_vard, octx);
> -                   if (var3 == new_vard)
> +                   if (var3 == new_vard || var3 == NULL_TREE)
>                       var3 = NULL_TREE;
> +                   else if (!octx->scan_inclusive && !input_phase)
> +                     {
> +                       var4 = maybe_lookup_decl (var3, octx);
> +                       if (var4 == var3 || var4 == NULL_TREE)
> +                         {
> +                           if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
> +                             {
> +                               var4 = var3;
> +                               var3 = NULL_TREE;
> +                             }
> +                           else
> +                             var4 = NULL_TREE;
> +                         }
> +                     }
>                   }
> +               if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
> +                 var4 = create_tmp_var (TREE_TYPE (val));
>               }
>             if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
>               {
> @@ -8689,9 +8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                   }
>                 else
>                   {
> +                   tree x;
> +                   if (!octx->scan_inclusive)
> +                     {
> +                       tree v4 = unshare_expr (var4);
> +                       tree v2 = unshare_expr (var2);
> +                       x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
> +                       gimplify_and_add (x, &before);
> +                     }
>                     gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> -                   tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
> -                             ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
> +                   x = (DECL_HAS_VALUE_EXPR_P (new_vard)
> +                        ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
>                     tree vexpr = val;
>                     if (x && omp_is_reference (var))
>                       vexpr = build_fold_addr_expr_loc (clause_loc, val);
> @@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                       SET_DECL_VALUE_EXPR (new_vard, x);
>                     SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
>                     DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> -                   x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
> -                   gimplify_and_add (x, &before);
> +                   if (octx->scan_inclusive)
> +                     {
> +                       x = lang_hooks.decls.omp_clause_assign_op (c, val,
> +                                                                  var2);
> +                       gimplify_and_add (x, &before);
> +                     }
> +                   else if (lane0 == NULL_TREE)
> +                     {
> +                       x = lang_hooks.decls.omp_clause_assign_op (c, val,
> +                                                                  var4);
> +                       gimplify_and_add (x, &before);
> +                     }
>                   }
>               }
>             else
> @@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gs
>
>                     tree x = build2 (code, TREE_TYPE (var2),
>                                      unshare_expr (var2), unshare_expr (val));
> -                   gimplify_assign (unshare_expr (var2), x, &before);
> -                   gimplify_assign (val, var2, &before);
> +                   if (octx->scan_inclusive)
> +                     {
> +                       gimplify_assign (unshare_expr (var2), x, &before);
> +                       gimplify_assign (val, var2, &before);
> +                     }
> +                   else
> +                     {
> +                       gimplify_assign (unshare_expr (var4),
> +                                        unshare_expr (var2), &before);
> +                       gimplify_assign (var2, x, &before);
> +                       if (lane0 == NULL_TREE)
> +                         gimplify_assign (val, var4, &before);
> +                     }
>                   }
>               }
> +           if (!octx->scan_inclusive && !input_phase && lane0)
> +             {
> +               tree vexpr = unshare_expr (var4);
> +               TREE_OPERAND (vexpr, 1) = lane0;
> +               if (omp_is_reference (var))
> +                 vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
> +               SET_DECL_VALUE_EXPR (new_vard, vexpr);
> +             }
>           }
>      }
>    else if (has_clauses)
> --- gcc/tree-vectorizer.h.jj    2019-06-20 13:26:29.078150879 +0200
> +++ gcc/tree-vectorizer.h       2019-06-20 14:18:04.241075200 +0200
> @@ -917,7 +917,7 @@ struct _stmt_vec_info {
>    bool strided_p;
>
>    /* For both loads and stores.  */
> -  unsigned simd_lane_access_p : 2;
> +  unsigned simd_lane_access_p : 3;
>
>    /* Classifies how the load or store is going to be implemented
>       for loop vectorization.  */
> --- gcc/tree-vect-data-refs.c.jj        2019-06-20 13:55:35.421150589 +0200
> +++ gcc/tree-vect-data-refs.c   2019-06-20 14:18:04.240075216 +0200
> @@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo,
>        /* See if this was detected as SIMD lane access.  */
>        if (dr->aux == (void *)-1
>           || dr->aux == (void *)-2
> -         || dr->aux == (void *)-3)
> +         || dr->aux == (void *)-3
> +         || dr->aux == (void *)-4)
>         {
>           if (nested_in_vect_loop_p (loop, stmt_info))
>             return opt_result::failure_at (stmt_info->stmt,
> --- gcc/tree-vect-stmts.c.jj    2019-06-20 13:26:29.084150785 +0200
> +++ gcc/tree-vect-stmts.c       2019-06-20 14:18:04.239075231 +0200
> @@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_inf
>       kinds are there in order to allow optimizing the initializer store
>       and combiner sequence, e.g. if it is originally some C++ish user
>       defined reduction, but allow the vectorizer to pattern recognize it
> -     and turn into the appropriate vectorized scan.  */
> +     and turn into the appropriate vectorized scan.
> +
> +     For exclusive scan, this is slightly different:
> +     #pragma omp simd reduction(inscan,+:r)
> +     for (...)
> +       {
> +        use (r);
> +        #pragma omp scan exclusive (r)
> +        r += something ();
> +       }
> +     shall have body with:
> +       // Initialization for input phase, store the reduction initializer:
> +       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
> +       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
> +       D.2042[_21] = 0;
> +       // Actual input phase:
> +       ...
> +       r.0_5 = D.2042[_20];
> +       _6 = _4 + r.0_5;
> +       D.2042[_20] = _6;
> +       // Initialization for scan phase:
> +       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
> +       _26 = D.2043[_25];
> +       D.2044[_25] = _26;
> +       _27 = D.2042[_25];
> +       _28 = _26 + _27;
> +       D.2043[_25] = _28;
> +       // Actual scan phase:
> +       ...
> +       r.1_8 = D.2044[_20];
> +       ...  */
>
>    if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
>      {
> @@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_inf
>    if (TREE_CODE (rhs) != SSA_NAME)
>      goto fail;
>
> -  use_operand_p use_p;
> -  imm_use_iterator iter;
>    gimple *other_store_stmt = NULL;
> -  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
> +  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
> +  bool inscan_var_store
> +    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
> +
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
>      {
> -      gimple *use_stmt = USE_STMT (use_p);
> -      if (use_stmt == stmt || is_gimple_debug (use_stmt))
> -       continue;
> -      if (gimple_bb (use_stmt) != gimple_bb (stmt)
> -         || !gimple_store_p (use_stmt)
> -         || other_store_stmt)
> -       goto fail;
> -      other_store_stmt = use_stmt;
> +      if (!inscan_var_store)
> +       {
> +         use_operand_p use_p;
> +         imm_use_iterator iter;
> +         FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
> +           {
> +             gimple *use_stmt = USE_STMT (use_p);
> +             if (use_stmt == stmt || is_gimple_debug (use_stmt))
> +               continue;
> +             if (gimple_bb (use_stmt) != gimple_bb (stmt)
> +                 || !is_gimple_assign (use_stmt)
> +                 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
> +                 || other_store_stmt
> +                 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
> +               goto fail;
> +             other_store_stmt = use_stmt;
> +           }
> +         if (other_store_stmt == NULL)
> +           goto fail;
> +         rhs = gimple_assign_lhs (other_store_stmt);
> +         if (!single_imm_use (rhs, &use_p, &other_store_stmt))
> +           goto fail;
> +       }
>      }
> -  if (other_store_stmt == NULL)
> -    goto fail;
> -  stmt_vec_info other_store_stmt_info
> -    = loop_vinfo->lookup_stmt (other_store_stmt);
> -  if (other_store_stmt_info == NULL
> -      || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
> +  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
> +    {
> +      use_operand_p use_p;
> +      imm_use_iterator iter;
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
> +       {
> +         gimple *use_stmt = USE_STMT (use_p);
> +         if (use_stmt == stmt || is_gimple_debug (use_stmt))
> +           continue;
> +         if (other_store_stmt)
> +           goto fail;
> +         other_store_stmt = use_stmt;
> +       }
> +    }
> +  else
>      goto fail;
>
>    gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
> @@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_inf
>
>    tree rhs1 = gimple_assign_rhs1 (def_stmt);
>    tree rhs2 = gimple_assign_rhs2 (def_stmt);
> -  if (TREE_CODE (rhs1) != SSA_NAME
> -      || TREE_CODE (rhs2) != SSA_NAME)
> +  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
>      goto fail;
>
>    gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
> @@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_inf
>    stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
>    if (load1_stmt_info == NULL
>        || load2_stmt_info == NULL
> -      || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
> -      || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
> +      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
> +         != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
> +      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
> +         != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
>      goto fail;
>
> -  if (scan_operand_equal_p (gimple_assign_lhs (stmt),
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
> +    {
> +      dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
> +      if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
> +         || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
> +       goto fail;
> +      tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
> +      tree lrhs;
> +      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
> +       lrhs = rhs1;
> +      else
> +       lrhs = rhs2;
> +      use_operand_p use_p;
> +      imm_use_iterator iter;
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
> +       {
> +         gimple *use_stmt = USE_STMT (use_p);
> +         if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
> +           continue;
> +         if (other_store_stmt)
> +           goto fail;
> +         other_store_stmt = use_stmt;
> +       }
> +    }
> +
> +  if (other_store_stmt == NULL)
> +    goto fail;
> +  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
> +      || !gimple_store_p (other_store_stmt))
> +    goto fail;
> +
> +  stmt_vec_info other_store_stmt_info
> +    = loop_vinfo->lookup_stmt (other_store_stmt);
> +  if (other_store_stmt_info == NULL
> +      || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
> +         != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
> +    goto fail;
> +
> +  gimple *stmt1 = stmt;
> +  gimple *stmt2 = other_store_stmt;
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
> +    std::swap (stmt1, stmt2);
> +  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
>                             gimple_assign_rhs1 (load2_stmt)))
>      {
>        std::swap (rhs1, rhs2);
>        std::swap (load1_stmt, load2_stmt);
>        std::swap (load1_stmt_info, load2_stmt_info);
>      }
> -  if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
> -                            gimple_assign_rhs1 (load1_stmt))
> -      || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
> +  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
> +                            gimple_assign_rhs1 (load1_stmt)))
> +    goto fail;
> +
> +  tree var3 = NULL_TREE;
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
> +      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
>                                 gimple_assign_rhs1 (load2_stmt)))
>      goto fail;
> +  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
> +    {
> +      dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
> +      if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
> +         || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
> +       goto fail;
> +      var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
> +      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
> +         || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
> +         || lookup_attribute ("omp simd inscan exclusive",
> +                              DECL_ATTRIBUTES (var3)))
> +       goto fail;
> +    }
>
>    dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
>    if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
> @@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_inf
>    if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
>      std::swap (var1, var2);
>
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
> +    {
> +      if (!lookup_attribute ("omp simd inscan exclusive",
> +                            DECL_ATTRIBUTES (var1)))
> +       goto fail;
> +      var1 = var3;
> +    }
> +
>    if (loop_vinfo->scan_map == NULL)
>      goto fail;
>    tree *init = loop_vinfo->scan_map->get (var1);
> @@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_inf
>      goto fail;
>
>    /* The IL is as expected, now check if we can actually vectorize it.
> +     Inclusive scan:
>         _26 = D.2043[_25];
>         _27 = D.2042[_25];
>         _28 = _26 + _27;
> @@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_inf
>       from the D.2042[_21] = 0; store):
>         _30 = MEM <vector(8) int> [(int *)&D.2043];
>         _31 = MEM <vector(8) int> [(int *)&D.2042];
> -       _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
> +       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
>         _33 = _31 + _32;
>         // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
> -       _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
> +       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
>         _35 = _33 + _34;
>         // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
>         //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
> -       _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
> +       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
>         _37 = _35 + _36;
>         // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
>         //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
>         _38 = _30 + _37;
>         _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
>         MEM <vector(8) int> [(int *)&D.2043] = _39;
> -       MEM <vector(8) int> [(int *)&D.2042] = _38;  */
> +       MEM <vector(8) int> [(int *)&D.2042] = _38;
> +     Exclusive scan:
> +       _26 = D.2043[_25];
> +       D.2044[_25] = _26;
> +       _27 = D.2042[_25];
> +       _28 = _26 + _27;
> +       D.2043[_25] = _28;
> +     should be vectorized as (where _40 is the vectorized rhs
> +     from the D.2042[_21] = 0; store):
> +       _30 = MEM <vector(8) int> [(int *)&D.2043];
> +       _31 = MEM <vector(8) int> [(int *)&D.2042];
> +       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
> +       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
> +       _34 = _32 + _33;
> +       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
> +       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
> +       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
> +       _36 = _34 + _35;
> +       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
> +       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
> +       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
> +       _38 = _36 + _37;
> +       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
> +       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
> +       _39 = _30 + _38;
> +       _50 = _31 + _39;
> +       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
> +       MEM <vector(8) int> [(int *)&D.2044] = _39;
> +       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
>    enum machine_mode vec_mode = TYPE_MODE (vectype);
>    optab optab = optab_for_tree_code (code, vectype, optab_default);
>    if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
> @@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info s
>    tree rhs = gimple_assign_rhs1 (stmt);
>    gcc_assert (TREE_CODE (rhs) == SSA_NAME);
>
> +  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
> +  bool inscan_var_store
> +    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
> +
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
> +    {
> +      use_operand_p use_p;
> +      imm_use_iterator iter;
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
> +       {
> +         gimple *use_stmt = USE_STMT (use_p);
> +         if (use_stmt == stmt || is_gimple_debug (use_stmt))
> +           continue;
> +         rhs = gimple_assign_lhs (use_stmt);
> +         break;
> +       }
> +    }
> +
>    gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
>    enum tree_code code = gimple_assign_rhs_code (def_stmt);
>    if (code == POINTER_PLUS_EXPR)
> @@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info s
>      {
>        std::swap (rhs1, rhs2);
>        std::swap (var1, var2);
> +      std::swap (load1_dr_info, load2_dr_info);
>      }
>
>    tree *init = loop_vinfo->scan_map->get (var1);
>    gcc_assert (init);
>
> -  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
> -  bool inscan_var_store
> -    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
> -
>    unsigned HOST_WIDE_INT nunits;
>    if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
>      gcc_unreachable ();
> @@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info s
>    tree vec_oprnd1 = NULL_TREE;
>    tree vec_oprnd2 = NULL_TREE;
>    tree vec_oprnd3 = NULL_TREE;
> -  tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
> +  tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
>    tree dataref_offset = build_int_cst (ref_type, 0);
>    tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
> +  tree ldataref_ptr = NULL_TREE;
>    tree orig = NULL_TREE;
> +  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
> +    ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
>    for (int j = 0; j < ncopies; j++)
>      {
>        stmt_vec_info new_stmt_info;
>        if (j == 0)
>         {
>           vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
> -         vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
> +         if (ldataref_ptr == NULL)
> +           vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
>           vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
>           orig = vec_oprnd3;
>         }
>        else
>         {
>           vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
> -         vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
> +         if (ldataref_ptr == NULL)
> +           vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
>           vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
>           if (!inscan_var_store)
>             dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
>         }
>
> +      if (ldataref_ptr)
> +       {
> +         vec_oprnd2 = make_ssa_name (vectype);
> +         tree data_ref = fold_build2 (MEM_REF, vectype,
> +                                      unshare_expr (ldataref_ptr),
> +                                      dataref_offset);
> +         vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
> +         gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
> +         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
> +         if (prev_stmt_info == NULL)
> +           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
> +         else
> +           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
> +         prev_stmt_info = new_stmt_info;
> +       }
> +
>        tree v = vec_oprnd2;
>        for (int i = 0; i < units_log2; ++i)
>         {
> @@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info s
>               new_temp = new_temp2;
>             }
>
> +         /* For exclusive scan, perform the perms[i] permutation once
> +            more.  */
> +         if (i == 0
> +             && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
> +             && v == vec_oprnd2)
> +           {
> +             v = new_temp;
> +             --i;
> +             continue;
> +           }
> +
>           tree new_temp2 = make_ssa_name (vectype);
>           g = gimple_build_assign (new_temp2, code, v, new_temp);
>           new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
> @@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info s
>        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
>        prev_stmt_info = new_stmt_info;
>
> +      tree last_perm_arg = new_temp;
> +      /* For exclusive scan, new_temp computed above is the exclusive scan
> +        prefix sum.  Turn it into inclusive prefix sum for the broadcast
> +        of the last element into orig.  */
> +      if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
> +       {
> +         last_perm_arg = make_ssa_name (vectype);
> +         g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
> +         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
> +         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
> +         prev_stmt_info = new_stmt_info;
> +       }
> +
>        orig = make_ssa_name (vectype);
> -      g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
> -                              perms[units_log2]);
> +      g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
> +                              last_perm_arg, perms[units_log2]);
>        new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
>        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
>        prev_stmt_info = new_stmt_info;
>
>        if (!inscan_var_store)
>         {
> -         tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
> +         tree data_ref = fold_build2 (MEM_REF, vectype,
> +                                      unshare_expr (dataref_ptr),
>                                        dataref_offset);
>           vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
>           g = gimple_build_assign (data_ref, new_temp);
> @@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info s
>         if (j != 0)
>           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
>
> -       tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
> +       tree data_ref = fold_build2 (MEM_REF, vectype,
> +                                    unshare_expr (dataref_ptr),
>                                      dataref_offset);
>         vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
>         gimple *g = gimple_build_assign (data_ref, orig);
> @@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_i
>         }
>        return true;
>      }
> -  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
> +  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
>      return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
>
>    if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> --- gcc/testsuite/gcc.dg/vect/vect-simd-12.c.jj 2019-06-20 15:08:50.260400440 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-12.c    2019-06-20 15:08:24.332805239 +0200
> @@ -0,0 +1,122 @@
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
> +
> +#ifndef main
> +#include "tree-vect.h"
> +#endif
> +
> +int r, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp simd reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp simd reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp simd reduction (inscan, +:s) simdlen (1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +#ifndef main
> +  check_vect ();
> +#endif
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/vect/vect-simd-13.c.jj 2019-06-20 15:47:23.580359715 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-13.c    2019-06-20 15:13:23.500134387 +0200
> @@ -0,0 +1,124 @@
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
> +
> +#ifndef main
> +#include "tree-vect.h"
> +#endif
> +
> +int r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp simd reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp simd reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +#ifndef main
> +  check_vect ();
> +#endif
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/vect/vect-simd-14.c.jj 2019-06-20 15:48:30.536321539 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c    2019-06-20 15:54:39.291617792 +0200
> @@ -0,0 +1,94 @@
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
> +
> +#ifndef main
> +#include "tree-vect.h"
> +#endif
> +
> +float r = 1.0f, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (float *a, float *b)
> +{
> +  #pragma omp simd reduction (inscan, *:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r *= a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) float
> +bar (void)
> +{
> +  float s = -__builtin_inff ();
> +  #pragma omp simd reduction (inscan, max:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s = s > a[i] ? s : a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  float s = 1.0f;
> +#ifndef main
> +  check_vect ();
> +#endif
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (i < 80)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 200)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else if (i < 280)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 380)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else
> +       switch (i % 6)
> +         {
> +         case 0: a[i] = 0.25f; break;
> +         case 1: a[i] = 2.0f; break;
> +         case 2: a[i] = -1.0f; break;
> +         case 3: a[i] = -4.0f; break;
> +         case 4: a[i] = 0.5f; break;
> +         case 5: a[i] = 1.0f; break;
> +         default: a[i] = 0.0f; break;
> +         }
> +      b[i] = -19.0f;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b);
> +  if (r * 16384.0f != 0.125f)
> +    abort ();
> +  float m = -175.25f;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -231.75f;
> +      s *= a[i];
> +      a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
> +      m += 0.75f;
> +    }
> +  if (bar () != 592.0f)
> +    abort ();
> +  s = -__builtin_inff ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      if (s < a[i])
> +       s = a[i];
> +    }
> +  return 0;
> +}


Hi,
I've noticed that this new test (gcc.dg/vect/vect-simd-14.c)
fails at execution time on arm targets.

It does pass on aarch64.

Christophe

> --- gcc/testsuite/gcc.dg/vect/vect-simd-15.c.jj 2019-06-20 15:50:34.483399705 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-15.c    2019-06-20 15:52:09.976919050 +0200
> @@ -0,0 +1,186 @@
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
> +
> +#ifndef main
> +#include "tree-vect.h"
> +#endif
> +
> +int r, a[1024], b[1024];
> +unsigned short r2, b2[1024];
> +unsigned char r3, b3[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp simd reduction (inscan, +:r, r2, r3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +      #pragma omp scan exclusive(r, r2, r3)
> +      { r += a[i]; r2 += a[i]; r3 += a[i]; }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp simd reduction (inscan, +:s, s2, s3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +      #pragma omp scan exclusive(s, s2, s3)
> +      {
> +       s += 2 * a[i];
> +       s2 += 2 * a[i];
> +       s3 += 2 * a[i];
> +      }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +      #pragma omp scan exclusive(r, r2, r3)
> +      {
> +       r += a[i];
> +       r2 += a[i];
> +       r3 += a[i];
> +      }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +      #pragma omp scan exclusive(s, s2, s3)
> +      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  unsigned short s2;
> +  unsigned char s3;
> +#ifndef main
> +  check_vect ();
> +#endif
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      b2[i] = -1;
> +      b3[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +      s += i;
> +    }
> +  if (bar (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = -1;
> +         b2[i] = -1;
> +         b3[i] = -1;
> +       }
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  r2 = 0;
> +  r3 = 0;
> +  baz (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +      s += i;
> +    }
> +  s2 = 0;
> +  s3 = 0;
> +  if (qux (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c.jj        2019-06-20 15:58:35.276983324 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c   2019-06-20 15:58:35.274983355 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target sse2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "sse2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-12.c"
> +
> +static void
> +sse2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c.jj        2019-06-20 15:58:35.283983216 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c   2019-06-20 15:58:35.281983247 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target sse2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "sse2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-13.c"
> +
> +static void
> +sse2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c.jj        2019-06-20 15:58:35.288983139 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c   2019-06-20 15:58:35.287983154 +0200
> @@ -0,0 +1,15 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target sse2 } */
> +
> +#include "sse2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-14.c"
> +
> +static void
> +sse2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c.jj        2019-06-20 15:58:35.293983061 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c   2019-06-20 15:58:35.292983077 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target sse2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "sse2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-15.c"
> +
> +static void
> +sse2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c.jj        2019-06-20 15:58:35.299982969 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c   2019-06-20 15:58:35.297982999 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-12.c"
> +
> +static void
> +avx2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c.jj        2019-06-20 15:58:35.305982876 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c   2019-06-20 15:58:35.303982907 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-13.c"
> +
> +static void
> +avx2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c.jj        2019-06-20 15:58:35.310982799 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c   2019-06-20 15:58:35.309982815 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-14.c"
> +
> +static void
> +avx2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c.jj        2019-06-20 15:58:35.316982707 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c   2019-06-20 15:58:35.314982738 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx2-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-15.c"
> +
> +static void
> +avx2_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c.jj     2019-06-20 15:58:35.323982599 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c        2019-06-20 15:58:35.321982630 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx512f } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx512f-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-12.c"
> +
> +static void
> +avx512f_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c.jj     2019-06-20 15:58:35.328982522 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c        2019-06-20 15:58:35.326982553 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx512f } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx512f-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-13.c"
> +
> +static void
> +avx512f_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c.jj     2019-06-20 15:58:35.333982445 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c        2019-06-20 15:58:35.332982461 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx512f } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx512f-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-14.c"
> +
> +static void
> +avx512f_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c.jj    2019-06-20 15:58:35.347982230 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c       2019-06-20 15:58:35.346982245 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx512bw } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
> +
> +#include "avx512bw-check.h"
> +
> +#define main() do_main ()
> +
> +#include "../../gcc.dg/vect/vect-simd-15.c"
> +
> +static void
> +avx512bw_test (void)
> +{
> +  do_main ();
> +}
> --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj      2019-06-20 16:00:34.800142524 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-6.cc 2019-06-20 16:07:41.722559826 +0200
> @@ -0,0 +1,161 @@
> +// { dg-require-effective-target size32plus }
> +// { dg-additional-options "-fopenmp-simd" }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +
> +#include "../../gcc.dg/vect/tree-vect.h"
> +
> +template <typename T>
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  T s;
> +};
> +
> +template <typename T>
> +S<T>::S () : s (0)
> +{
> +}
> +
> +template <typename T>
> +S<T>::~S ()
> +{
> +}
> +
> +template <typename T>
> +S<T>::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +template <typename T>
> +S<T> &
> +S<T>::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +template <typename T>
> +static inline void
> +ini (S<T> &x)
> +{
> +  x.s = 0;
> +}
> +
> +S<int> r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +foo (S<T> *a, S<T> *b)
> +{
> +  #pragma omp simd reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) S<T>
> +bar (void)
> +{
> +  S<T> s;
> +  #pragma omp simd reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return S<T> (s);
> +}
> +
> +__attribute__((noipa)) void
> +baz (S<int> *a, S<int> *b)
> +{
> +  #pragma omp simd reduction (inscan, +:r) simdlen(1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S<int>
> +qux (void)
> +{
> +  S<int> s;
> +  #pragma omp simd if (0) reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return S<int> (s);
> +}
> +
> +int
> +main ()
> +{
> +  S<int> s;
> +  check_vect ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (bar<int> ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  r.s = 0;
> +  baz (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/g++.dg/vect/simd-7.cc.jj      2019-06-20 16:00:51.095891542 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-7.cc 2019-06-20 16:12:50.222747875 +0200
> @@ -0,0 +1,124 @@
> +// { dg-require-effective-target size32plus }
> +// { dg-additional-options "-fopenmp-simd" }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
> +
> +#include "../../gcc.dg/vect/tree-vect.h"
> +
> +int r, a[1024], b[1024], q;
> +
> +template <typename T, typename U>
> +__attribute__((noipa)) void
> +foo (T a, T b, U r)
> +{
> +  #pragma omp simd reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) T
> +bar (void)
> +{
> +  T &s = q;
> +  q = 0;
> +  #pragma omp simd reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +baz (T *a, T *b, T &r)
> +{
> +  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
> +  for (T i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  T s = q;
> +  q = 0;
> +  #pragma omp simd reduction (inscan, +:s) simdlen (1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  check_vect ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo<int *, int &> (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar<int> () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz<int> (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux<int &> () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/g++.dg/vect/simd-8.cc.jj      2019-06-20 16:00:54.154844430 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-8.cc 2019-06-20 16:15:37.994133891 +0200
> @@ -0,0 +1,122 @@
> +// { dg-require-effective-target size32plus }
> +// { dg-additional-options "-fopenmp-simd" }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
> +
> +#include "../../gcc.dg/vect/tree-vect.h"
> +
> +int r, a[1024], b[1024], q;
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> +  #pragma omp simd reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp simd reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> +  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  check_vect ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj      2019-06-20 16:00:57.197797566 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-06-20 16:17:27.484427949 +0200
> @@ -0,0 +1,153 @@
> +// { dg-require-effective-target size32plus }
> +// { dg-additional-options "-fopenmp-simd" }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +
> +#include "../../gcc.dg/vect/tree-vect.h"
> +
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> +  x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b, S &r)
> +{
> +  #pragma omp simd reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +bar (void)
> +{
> +  S s;
> +  #pragma omp simd reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b, S &r)
> +{
> +  #pragma omp simd reduction (inscan, +:r) simdlen(1)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +qux (void)
> +{
> +  S s;
> +  #pragma omp simd if (0) reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  S s;
> +  check_vect ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  foo (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (bar ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  r.s = 0;
> +  baz (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  return 0;
> +}
> --- gcc/testsuite/c-c++-common/gomp/scan-2.c.jj 2019-06-10 14:18:17.461525669 +0200
> +++ gcc/testsuite/c-c++-common/gomp/scan-2.c    2019-06-20 23:54:03.615422149 +0200
> @@ -8,7 +8,7 @@ f1 (int *c, int *d)
>    for (i = 0; i < 64; i++)
>      {
>        d[i] = a;
> -      #pragma omp scan exclusive (a)           /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
> +      #pragma omp scan exclusive (a)
>        a += c[i];
>      }
>  }
>
>         Jakub
diff mbox series

Patch

--- gcc/omp-low.c.jj	2019-06-20 13:26:29.085150770 +0200
+++ gcc/omp-low.c	2019-06-20 15:46:25.964253058 +0200
@@ -3692,7 +3692,8 @@  struct omplow_simd_context {
 static bool
 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
 			      omplow_simd_context *sctx, tree &ivar,
-			      tree &lvar, tree *rvar = NULL)
+			      tree &lvar, tree *rvar = NULL,
+			      tree *rvar2 = NULL)
 {
   if (known_eq (sctx->max_vf, 0U))
     {
@@ -3767,6 +3768,25 @@  lower_rec_simd_input_clauses (tree new_v
 	  *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
 			  sctx->lastlane, NULL_TREE, NULL_TREE);
 	  TREE_THIS_NOTRAP (*rvar) = 1;
+
+	  if (!ctx->scan_inclusive)
+	    {
+	      /* And for exclusive scan yet another one, which will
+		 hold the value during the scan phase.  */
+	      tree savar = create_tmp_var_raw (atype);
+	      if (TREE_ADDRESSABLE (new_var))
+		TREE_ADDRESSABLE (savar) = 1;
+	      DECL_ATTRIBUTES (savar)
+		= tree_cons (get_identifier ("omp simd array"), NULL,
+			     tree_cons (get_identifier ("omp simd inscan "
+							"exclusive"), NULL,
+					DECL_ATTRIBUTES (savar)));
+	      gimple_add_tmp_var (savar);
+	      ctx->cb.decl_map->put (iavar, savar);
+	      *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
+			       sctx->idx, NULL_TREE, NULL_TREE);
+	      TREE_THIS_NOTRAP (*rvar2) = 1;
+	    }
 	}
       ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
 		     NULL_TREE, NULL_TREE);
@@ -5185,14 +5205,15 @@  lower_rec_input_clauses (tree clauses, g
 		      new_vard = TREE_OPERAND (new_var, 0);
 		      gcc_assert (DECL_P (new_vard));
 		    }
-		  tree rvar = NULL_TREE, *rvarp = NULL;
+		  tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
 		  if (is_simd
 		      && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
 		      && OMP_CLAUSE_REDUCTION_INSCAN (c))
 		    rvarp = &rvar;
 		  if (is_simd
 		      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
-						       ivar, lvar, rvarp))
+						       ivar, lvar, rvarp,
+						       &rvar2))
 		    {
 		      if (new_vard == new_var)
 			{
@@ -5220,6 +5241,14 @@  lower_rec_input_clauses (tree clauses, g
 				    (c, ivar2, build_outer_var_ref (var, ctx));
 			      gimplify_and_add (x, &llist[0]);
 
+			      if (rvar2)
+				{
+				  x = lang_hooks.decls.omp_clause_default_ctor
+					(c, unshare_expr (rvar2),
+					 build_outer_var_ref (var, ctx));
+				  gimplify_and_add (x, &llist[0]);
+				}
+
 			      /* For types that need construction, add another
 				 private var which will be default constructed
 				 and optionally initialized with
@@ -5229,7 +5258,9 @@  lower_rec_input_clauses (tree clauses, g
 				 iteration.  */
 			      tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
 			      gimple_add_tmp_var (nv);
-			      ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
+			      ctx->cb.decl_map->put (TREE_OPERAND (rvar2
+								   ? rvar2
+								   : ivar, 0),
 						     nv);
 			      x = lang_hooks.decls.omp_clause_default_ctor
 				    (c, nv, build_outer_var_ref (var, ctx));
@@ -5296,6 +5327,18 @@  lower_rec_input_clauses (tree clauses, g
 			      gimplify_stmt (&dtor, &tseq);
 			      gimple_seq_add_seq (&llist[1], tseq);
 			    }
+
+			  if (rvar2)
+			    {
+			      x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
+			      if (x)
+				{
+				  tseq = NULL;
+				  dtor = x;
+				  gimplify_stmt (&dtor, &tseq);
+				  gimple_seq_add_seq (&llist[1], tseq);
+				}
+			    }
 			  break;
 			}
 		      if (x)
@@ -5390,6 +5433,24 @@  lower_rec_input_clauses (tree clauses, g
 			      gimple_seq_add_seq (ilist, tseq);
 			    }
 			  OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+			  if (!ctx->scan_inclusive)
+			    {
+			      tree nv2
+				= create_tmp_var_raw (TREE_TYPE (new_var));
+			      gimple_add_tmp_var (nv2);
+			      ctx->cb.decl_map->put (nv, nv2);
+			      x = lang_hooks.decls.omp_clause_default_ctor
+				    (c, nv2, build_outer_var_ref (var, ctx));
+			      gimplify_and_add (x, ilist);
+			      x = lang_hooks.decls.omp_clause_dtor (c, nv2);
+			      if (x)
+				{
+				  tseq = NULL;
+				  dtor = x;
+				  gimplify_stmt (&dtor, &tseq);
+				  gimple_seq_add_seq (dlist, tseq);
+				}
+			    }
 			  x = lang_hooks.decls.omp_clause_dtor (c, nv);
 			  if (x)
 			    {
@@ -5399,6 +5460,21 @@  lower_rec_input_clauses (tree clauses, g
 			      gimple_seq_add_seq (dlist, tseq);
 			    }
 			}
+		      else if (!ctx->scan_inclusive
+			       && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
+			{
+			  tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
+			  gimple_add_tmp_var (nv2);
+			  ctx->cb.decl_map->put (new_vard, nv2);
+			  x = lang_hooks.decls.omp_clause_dtor (c, nv2);
+			  if (x)
+			    {
+			      tseq = NULL;
+			      dtor = x;
+			      gimplify_stmt (&dtor, &tseq);
+			      gimple_seq_add_seq (dlist, tseq);
+			    }
+			}
 		      DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
 		      goto do_dtor;
 		    }
@@ -5487,14 +5563,15 @@  lower_rec_input_clauses (tree clauses, g
 		      new_vard = TREE_OPERAND (new_var, 0);
 		      gcc_assert (DECL_P (new_vard));
 		    }
-		  tree rvar = NULL_TREE, *rvarp = NULL;
+		  tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
 		  if (is_simd
 		      && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
 		      && OMP_CLAUSE_REDUCTION_INSCAN (c))
 		    rvarp = &rvar;
 		  if (is_simd
 		      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
-						       ivar, lvar, rvarp))
+						       ivar, lvar, rvarp,
+						       &rvar2))
 		    {
 		      if (new_vard != new_var)
 			{
@@ -8573,18 +8650,40 @@  lower_omp_scan (gimple_stmt_iterator *gs
   gimple_seq before = NULL;
   omp_context *octx = ctx->outer;
   gcc_assert (octx);
+  if (!octx->scan_inclusive && !has_clauses)
+    {
+      gimple_stmt_iterator gsi2 = *gsi_p;
+      gsi_next (&gsi2);
+      gimple *stmt2 = gsi_stmt (gsi2);
+      /* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses
+	 with following GIMPLE_OMP_SCAN with clauses, so that input_phase,
+	 the one with exclusive clause(s), comes first.  */
+      if (stmt2
+	  && gimple_code (stmt2) == GIMPLE_OMP_SCAN
+	  && gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
+	{
+	  gsi_remove (gsi_p, false);
+	  gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
+	  ctx = maybe_lookup_ctx (stmt2);
+	  gcc_assert (ctx);
+	  lower_omp_scan (gsi_p, ctx);
+	  return;
+	}
+    }
+
   bool input_phase = has_clauses ^ octx->scan_inclusive;
   if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
       && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
-      && !gimple_omp_for_combined_into_p (octx->stmt)
-      && octx->scan_inclusive)
+      && !gimple_omp_for_combined_into_p (octx->stmt))
     {
       if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
 				    OMP_CLAUSE__SIMDUID_))
 	{
 	  tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
 	  lane = create_tmp_var (unsigned_type_node);
-	  tree t = build_int_cst (integer_type_node, 1 + !input_phase);
+	  tree t = build_int_cst (integer_type_node,
+				  input_phase ? 1
+				  : octx->scan_inclusive ? 2 : 3);
 	  gimple *g
 	    = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
 	  gimple_call_set_lhs (g, lane);
@@ -8601,6 +8700,8 @@  lower_omp_scan (gimple_stmt_iterator *gs
 	    tree val = new_var;
 	    tree var2 = NULL_TREE;
 	    tree var3 = NULL_TREE;
+	    tree var4 = NULL_TREE;
+	    tree lane0 = NULL_TREE;
 	    tree new_vard = new_var;
 	    if (omp_is_reference (var))
 	      {
@@ -8623,16 +8724,26 @@  lower_omp_scan (gimple_stmt_iterator *gs
 					  DECL_ATTRIBUTES (v)))
 		      {
 			val = unshare_expr (val);
+			lane0 = TREE_OPERAND (val, 1);
 			TREE_OPERAND (val, 1) = lane;
 			var2 = lookup_decl (v, octx);
+			if (!octx->scan_inclusive)
+			  var4 = lookup_decl (var2, octx);
 			if (input_phase
 			    && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
-			  var3 = maybe_lookup_decl (var2, octx);
+			  var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
 			if (!input_phase)
 			  {
 			    var2 = build4 (ARRAY_REF, TREE_TYPE (val),
 					   var2, lane, NULL_TREE, NULL_TREE);
 			    TREE_THIS_NOTRAP (var2) = 1;
+			    if (!octx->scan_inclusive)
+			      {
+				var4 = build4 (ARRAY_REF, TREE_TYPE (val),
+					       var4, lane, NULL_TREE,
+					       NULL_TREE);
+				TREE_THIS_NOTRAP (var4) = 1;
+			      }
 			  }
 			else
 			  var2 = val;
@@ -8643,12 +8754,28 @@  lower_omp_scan (gimple_stmt_iterator *gs
 	    else
 	      {
 		var2 = build_outer_var_ref (var, octx);
-		if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+		if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
 		  {
 		    var3 = maybe_lookup_decl (new_vard, octx);
-		    if (var3 == new_vard)
+		    if (var3 == new_vard || var3 == NULL_TREE)
 		      var3 = NULL_TREE;
+		    else if (!octx->scan_inclusive && !input_phase)
+		      {
+			var4 = maybe_lookup_decl (var3, octx);
+			if (var4 == var3 || var4 == NULL_TREE)
+			  {
+			    if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
+			      {
+				var4 = var3;
+				var3 = NULL_TREE;
+			      }
+			    else
+			      var4 = NULL_TREE;
+			  }
+		      }
 		  }
+		if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
+		  var4 = create_tmp_var (TREE_TYPE (val));
 	      }
 	    if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
 	      {
@@ -8689,9 +8816,17 @@  lower_omp_scan (gimple_stmt_iterator *gs
 		  }
 		else
 		  {
+		    tree x;
+		    if (!octx->scan_inclusive)
+		      {
+			tree v4 = unshare_expr (var4);
+			tree v2 = unshare_expr (var2);
+			x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
+			gimplify_and_add (x, &before);
+		      }
 		    gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
-		    tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
-			      ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
+		    x = (DECL_HAS_VALUE_EXPR_P (new_vard)
+			 ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
 		    tree vexpr = val;
 		    if (x && omp_is_reference (var))
 		      vexpr = build_fold_addr_expr_loc (clause_loc, val);
@@ -8706,8 +8841,18 @@  lower_omp_scan (gimple_stmt_iterator *gs
 		      SET_DECL_VALUE_EXPR (new_vard, x);
 		    SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
 		    DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
-		    x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
-		    gimplify_and_add (x, &before);
+		    if (octx->scan_inclusive)
+		      {
+			x = lang_hooks.decls.omp_clause_assign_op (c, val,
+								   var2);
+			gimplify_and_add (x, &before);
+		      }
+		    else if (lane0 == NULL_TREE)
+		      {
+			x = lang_hooks.decls.omp_clause_assign_op (c, val,
+								   var4);
+			gimplify_and_add (x, &before);
+		      }
 		  }
 	      }
 	    else
@@ -8728,10 +8873,29 @@  lower_omp_scan (gimple_stmt_iterator *gs
 
 		    tree x = build2 (code, TREE_TYPE (var2),
 				     unshare_expr (var2), unshare_expr (val));
-		    gimplify_assign (unshare_expr (var2), x, &before);
-		    gimplify_assign (val, var2, &before);
+		    if (octx->scan_inclusive)
+		      {
+			gimplify_assign (unshare_expr (var2), x, &before);
+			gimplify_assign (val, var2, &before);
+		      }
+		    else
+		      {
+			gimplify_assign (unshare_expr (var4),
+					 unshare_expr (var2), &before);
+			gimplify_assign (var2, x, &before);
+			if (lane0 == NULL_TREE)
+			  gimplify_assign (val, var4, &before);
+		      }
 		  }
 	      }
+	    if (!octx->scan_inclusive && !input_phase && lane0)
+	      {
+		tree vexpr = unshare_expr (var4);
+		TREE_OPERAND (vexpr, 1) = lane0;
+		if (omp_is_reference (var))
+		  vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
+		SET_DECL_VALUE_EXPR (new_vard, vexpr);
+	      }
 	  }
     }
   else if (has_clauses)
--- gcc/tree-vectorizer.h.jj	2019-06-20 13:26:29.078150879 +0200
+++ gcc/tree-vectorizer.h	2019-06-20 14:18:04.241075200 +0200
@@ -917,7 +917,7 @@  struct _stmt_vec_info {
   bool strided_p;
 
   /* For both loads and stores.  */
-  unsigned simd_lane_access_p : 2;
+  unsigned simd_lane_access_p : 3;
 
   /* Classifies how the load or store is going to be implemented
      for loop vectorization.  */
--- gcc/tree-vect-data-refs.c.jj	2019-06-20 13:55:35.421150589 +0200
+++ gcc/tree-vect-data-refs.c	2019-06-20 14:18:04.240075216 +0200
@@ -4223,7 +4223,8 @@  vect_analyze_data_refs (vec_info *vinfo,
       /* See if this was detected as SIMD lane access.  */
       if (dr->aux == (void *)-1
 	  || dr->aux == (void *)-2
-	  || dr->aux == (void *)-3)
+	  || dr->aux == (void *)-3
+	  || dr->aux == (void *)-4)
 	{
 	  if (nested_in_vect_loop_p (loop, stmt_info))
 	    return opt_result::failure_at (stmt_info->stmt,
--- gcc/tree-vect-stmts.c.jj	2019-06-20 13:26:29.084150785 +0200
+++ gcc/tree-vect-stmts.c	2019-06-20 14:18:04.239075231 +0200
@@ -6512,7 +6512,37 @@  check_scan_store (stmt_vec_info stmt_inf
      kinds are there in order to allow optimizing the initializer store
      and combiner sequence, e.g. if it is originally some C++ish user
      defined reduction, but allow the vectorizer to pattern recognize it
-     and turn into the appropriate vectorized scan.  */
+     and turn into the appropriate vectorized scan.
+
+     For exclusive scan, this is slightly different:
+     #pragma omp simd reduction(inscan,+:r)
+     for (...)
+       {
+	 use (r);
+	 #pragma omp scan exclusive (r)
+	 r += something ();
+       }
+     shall have body with:
+       // Initialization for input phase, store the reduction initializer:
+       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
+       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
+       D.2042[_21] = 0;
+       // Actual input phase:
+       ...
+       r.0_5 = D.2042[_20];
+       _6 = _4 + r.0_5;
+       D.2042[_20] = _6;
+       // Initialization for scan phase:
+       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
+       _26 = D.2043[_25];
+       D.2044[_25] = _26;
+       _27 = D.2042[_25];
+       _28 = _26 + _27;
+       D.2043[_25] = _28;
+       // Actual scan phase:
+       ...
+       r.1_8 = D.2044[_20];
+       ...  */
 
   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
     {
@@ -6553,26 +6583,52 @@  check_scan_store (stmt_vec_info stmt_inf
   if (TREE_CODE (rhs) != SSA_NAME)
     goto fail;
 
-  use_operand_p use_p;
-  imm_use_iterator iter;
   gimple *other_store_stmt = NULL;
-  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+  bool inscan_var_store
+    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
     {
-      gimple *use_stmt = USE_STMT (use_p);
-      if (use_stmt == stmt || is_gimple_debug (use_stmt))
-	continue;
-      if (gimple_bb (use_stmt) != gimple_bb (stmt)
-	  || !gimple_store_p (use_stmt)
-	  || other_store_stmt)
-	goto fail;
-      other_store_stmt = use_stmt;
+      if (!inscan_var_store)
+	{
+	  use_operand_p use_p;
+	  imm_use_iterator iter;
+	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+	    {
+	      gimple *use_stmt = USE_STMT (use_p);
+	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
+		continue;
+	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
+		  || !is_gimple_assign (use_stmt)
+		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
+		  || other_store_stmt
+		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
+		goto fail;
+	      other_store_stmt = use_stmt;
+	    }
+	  if (other_store_stmt == NULL)
+	    goto fail;
+	  rhs = gimple_assign_lhs (other_store_stmt);
+	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
+	    goto fail;
+	}
     }
-  if (other_store_stmt == NULL)
-    goto fail;
-  stmt_vec_info other_store_stmt_info
-    = loop_vinfo->lookup_stmt (other_store_stmt);
-  if (other_store_stmt_info == NULL
-      || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
+  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+    {
+      use_operand_p use_p;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
+	    continue;
+	  if (other_store_stmt)
+	    goto fail;
+	  other_store_stmt = use_stmt;
+	}
+    }
+  else
     goto fail;
 
   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
@@ -6599,8 +6655,7 @@  check_scan_store (stmt_vec_info stmt_inf
 
   tree rhs1 = gimple_assign_rhs1 (def_stmt);
   tree rhs2 = gimple_assign_rhs2 (def_stmt);
-  if (TREE_CODE (rhs1) != SSA_NAME
-      || TREE_CODE (rhs2) != SSA_NAME)
+  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
     goto fail;
 
   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
@@ -6615,22 +6670,83 @@  check_scan_store (stmt_vec_info stmt_inf
   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
   if (load1_stmt_info == NULL
       || load2_stmt_info == NULL
-      || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
-      || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
+      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
+	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
+      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
+	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
     goto fail;
 
-  if (scan_operand_equal_p (gimple_assign_lhs (stmt),
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
+    {
+      dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
+      if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
+	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
+	goto fail;
+      tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
+      tree lrhs;
+      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+	lrhs = rhs1;
+      else
+	lrhs = rhs2;
+      use_operand_p use_p;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
+	    continue;
+	  if (other_store_stmt)
+	    goto fail;
+	  other_store_stmt = use_stmt;
+	}
+    }
+
+  if (other_store_stmt == NULL)
+    goto fail;
+  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
+      || !gimple_store_p (other_store_stmt))
+    goto fail;
+
+  stmt_vec_info other_store_stmt_info
+    = loop_vinfo->lookup_stmt (other_store_stmt);
+  if (other_store_stmt_info == NULL
+      || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
+	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
+    goto fail;
+
+  gimple *stmt1 = stmt;
+  gimple *stmt2 = other_store_stmt;
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+    std::swap (stmt1, stmt2);
+  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
 			    gimple_assign_rhs1 (load2_stmt)))
     {
       std::swap (rhs1, rhs2);
       std::swap (load1_stmt, load2_stmt);
       std::swap (load1_stmt_info, load2_stmt_info);
     }
-  if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
-			     gimple_assign_rhs1 (load1_stmt))
-      || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
+  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
+			     gimple_assign_rhs1 (load1_stmt)))
+    goto fail;
+
+  tree var3 = NULL_TREE;
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
+      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
 				gimple_assign_rhs1 (load2_stmt)))
     goto fail;
+  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+    {
+      dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
+      if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
+	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
+	goto fail;
+      var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
+      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
+	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
+	  || lookup_attribute ("omp simd inscan exclusive",
+			       DECL_ATTRIBUTES (var3)))
+	goto fail;
+    }
 
   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
@@ -6648,6 +6764,14 @@  check_scan_store (stmt_vec_info stmt_inf
   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
     std::swap (var1, var2);
 
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+    {
+      if (!lookup_attribute ("omp simd inscan exclusive",
+			     DECL_ATTRIBUTES (var1)))
+	goto fail;
+      var1 = var3;
+    }
+
   if (loop_vinfo->scan_map == NULL)
     goto fail;
   tree *init = loop_vinfo->scan_map->get (var1);
@@ -6655,6 +6779,7 @@  check_scan_store (stmt_vec_info stmt_inf
     goto fail;
 
   /* The IL is as expected, now check if we can actually vectorize it.
+     Inclusive scan:
        _26 = D.2043[_25];
        _27 = D.2042[_25];
        _28 = _26 + _27;
@@ -6664,21 +6789,49 @@  check_scan_store (stmt_vec_info stmt_inf
      from the D.2042[_21] = 0; store):
        _30 = MEM <vector(8) int> [(int *)&D.2043];
        _31 = MEM <vector(8) int> [(int *)&D.2042];
-       _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
+       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
        _33 = _31 + _32;
        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
-       _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
+       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
        _35 = _33 + _34;
        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
-       _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
+       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
        _37 = _35 + _36;
        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
        _38 = _30 + _37;
        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
        MEM <vector(8) int> [(int *)&D.2043] = _39;
-       MEM <vector(8) int> [(int *)&D.2042] = _38;  */
+       MEM <vector(8) int> [(int *)&D.2042] = _38;
+     Exclusive scan:
+       _26 = D.2043[_25];
+       D.2044[_25] = _26;
+       _27 = D.2042[_25];
+       _28 = _26 + _27;
+       D.2043[_25] = _28;
+     should be vectorized as (where _40 is the vectorized rhs
+     from the D.2042[_21] = 0; store):
+       _30 = MEM <vector(8) int> [(int *)&D.2043];
+       _31 = MEM <vector(8) int> [(int *)&D.2042];
+       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
+       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
+       _34 = _32 + _33;
+       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
+       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
+       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
+       _36 = _34 + _35;
+       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
+       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
+       _38 = _36 + _37;
+       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
+       _39 = _30 + _38;
+       _50 = _31 + _39;
+       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
+       MEM <vector(8) int> [(int *)&D.2044] = _39;
+       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
   enum machine_mode vec_mode = TYPE_MODE (vectype);
   optab optab = optab_for_tree_code (code, vectype, optab_default);
   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
@@ -6715,6 +6868,24 @@  vectorizable_scan_store (stmt_vec_info s
   tree rhs = gimple_assign_rhs1 (stmt);
   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
 
+  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+  bool inscan_var_store
+    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+    {
+      use_operand_p use_p;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
+	    continue;
+	  rhs = gimple_assign_lhs (use_stmt);
+	  break;
+	}
+    }
+
   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
   enum tree_code code = gimple_assign_rhs_code (def_stmt);
   if (code == POINTER_PLUS_EXPR)
@@ -6737,15 +6908,12 @@  vectorizable_scan_store (stmt_vec_info s
     {
       std::swap (rhs1, rhs2);
       std::swap (var1, var2);
+      std::swap (load1_dr_info, load2_dr_info);
     }
 
   tree *init = loop_vinfo->scan_map->get (var1);
   gcc_assert (init);
 
-  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
-  bool inscan_var_store
-    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
-
   unsigned HOST_WIDE_INT nunits;
   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
     gcc_unreachable ();
@@ -6789,29 +6957,50 @@  vectorizable_scan_store (stmt_vec_info s
   tree vec_oprnd1 = NULL_TREE;
   tree vec_oprnd2 = NULL_TREE;
   tree vec_oprnd3 = NULL_TREE;
-  tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
+  tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
   tree dataref_offset = build_int_cst (ref_type, 0);
   tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+  tree ldataref_ptr = NULL_TREE;
   tree orig = NULL_TREE;
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+    ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
   for (int j = 0; j < ncopies; j++)
     {
       stmt_vec_info new_stmt_info;
       if (j == 0)
 	{
 	  vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
-	  vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
+	  if (ldataref_ptr == NULL)
+	    vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
 	  vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
 	  orig = vec_oprnd3;
 	}
       else
 	{
 	  vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
-	  vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
+	  if (ldataref_ptr == NULL)
+	    vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
 	  vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
 	  if (!inscan_var_store)
 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
 	}
 
+      if (ldataref_ptr)
+	{
+	  vec_oprnd2 = make_ssa_name (vectype);
+	  tree data_ref = fold_build2 (MEM_REF, vectype,
+				       unshare_expr (ldataref_ptr),
+				       dataref_offset);
+	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
+	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
+	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+	  if (prev_stmt_info == NULL)
+	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+	  else
+	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+	  prev_stmt_info = new_stmt_info;
+	}
+
       tree v = vec_oprnd2;
       for (int i = 0; i < units_log2; ++i)
 	{
@@ -6848,6 +7037,17 @@  vectorizable_scan_store (stmt_vec_info s
 	      new_temp = new_temp2;
 	    }
 
+	  /* For exclusive scan, perform the perms[i] permutation once
+	     more.  */
+	  if (i == 0
+	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
+	      && v == vec_oprnd2)
+	    {
+	      v = new_temp;
+	      --i;
+	      continue;
+	    }
+
 	  tree new_temp2 = make_ssa_name (vectype);
 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
@@ -6863,16 +7063,30 @@  vectorizable_scan_store (stmt_vec_info s
       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
       prev_stmt_info = new_stmt_info;
 
+      tree last_perm_arg = new_temp;
+      /* For exclusive scan, new_temp computed above is the exclusive scan
+	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
+	 of the last element into orig.  */
+      if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+	{
+	  last_perm_arg = make_ssa_name (vectype);
+	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
+	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+	  prev_stmt_info = new_stmt_info;
+	}
+
       orig = make_ssa_name (vectype);
-      g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
-			       perms[units_log2]);
+      g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
+			       last_perm_arg, perms[units_log2]);
       new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
       prev_stmt_info = new_stmt_info;
 
       if (!inscan_var_store)
 	{
-	  tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+	  tree data_ref = fold_build2 (MEM_REF, vectype,
+				       unshare_expr (dataref_ptr),
 				       dataref_offset);
 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
 	  g = gimple_build_assign (data_ref, new_temp);
@@ -6888,7 +7102,8 @@  vectorizable_scan_store (stmt_vec_info s
 	if (j != 0)
 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
 
-	tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+	tree data_ref = fold_build2 (MEM_REF, vectype,
+				     unshare_expr (dataref_ptr),
 				     dataref_offset);
 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
 	gimple *g = gimple_build_assign (data_ref, orig);
@@ -7325,7 +7540,7 @@  vectorizable_store (stmt_vec_info stmt_i
 	}
       return true;
     }
-  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
     return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
 
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
--- gcc/testsuite/gcc.dg/vect/vect-simd-12.c.jj	2019-06-20 15:08:50.260400440 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-12.c	2019-06-20 15:08:24.332805239 +0200
@@ -0,0 +1,122 @@ 
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, +:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, +:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+      s += i;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+      s += 2 * i;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+      s += i;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-13.c.jj	2019-06-20 15:47:23.580359715 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-13.c	2019-06-20 15:13:23.500134387 +0200
@@ -0,0 +1,124 @@ 
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, foo:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, foo:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+      s += i;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+      s += 2 * i;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+      s += i;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-14.c.jj	2019-06-20 15:48:30.536321539 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c	2019-06-20 15:54:39.291617792 +0200
@@ -0,0 +1,94 @@ 
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+float r = 1.0f, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (float *a, float *b)
+{
+  #pragma omp simd reduction (inscan, *:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r *= a[i];
+    }
+}
+
+__attribute__((noipa)) float
+bar (void)
+{
+  float s = -__builtin_inff ();
+  #pragma omp simd reduction (inscan, max:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s = s > a[i] ? s : a[i];
+    }
+  return s;
+}
+
+int
+main ()
+{
+  float s = 1.0f;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (i < 80)
+	a[i] = (i & 1) ? 0.25f : 0.5f;
+      else if (i < 200)
+	a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+      else if (i < 280)
+	a[i] = (i & 1) ? 0.25f : 0.5f;
+      else if (i < 380)
+	a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+      else
+	switch (i % 6)
+	  {
+	  case 0: a[i] = 0.25f; break;
+	  case 1: a[i] = 2.0f; break;
+	  case 2: a[i] = -1.0f; break;
+	  case 3: a[i] = -4.0f; break;
+	  case 4: a[i] = 0.5f; break;
+	  case 5: a[i] = 1.0f; break;
+	  default: a[i] = 0.0f; break;
+	  }
+      b[i] = -19.0f;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r * 16384.0f != 0.125f)
+    abort ();
+  float m = -175.25f;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -231.75f;
+      s *= a[i];
+      a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
+      m += 0.75f;
+    }
+  if (bar () != 592.0f)
+    abort ();
+  s = -__builtin_inff ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      if (s < a[i])
+	s = a[i];
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-15.c.jj	2019-06-20 15:50:34.483399705 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-15.c	2019-06-20 15:52:09.976919050 +0200
@@ -0,0 +1,186 @@ 
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3)
+  for (int i = 0; i < 1024; i++)
+    {
+      {
+	b[i] = r;
+	b2[i] = r2;
+	b3[i] = r3;
+      }
+      #pragma omp scan exclusive(r, r2, r3)
+      { r += a[i]; r2 += a[i]; r3 += a[i]; }
+    }
+}
+
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3)
+  for (int i = 0; i < 1024; i++)
+    {
+      { b[i] = s; b2[i] = s2; b3[i] = s3; }
+      #pragma omp scan exclusive(s, s2, s3)
+      {
+	s += 2 * a[i];
+	s2 += 2 * a[i];
+	s3 += 2 * a[i];
+      }
+    }
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      {
+	b[i] = r;
+	b2[i] = r2;
+	b3[i] = r3;
+      }
+      #pragma omp scan exclusive(r, r2, r3)
+      {
+	r += a[i];
+	r2 += a[i];
+	r3 += a[i];
+      }
+    }
+}
+
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      { b[i] = s; b2[i] = s2; b3[i] = s3; }
+      #pragma omp scan exclusive(s, s2, s3)
+      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+    }
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  unsigned short s2;
+  unsigned char s3;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      b2[i] = -1;
+      b3[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, b2, b3);
+  if (r != 1024 * 1023 / 2
+      || r2 != (unsigned short) r
+      || r3 != (unsigned char) r)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s
+	  || b2[i] != (unsigned short) s
+	  || b3[i] != (unsigned char) s)
+	abort ();
+      else
+	{
+	  b[i] = 25;
+	  b2[i] = 24;
+	  b3[i] = 26;
+	}
+      s += i;
+    }
+  if (bar (&s2, &s3) != 1024 * 1023)
+    abort ();
+  if (s2 != (unsigned short) (1024 * 1023)
+      || s3 != (unsigned char) (1024 * 1023))
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s
+	  || b2[i] != (unsigned short) s
+	  || b3[i] != (unsigned char) s)
+	abort ();
+      else
+	{
+	  b[i] = -1;
+	  b2[i] = -1;
+	  b3[i] = -1;
+	}
+      s += 2 * i;
+    }
+  r = 0;
+  r2 = 0;
+  r3 = 0;
+  baz (a, b, b2, b3);
+  if (r != 1024 * 1023 / 2
+      || r2 != (unsigned short) r
+      || r3 != (unsigned char) r)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s
+	  || b2[i] != (unsigned short) s
+	  || b3[i] != (unsigned char) s)
+	abort ();
+      else
+	{
+	  b[i] = 25;
+	  b2[i] = 24;
+	  b3[i] = 26;
+	}
+      s += i;
+    }
+  s2 = 0;
+  s3 = 0;
+  if (qux (&s2, &s3) != 1024 * 1023)
+    abort ();
+  if (s2 != (unsigned short) (1024 * 1023)
+      || s3 != (unsigned char) (1024 * 1023))
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s
+	  || b2[i] != (unsigned short) s
+	  || b3[i] != (unsigned char) s)
+	abort ();
+      s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c.jj	2019-06-20 15:58:35.276983324 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c	2019-06-20 15:58:35.274983355 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+sse2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c.jj	2019-06-20 15:58:35.283983216 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c	2019-06-20 15:58:35.281983247 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+sse2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c.jj	2019-06-20 15:58:35.288983139 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c	2019-06-20 15:58:35.287983154 +0200
@@ -0,0 +1,15 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+sse2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c.jj	2019-06-20 15:58:35.293983061 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c	2019-06-20 15:58:35.292983077 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+sse2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c.jj	2019-06-20 15:58:35.299982969 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c	2019-06-20 15:58:35.297982999 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c.jj	2019-06-20 15:58:35.305982876 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c	2019-06-20 15:58:35.303982907 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c.jj	2019-06-20 15:58:35.310982799 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c	2019-06-20 15:58:35.309982815 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c.jj	2019-06-20 15:58:35.316982707 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c	2019-06-20 15:58:35.314982738 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c.jj	2019-06-20 15:58:35.323982599 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c	2019-06-20 15:58:35.321982630 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+avx512f_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c.jj	2019-06-20 15:58:35.328982522 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c	2019-06-20 15:58:35.326982553 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+avx512f_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c.jj	2019-06-20 15:58:35.333982445 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c	2019-06-20 15:58:35.332982461 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+avx512f_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c.jj	2019-06-20 15:58:35.347982230 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c	2019-06-20 15:58:35.346982245 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512bw-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+avx512bw_test (void)
+{
+  do_main ();
+}
--- gcc/testsuite/g++.dg/vect/simd-6.cc.jj	2019-06-20 16:00:34.800142524 +0200
+++ gcc/testsuite/g++.dg/vect/simd-6.cc	2019-06-20 16:07:41.722559826 +0200
@@ -0,0 +1,161 @@ 
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+template <typename T>
+struct S {
+  inline S ();
+  inline ~S ();
+  inline S (const S &);
+  inline S & operator= (const S &);
+  T s;
+};
+
+template <typename T>
+S<T>::S () : s (0)
+{
+}
+
+template <typename T>
+S<T>::~S ()
+{
+}
+
+template <typename T>
+S<T>::S (const S &x)
+{
+  s = x.s;
+}
+
+template <typename T>
+S<T> &
+S<T>::operator= (const S &x)
+{
+  s = x.s;
+  return *this;
+}
+
+template <typename T>
+static inline void
+ini (S<T> &x)
+{
+  x.s = 0;
+}
+
+S<int> r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+template <typename T>
+__attribute__((noipa)) void
+foo (S<T> *a, S<T> *b)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r.s += a[i].s;
+    }
+}
+
+template <typename T>
+__attribute__((noipa)) S<T>
+bar (void)
+{
+  S<T> s;
+  #pragma omp simd reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s.s += 2 * a[i].s;
+    }
+  return S<T> (s);
+}
+
+__attribute__((noipa)) void
+baz (S<int> *a, S<int> *b)
+{
+  #pragma omp simd reduction (inscan, +:r) simdlen(1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r.s += a[i].s;
+    }
+}
+
+__attribute__((noipa)) S<int>
+qux (void)
+{
+  S<int> s;
+  #pragma omp simd if (0) reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s.s += 2 * a[i].s;
+    }
+  return S<int> (s);
+}
+
+int
+main ()
+{
+  S<int> s;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i].s = i;
+      b[i].s = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+      s.s += i;
+    }
+  if (bar<int> ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      s.s += 2 * i;
+    }
+  r.s = 0;
+  baz (a, b);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+      s.s += i;
+    }
+  if (qux ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      s.s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/g++.dg/vect/simd-7.cc.jj	2019-06-20 16:00:51.095891542 +0200
+++ gcc/testsuite/g++.dg/vect/simd-7.cc	2019-06-20 16:12:50.222747875 +0200
@@ -0,0 +1,124 @@ 
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+template <typename T, typename U>
+__attribute__((noipa)) void
+foo (T a, T b, U r)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+template <typename T>
+__attribute__((noipa)) T
+bar (void)
+{
+  T &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, +:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+template <typename T>
+__attribute__((noipa)) void
+baz (T *a, T *b, T &r)
+{
+  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+  for (T i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+template <typename T>
+__attribute__((noipa)) int
+qux (void)
+{
+  T s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, +:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo<int *, int &> (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+      s += i;
+    }
+  if (bar<int> () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+      s += 2 * i;
+    }
+  r = 0;
+  baz<int> (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+      s += i;
+    }
+  if (qux<int &> () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/g++.dg/vect/simd-8.cc.jj	2019-06-20 16:00:54.154844430 +0200
+++ gcc/testsuite/g++.dg/vect/simd-8.cc	2019-06-20 16:15:37.994133891 +0200
@@ -0,0 +1,122 @@ 
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, foo:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, foo:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r += a[i];
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int &s = q;
+  q = 0;
+  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s += 2 * a[i];
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = 25;
+      s += i;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -1;
+      s += 2 * i;
+    }
+  r = 0;
+  baz (a, b, r);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      else
+	b[i] = -25;
+      s += i;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+	abort ();
+      s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/g++.dg/vect/simd-9.cc.jj	2019-06-20 16:00:57.197797566 +0200
+++ gcc/testsuite/g++.dg/vect/simd-9.cc	2019-06-20 16:17:27.484427949 +0200
@@ -0,0 +1,153 @@ 
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+struct S {
+  inline S ();
+  inline ~S ();
+  inline S (const S &);
+  inline S & operator= (const S &);
+  int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+  s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+  s = x.s;
+  return *this;
+}
+
+static inline void
+ini (S &x)
+{
+  x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b, S &r)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r.s += a[i].s;
+    }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+  S s;
+  #pragma omp simd reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s.s += 2 * a[i].s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b, S &r)
+{
+  #pragma omp simd reduction (inscan, +:r) simdlen(1)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      r.s += a[i].s;
+    }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+  S s;
+  #pragma omp simd if (0) reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = s;
+      #pragma omp scan exclusive(s)
+      s.s += 2 * a[i].s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  S s;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i].s = i;
+      b[i].s = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b, r);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+      s.s += i;
+    }
+  if (bar ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      s.s += 2 * i;
+    }
+  r.s = 0;
+  baz (a, b, r);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      else
+	b[i].s = 25;
+      s.s += i;
+    }
+  if (qux ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i].s != s.s)
+	abort ();
+      s.s += 2 * i;
+    }
+  return 0;
+}
--- gcc/testsuite/c-c++-common/gomp/scan-2.c.jj	2019-06-10 14:18:17.461525669 +0200
+++ gcc/testsuite/c-c++-common/gomp/scan-2.c	2019-06-20 23:54:03.615422149 +0200
@@ -8,7 +8,7 @@  f1 (int *c, int *d)
   for (i = 0; i < 64; i++)
     {
       d[i] = a;
-      #pragma omp scan exclusive (a)		/* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
+      #pragma omp scan exclusive (a)
       a += c[i];
     }
 }