new file mode 100644
@@ -0,0 +1,166 @@
+/* { dg-do run } */
+/* { dg-options "-msve-vector-bits=256 -std=gnu99 -fopenmp -O2 -fdump-tree-ompexp" } */
+
+#include <arm_sve.h>
+
+/* User-defined '+' reduction for svint32_t: combine two partial results by
+   adding them with svadd_s32_z under an all-true predicate.  No initializer
+   clause is given, so the private copies get the default initialization
+   (presumably all-zero lanes, which is the identity for this combiner --
+   confirm against the OpenMP declare reduction rules).  */
+#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out))
+
+/* Reduce an SVE vector and a scalar counter over a plain parallel region.
+   Every thread loads the all-ones array into its private vector and bumps its
+   private counter once.  After the region the sum of the reduced vector's
+   elements must equal eight times the reduced counter; otherwise abort.
+   Returns 0.  */
+int parallel_reduction ()
+{
+  int ones[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  int i = 0;
+  svint32_t va = svld1_s32 (svptrue_b32 (), zeros);
+
+  #pragma omp parallel reduction (+:va, i)
+  {
+    /* One contribution per thread: a vector of ones and a count of one.  */
+    i++;
+    va = svld1_s32 (svptrue_b32 (), ones);
+  }
+
+  int64_t res = svaddv_s32 (svptrue_b32 (), va);
+  if (i * 8 != res)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* Reduce an SVE vector and a scalar counter over a work-shared loop of eight
+   iterations.  Each iteration adds the all-ones array into the private vector
+   and bumps the private counter, so after the loop the sum of the reduced
+   vector's elements must equal eight times the reduced counter regardless of
+   how many threads shared the iterations.  Aborts on mismatch, returns 0.  */
+int for_reduction ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int i = 0;
+  int j;
+  int64_t res;
+
+  #pragma omp parallel for reduction (+:va, i)
+  for (j = 0; j < 8; j++)
+    {
+      /* Accumulate rather than assign: a plain assignment would make the
+         reduced vector count the threads that ran at least one iteration,
+         whereas I counts iterations, so the check below would only hold
+         when exactly eight threads each ran one iteration.  */
+      va = svadd_s32_z (svptrue_b32 (), va, svld1_s32 (svptrue_b32 (), a));
+      i++;
+    }
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != i * 8)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* Exercise a simd reduction whose list names both an SVE vector and a scalar
+   counter.  The loop body overwrites the vector with the all-ones array and
+   bumps the counter on each of the eight iterations.  Afterwards the sum of
+   the vector's elements is compared directly against the counter; aborts on
+   mismatch, returns 0.  */
+int simd_reduction ()
+{
+  int ones[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  int i = 0;
+  int j;
+  svint32_t va = svld1_s32 (svptrue_b32 (), zeros);
+
+  /* va is already a vector, so the reduction clause is only expected to
+     affect the scalar i.  The OMP spec says only scalar variables are
+     allowed in the list.  Open question: should non-scalars be
+     diagnosed?  */
+  #pragma omp simd reduction (+:va, i)
+  for (j = 0; j < 8; j++)
+    {
+      i++;
+      va = svld1_s32 (svptrue_b32 (), ones);
+    }
+
+  int64_t res = svaddv_s32 (svptrue_b32 (), va);
+  if (i != res)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* Reduce an SVE vector and a scalar counter over a taskloop of eight
+   iterations.  Each iteration adds the all-ones array into the reduction
+   vector and bumps the counter; afterwards the sum of the reduced vector's
+   elements must equal eight times the counter.  Aborts on mismatch,
+   returns 0.  */
+int taskloop_reduction ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int i = 0;
+  int j;
+  int64_t res;
+
+  #pragma omp taskloop reduction (+:va, i)
+  for (j = 0; j < 8; j++)
+    {
+      svint32_t tva = svld1_s32 (svptrue_b32 (), a);
+      /* va is already a reduction list item of the enclosing taskloop, so
+         the update needs no further directive.  (in_reduction is a clause
+         of task-generating constructs, not a directive of its own.)  */
+      va = svadd_s32_z (svptrue_b32 (), tva, va);
+      i++;
+    }
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != i * 8)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* Reduce an SVE vector and a scalar counter over a parallel region using the
+   task reduction modifier.  Every thread adds the all-ones array into its
+   private vector and bumps its private counter once; afterwards the sum of
+   the reduced vector's elements must equal eight times the reduced counter.
+   Aborts on mismatch, returns 0.  */
+int task_reduction ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int i = 0;
+  int64_t res;
+
+  /* I must be a reduction list item as well: left shared, the concurrent
+     increments below would be a data race and the final count would be
+     unreliable.  */
+  #pragma omp parallel reduction (task,+:va, i)
+  {
+    va = svadd_s32_z (svptrue_b32 (), svld1_s32 (svptrue_b32 (), a), va);
+    i++;
+  }
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != i * 8)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* Exercise an inclusive scan (inscan reduction) over an SVE vector and a
+   scalar counter in a work-shared loop of eight iterations.  The input phase
+   contributes the all-ones array and a count of one per iteration; the scan
+   phase records, per iteration, the sum of the prefix vector's elements and
+   the prefix count.  After the loop each recorded prefix count must be J + 1
+   and each recorded element sum must be eight times that count.  Aborts on
+   mismatch, returns 0.  */
+int inscan_reduction_incl ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int j;
+  int i = 0;
+  /* Per-iteration results.  Each iteration writes only its own slot, so the
+     scan phase needs no synchronization; a single shared accumulator would
+     be updated concurrently by several threads.  */
+  int64_t res[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  int cnt[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+  #pragma omp parallel
+  #pragma omp for reduction (inscan,+:va, i)
+  for (j = 0; j < 8; j++)
+    {
+      /* Input phase: this iteration's contribution.  */
+      va = svadd_s32_z (svptrue_b32 (), va, svld1_s32 (svptrue_b32 (), a));
+      i++;
+      #pragma omp scan inclusive (va, i)
+      /* Scan phase: va and i hold the inclusive prefix for iteration J.  */
+      res[j] = svaddv_s32 (svptrue_b32 (), va);
+      cnt[j] = i;
+    }
+
+  for (j = 0; j < 8; j++)
+    if (cnt[j] != j + 1 || res[j] != cnt[j] * 8)
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* Test driver: run every reduction scenario in turn.  Each callee aborts the
+   program itself on a mismatch, so reaching the end means all of them
+   passed.  */
+int main ()
+{
+  parallel_reduction ();
+  task_reduction ();
+  inscan_reduction_incl ();
+  taskloop_reduction ();
+  simd_reduction ();
+  for_reduction ();
+  return 0;
+}