@@ -16045,6 +16045,7 @@ emit_reduc_half (rtx dest, rtx src, int i)
break;
case E_V16QImode:
case E_V8HImode:
+ case E_V8HFmode:
case E_V4SImode:
case E_V2DImode:
d = gen_reg_rtx (V1TImode);
@@ -16066,6 +16067,7 @@ emit_reduc_half (rtx dest, rtx src, int i)
break;
case E_V32QImode:
case E_V16HImode:
+ case E_V16HFmode:
case E_V8SImode:
case E_V4DImode:
if (i == 256)
@@ -16085,6 +16087,7 @@ emit_reduc_half (rtx dest, rtx src, int i)
break;
case E_V64QImode:
case E_V32HImode:
+ case E_V32HFmode:
if (i < 64)
{
d = gen_reg_rtx (V4TImode);
@@ -3157,7 +3157,8 @@ (define_insn "sse3_h<insn>v4sf3"
(set_attr "mode" "V4SF")])
(define_mode_iterator REDUC_SSE_PLUS_MODE
- [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
+ [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
+ (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")]) ;; V8HF joins the modes iterated by the reduc_plus_scal_<mode> expander that follows; unlike the SSE-only entries it requires both AVX512FP16 and AVX512VL.
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_SSE_PLUS_MODE
@@ -3194,7 +3195,9 @@ (define_expand "reduc_plus_scal_v16qi"
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") ;; Half-float entry added next to V4DF/V8SF; enabled only with AVX512FP16 and AVX512VL.
(V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") ;; NOTE(review): also gated on AVX512VL, unlike V8DF/V16SF; presumably because the reduction steps through narrower HF vectors -- confirm.
(V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
@@ -3214,7 +3217,8 @@ (define_expand "reduc_plus_scal_<mode>"
;; Modes handled by reduc_sm{in,ax}* patterns.
(define_mode_iterator REDUC_SSE_SMINMAX_MODE
- [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
+ [(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") ;; New half-float entry for the min/max reductions; needs AVX512FP16 and AVX512VL rather than plain SSE.
+ (V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
(V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
(V2DI "TARGET_SSE4_2")])
@@ -3233,9 +3237,11 @@ (define_expand "reduc_<code>_scal_<mode>"
(define_mode_iterator REDUC_SMINMAX_MODE
[(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") ;; Half-float entry added to this min/max iterator; enabled only with AVX512FP16 and AVX512VL.
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
(V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") ;; NOTE(review): gated on AVX512VL in addition to AVX512FP16; presumably because the reduction steps through narrower HF vectors -- confirm.
(V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
(V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
new file mode 100644
@@ -0,0 +1,96 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mprefer-vector-width=512 -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_MIN" 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_MAX" 3 "optimized" } } */
+
+_Float16 /* Return p[0] + ... + p[7], accumulated in a _Float16.  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_add_128 (_Float16* p) /* p: at least 8 readable _Float16 elements.  */
+{
+ _Float16 sum = 0;
+ for (int i = 0; i != 8; i++) /* Presumably one of the three .REDUC_PLUS reductions counted by this file's dg-final scan -- confirm.  */
+ sum += p[i];
+ return sum;
+}
+
+_Float16 /* Return p[0] + ... + p[15], accumulated in a _Float16.  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_add_256 (_Float16* p) /* p: at least 16 readable _Float16 elements.  */
+{
+ _Float16 sum = 0;
+ for (int i = 0; i != 16; i++) /* Presumably one of the three .REDUC_PLUS reductions counted by this file's dg-final scan -- confirm.  */
+ sum += p[i];
+ return sum;
+}
+
+_Float16 /* Return p[0] + ... + p[31], accumulated in a _Float16.  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_add_512 (_Float16* p) /* p: at least 32 readable _Float16 elements.  */
+{
+ _Float16 sum = 0;
+ for (int i = 0; i != 32; i++) /* Presumably one of the three .REDUC_PLUS reductions counted by this file's dg-final scan -- confirm.  */
+ sum += p[i];
+ return sum;
+}
+
+_Float16 /* Return the smallest of p[0] .. p[7].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_min_128 (_Float16* p) /* p: at least 8 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running minimum, despite the name.  */
+ for (int i = 0; i != 8; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MIN reductions the dg-final scan counts -- confirm.  */
+ sum = sum > p[i] ? p[i] : sum; /* Keep the smaller of sum and p[i].  */
+ return sum;
+}
+
+_Float16 /* Return the smallest of p[0] .. p[15].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_min_256 (_Float16* p) /* p: at least 16 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running minimum, despite the name.  */
+ for (int i = 0; i != 16; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MIN reductions the dg-final scan counts -- confirm.  */
+ sum = sum > p[i] ? p[i] : sum; /* Keep the smaller of sum and p[i].  */
+ return sum;
+}
+
+_Float16 /* Return the smallest of p[0] .. p[31].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_min_512 (_Float16* p) /* p: at least 32 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running minimum, despite the name.  */
+ for (int i = 0; i != 32; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MIN reductions the dg-final scan counts -- confirm.  */
+ sum = sum > p[i] ? p[i] : sum; /* Keep the smaller of sum and p[i].  */
+ return sum;
+}
+
+_Float16 /* Return the largest of p[0] .. p[7].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_max_128 (_Float16* p) /* p: at least 8 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running maximum, despite the name.  */
+ for (int i = 0; i != 8; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MAX reductions the dg-final scan counts -- confirm.  */
+ sum = sum < p[i] ? p[i] : sum; /* Keep the larger of sum and p[i].  */
+ return sum;
+}
+
+_Float16 /* Return the largest of p[0] .. p[15].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_max_256 (_Float16* p) /* p: at least 16 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running maximum, despite the name.  */
+ for (int i = 0; i != 16; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MAX reductions the dg-final scan counts -- confirm.  */
+ sum = sum < p[i] ? p[i] : sum; /* Keep the larger of sum and p[i].  */
+ return sum;
+}
+
+_Float16 /* Return the largest of p[0] .. p[31].  */
+__attribute__((noipa, target("avx512fp16,avx512vl"), optimize("Ofast"))) /* GCC function attributes: AVX512FP16+AVX512VL and -Ofast apply to this function only; noipa keeps IPA away from it.  */
+reduc_max_512 (_Float16* p) /* p: at least 32 readable _Float16 elements.  */
+{
+ _Float16 sum = p[0]; /* Running maximum, despite the name.  */
+ for (int i = 0; i != 32; i++) /* Starts at 0, so the first pass compares p[0] with itself; presumably one of the three .REDUC_MAX reductions the dg-final scan counts -- confirm.  */
+ sum = sum < p[i] ? p[i] : sum; /* Keep the larger of sum and p[i].  */
+ return sum;
+}
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* { dg-require-effective-target avx512fp16 } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512FP16
+#define AVX512VL
+
+#include "avx512f-helper.h"
+
+#include "avx512fp16-reduce-op-2.c"
+
+void
+test_256 (void)
+{
+  /* Check reduc_add_{128,256,512} against left-to-right _Float16 sums.  */
+  _Float16 a[32];
+  _Float16 ref_128 = 0, ref_256, ref_512;
+
+  /* a[i] = (-1)^i * 4 * i, i.e. 0, -4, 8, -12, ...  */
+  for (int i = 0, sign = 1; i != 32; i++, sign = -sign)
+    a[i] = sign * (4.0 * i);
+
+  for (int i = 0; i != 8; i++)
+    ref_128 += a[i];
+  ref_256 = ref_128;  /* Longer sums continue from the shorter prefix.  */
+  for (int i = 8; i != 16; i++)
+    ref_256 += a[i];
+  ref_512 = ref_256;
+  for (int i = 16; i != 32; i++)
+    ref_512 += a[i];
+
+  _Float16 got_128 = reduc_add_128 (a);
+  _Float16 got_256 = reduc_add_256 (a);
+  _Float16 got_512 = reduc_add_512 (a);
+  if (got_128 != ref_128 || got_256 != ref_256 || got_512 != ref_512)
+    abort();
+}
+
+#define MAX(A, B) ((A) > (B) ? (A) : (B)) /* Larger of A and B; the selected argument is evaluated twice, so pass side-effect-free operands.  */
+#define MIN(A, B) ((A) < (B) ? (A) : (B)) /* Smaller of A and B; the selected argument is evaluated twice, so pass side-effect-free operands.  */
+
+void
+test_128 (void)
+{
+  /* Check reduc_{min,max}_{128,256,512} against scalar MIN/MAX scans.  */
+  _Float16 a[32];
+  int sign = 1;
+  _Float16 min_ref1, max_ref1, min_ref2, max_ref2, min_ref3, max_ref3;
+
+  /* Signs alternate and magnitudes grow overall, so each wider prefix has new extremes.  */
+  for (int i = 0; i != 32; i++)
+    {
+      a[i] = sign * (4.9 * i * i - 8.3 * i + 14.8);
+      sign *= -1;
+    }
+
+  min_ref1 = max_ref1 = a[0];
+  for (int i = 0; i != 8; i++)
+    {
+      min_ref1 = MIN (min_ref1, a[i]);
+      max_ref1 = MAX (max_ref1, a[i]);
+    }
+
+  min_ref2 = min_ref1;
+  max_ref2 = max_ref1;
+  for (int i = 8; i != 16; i++)
+    {
+      min_ref2 = MIN (min_ref2, a[i]);
+      max_ref2 = MAX (max_ref2, a[i]);
+    }
+
+  min_ref3 = min_ref2;
+  max_ref3 = max_ref2;
+  for (int i = 16; i != 32; i++)
+    {
+      min_ref3 = MIN (min_ref3, a[i]);
+      max_ref3 = MAX (max_ref3, a[i]);
+    }
+
+  _Float16 min_got1 = reduc_min_128 (a);
+  _Float16 min_got2 = reduc_min_256 (a);
+  _Float16 min_got3 = reduc_min_512 (a);
+  _Float16 max_got1 = reduc_max_128 (a);
+  _Float16 max_got2 = reduc_max_256 (a);
+  _Float16 max_got3 = reduc_max_512 (a);
+
+  if (min_got1 != min_ref1 || min_got2 != min_ref2 || min_got3 != min_ref3
+      || max_got1 != max_ref1 || max_got2 != max_ref2 || max_got3 != max_ref3)
+    abort ();
+}