Patchwork [i386,AVX512,32/n] Add reduce,range,fpclass.

login
register
mail settings
Submitter Kirill Yukhin
Date Aug. 29, 2014, 1:55 p.m.
Message ID <20140829135525.GD18938@msticlxl57.ims.intel.com>
Download mbox | patch
Permalink /patch/384249/
State New
Headers show

Comments

Kirill Yukhin - Aug. 29, 2014, 1:55 p.m.
Hello,
The patch at the bottom adds support for reduce, range, and fpclass.

Bootstrapped.
All AVX-512* tests pass under the simulator with this patch applied
on top of the patch set.

Is it ok for trunk?

gcc/
	* config/i386/i386.c
	(ix86_expand_args_builtin): Handle avx512dq_rangepv8df_mask_round,
	avx512dq_rangepv16sf_mask_round, avx512dq_rangepv4df_mask,
	avx512dq_rangepv8sf_mask, avx512dq_rangepv2df_mask,
	avx512dq_rangepv4sf_mask.
	* config/i386/sse.md
	(define_c_enum "unspec"): Add UNSPEC_REDUCE, UNSPEC_FPCLASS,
	UNSPEC_FPCLASS_SCALAR, UNSPEC_RANGE, UNSPEC_RANGE_SCALAR.
	(define_insn "<mask_codefor>reducep<mode><mask_name>"): New.
	(define_insn "reduces<mode>"): Ditto.
	(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"):
	Ditto.
	(define_insn "avx512dq_ranges<mode><round_saeonly_name>"): Ditto.
	(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"): Ditto.
	(define_insn "avx512dq_vmfpclass<mode>"): Ditto.

--
Thanks, K
Uros Bizjak - Aug. 30, 2014, 8:21 a.m.
On Fri, Aug 29, 2014 at 3:55 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

> Patch in the bottom adds support for reduce,range,fpclass.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
>         * config/i386/i386.c
>         (ix86_expand_args_builtin): Handle avx512dq_rangepv8df_mask_round,
>         avx512dq_rangepv16sf_mask_round, avx512dq_rangepv4df_mask,
>         avx512dq_rangepv8sf_mask, avx512dq_rangepv2df_mask,
>         avx512dq_rangepv4sf_mask.
>         * config/i386/sse.md
>         (define_c_enum "unspec"): Add UNSPEC_REDUCE, UNSPEC_FPCLASS,
>         UNSPEC_FPCLASS_SCALAR, UNSPEC_RANGE, UNSPEC_RANGE_SCALAR.
>         (define_insn "<mask_codefor>reducep<mode><mask_name>"): New.
>         (define_insn "reduces<mode>"): Ditto.
>         (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"):
>         Ditto.
>         (define_insn "avx512dq_ranges<mode><round_saeonly_name>"): Ditto.
>         (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"): Ditto.
>         (define_insn "avx512dq_vmfpclass<mode>"): Ditto.
>
> --
> Thanks, K
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index ff37ffe..15cdb5e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -34114,6 +34114,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
>               case CODE_FOR_avx512vl_getmantv4df_mask:
>               case CODE_FOR_avx512vl_getmantv4sf_mask:
>               case CODE_FOR_avx512vl_getmantv2df_mask:
> +             case CODE_FOR_avx512dq_rangepv8df_mask_round:
> +             case CODE_FOR_avx512dq_rangepv16sf_mask_round:
> +             case CODE_FOR_avx512dq_rangepv4df_mask:
> +             case CODE_FOR_avx512dq_rangepv8sf_mask:
> +             case CODE_FOR_avx512dq_rangepv2df_mask:
> +             case CODE_FOR_avx512dq_rangepv4sf_mask:
>                 error ("the last argument must be a 4-bit immediate");
>                 return const0_rtx;
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index d85f9a4..c505526 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -128,6 +128,13 @@
>    UNSPEC_SHA256MSG1
>    UNSPEC_SHA256MSG2
>    UNSPEC_SHA256RNDS2
> +
> +  ;; For AVX512DQ support
> +  UNSPEC_REDUCE
> +  UNSPEC_FPCLASS
> +  UNSPEC_FPCLASS_SCALAR
> +  UNSPEC_RANGE
> +  UNSPEC_RANGE_SCALAR
>  ])

It looks to me that the _SCALAR unspecs are redundant, and that it
should be possible to share one unspec between the packed and scalar
patterns (as is already done with UNSPEC_REDUCE) without unwanted
matching.

Uros.

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ff37ffe..15cdb5e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34114,6 +34114,12 @@  ix86_expand_args_builtin (const struct builtin_description *d,
 	      case CODE_FOR_avx512vl_getmantv4df_mask:
 	      case CODE_FOR_avx512vl_getmantv4sf_mask:
 	      case CODE_FOR_avx512vl_getmantv2df_mask:
+	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
+	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
+	      case CODE_FOR_avx512dq_rangepv4df_mask:
+	      case CODE_FOR_avx512dq_rangepv8sf_mask:
+	      case CODE_FOR_avx512dq_rangepv2df_mask:
+	      case CODE_FOR_avx512dq_rangepv4sf_mask:
 		error ("the last argument must be a 4-bit immediate");
 		return const0_rtx;
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d85f9a4..c505526 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -128,6 +128,13 @@ 
   UNSPEC_SHA256MSG1
   UNSPEC_SHA256MSG2
   UNSPEC_SHA256RNDS2
+
+  ;; For AVX512DQ support
+  UNSPEC_REDUCE
+  UNSPEC_FPCLASS
+  UNSPEC_FPCLASS_SCALAR
+  UNSPEC_RANGE
+  UNSPEC_RANGE_SCALAR
 ])
 
 (define_c_enum "unspecv" [
@@ -2330,6 +2337,34 @@ 
   DONE;
 })
 
+(define_insn "<mask_codefor>reducep<mode><mask_name>"
+  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+	(unspec:VF_AVX512VL
+	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
+	   (match_operand:SI 2 "const_0_to_255_operand")]
+	  UNSPEC_REDUCE))]
+  "TARGET_AVX512DQ"
+  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "reduces<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	     (match_operand:SI 3 "const_0_to_255_operand")]
+	    UNSPEC_REDUCE)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512DQ"
+  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel floating point comparisons
@@ -16760,6 +16795,63 @@ 
    (set_attr "memory" "none,load")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
+  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+	(unspec:VF_AVX512VL
+	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+	   (match_operand:SI 3 "const_0_to_15_operand")]
+	  UNSPEC_RANGE))]
+  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
+  "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+	     (match_operand:SI 3 "const_0_to_15_operand")]
+	    UNSPEC_RANGE_SCALAR)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512DQ"
+  "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+          (unspec:<avx512fmaskmode>
+            [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+             (match_operand:QI 2 "const_0_to_255_operand" "n")]
+             UNSPEC_FPCLASS))]
+   "TARGET_AVX512DQ"
+   "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
+  [(set_attr "type" "sse")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_vmfpclass<mode>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+	(and:<avx512fmaskmode>
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VF_128 1 "register_operand" "v")
+             (match_operand:QI 2 "const_0_to_255_operand" "n")]
+	    UNSPEC_FPCLASS_SCALAR)
+	  (const_int 1)))]
+   "TARGET_AVX512DQ"
+   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+  [(set_attr "type" "sse")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
   [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
 	(unspec:VF_AVX512VL