diff mbox

[i386] Disable AVX-512VL insns for scalar mode operands on -march=knl.

Message ID 20150804114734.GA32256@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Aug. 4, 2015, 11:47 a.m. UTC
Hello,

For vec_dup and vec_concat patterns (of v2df mode) the second operand
is of scalar mode, so `ix86_hard_regno_mode_ok' didn't block EVEX registers
for non-512b modes (when AVX-512VL is turned off).
This results in 128/256b xmm[>15] registers being emitted on -march=knl.

There should be more patterns with a similar issue. I will look for them later.

Bootstrapped and regtested.

If no objections, I'll commit it tomorrow morning (Moscow time).

gcc/
	* config/i386/i386.md (define_attr "isa"): Add avx512vl and
	noavx512vl.
	(define_attr "enabled"): Handle avx512vl and noavx512vl.
	* config/i386/sse.md (define_insn "vec_dupv2df<mask_name>"): Split
	AVX-512 alternative out of SSE.
	(define_insn "*vec_concatv2df"): Ditto.

--
Thanks, K

commit 924990a6e8d38b6ebff9dd9a79e285ef81890202
Author: Kirill Yukhin <kirill.yukhin@intel.com>
Date:   Mon Aug 3 15:21:06 2015 +0300

    Fix vec_concatv2df and vec_dupv2df to block wrongly enabled AVX-512VL insns.

Comments

Uros Bizjak Aug. 4, 2015, 12:10 p.m. UTC | #1
On Tue, Aug 4, 2015 at 1:47 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
>
> For vec_dup and vec_concat patterns (of v2df mode) second operand
> is of scalar mode, so `ix86_hard_regno_mode_ok’ didn’t block EVEX registers,
> of non-512b modes (when AVX-512VL is turned off).
> This turns into 128/256b xmm[>15] regs emit on -march=knl.
>
> There’re should be more patterns w/ similar issue. Will look for them later.
>
> Bootstrapped and regtested.
>
> If no objections, I'll commit it tomorrow morning (Moscow time).
>
> gcc/
>         * config/i386/i386.md (define_attr "isa"): Addd avx512vl and
>         noavx512vl.
>         (define_attr "enabled"): Handle avx521vl and noavx512vl.
>         * config/i386/sse.md (define_insn "vec_dupv2df<mask_name>"): Split
>         AVX-512 alternative out of SSE.
>         (define_insn "*vec_concatv2df"): Ditto.
>
> -   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
> -   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
> -   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
> +   (set_attr "prefix_data16" "*,*,*,*,*,1,*,*,*,*")

Please change the above to:

   (set (attr "prefix_data16")
    (if_then_else (eq_attr "alternative" "5")
              (const_string "1")
              (const_string "*")))

Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5c5c1fc..9ffe9aa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -784,7 +784,8 @@ 
 (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
 		    sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
 		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
-		    fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq"
+		    fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq,
+		    avx512vl,noavx512vl"
   (const_string "base"))
 
 (define_attr "enabled" ""
@@ -819,6 +820,8 @@ 
 	 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
 	 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
 	 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
+	 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
+	 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
 	]
 	(const_int 1)))
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0970f0e..a509369 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8638,44 +8638,47 @@ 
    (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
 
 (define_insn "vec_dupv2df<mask_name>"
-  [(set (match_operand:V2DF 0 "register_operand"     "=x,v")
+  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v")
 	(vec_duplicate:V2DF
-	  (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
+	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
   "TARGET_SSE2 && <mask_avx512vl_condition>"
   "@
    unpcklpd\t%0, %0
-   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
-  [(set_attr "isa" "noavx,sse3")
+   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
+   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "isa" "noavx,sse3,avx512vl")
    (set_attr "type" "sselog1")
-   (set_attr "prefix" "orig,maybe_vex")
-   (set_attr "mode" "V2DF,DF")])
+   (set_attr "prefix" "orig,maybe_vex,evex")
+   (set_attr "mode" "V2DF,DF,DF")])
 
 (define_insn "*vec_concatv2df"
-  [(set (match_operand:V2DF 0 "register_operand"     "=x,v,v,x,x,v,x,x")
+  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x,v,x,x")
 	(vec_concat:V2DF
-	  (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
-	  (match_operand:DF 2 "vector_move_operand"  " x,v,1,m,m,C,x,m")))]
+	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
+	  (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m,C,x,m")))]
   "TARGET_SSE
    && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
        || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
   "@
    unpcklpd\t{%2, %0|%0, %2}
    vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
    %vmovddup\t{%1, %0|%0, %1}
+   vmovddup\t{%1, %0|%0, %1}
    movhpd\t{%2, %0|%0, %2}
    vmovhpd\t{%2, %1, %0|%0, %1, %2}
    %vmovsd\t{%1, %0|%0, %1}
    movlhps\t{%2, %0|%0, %2}
    movhps\t{%2, %0|%0, %2}"
-  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
+  [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
    (set (attr "type")
      (if_then_else
        (eq_attr "alternative" "0,1,2")
        (const_string "sselog")
        (const_string "ssemov")))
-   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
-   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
-   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
+   (set_attr "prefix_data16" "*,*,*,*,*,1,*,*,*,*")
+   (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
+   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;