Support vec_set/vec_extract/vec_init for V4HF/V2HF.

Message ID	20231110033900.246872-1-hongtao.liu@intel.com
State	New
Headers	show Return-Path: <gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 285803858D33 From: liuhongt <hongtao.liu@intel.com> To: gcc-patches@gcc.gnu.org Cc: crazylht@gmail.com, hjl.tools@gmail.com Subject: [PATCH] Support vec_set/vec_extract/vec_init for V4HF/V2HF. Date: Fri, 10 Nov 2023 11:39:00 +0800 Message-Id: <20231110033900.246872-1-hongtao.liu@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	Support vec_set/vec_extract/vec_init for V4HF/V2HF. \| expand Support vec_set/vec_extract/vec_init for V4HF/V2HF.

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 8fad73c1549..b52ec51fbe4 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -15592,6 +15592,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } goto widen; + case E_V4HFmode: + case E_V4BFmode: + if (TARGET_MMX_WITH_SSE) + { + val = force_reg (GET_MODE_INNER (mode), val); + rtx x = gen_rtx_VEC_DUPLICATE (mode, val); + emit_insn (gen_rtx_SET (target, x)); + return true; + } + return false; + case E_V2HImode: if (TARGET_SSE2) { @@ -15605,6 +15616,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return false; + case E_V2HFmode: + case E_V2BFmode: + if (TARGET_SSE2) + { + val = force_reg (GET_MODE_INNER (mode), val); + rtx x = gen_rtx_VEC_DUPLICATE (mode, val); + emit_insn (gen_rtx_SET (target, x)); + return true; + } + return false; + case E_V8QImode: case E_V4QImode: if (!mmx_ok) @@ -15815,6 +15837,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1; break; case E_V4HImode: + case E_V4HFmode: + case E_V4BFmode: use_vector_set = TARGET_SSE || TARGET_3DNOW_A; break; case E_V4QImode: @@ -16051,6 +16075,8 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, case E_V4SImode: case E_V8HImode: case E_V4HImode: + case E_V4HFmode: + case E_V4BFmode: break; case E_V16QImode: @@ -16438,6 +16464,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, rtx ops[64], op0, op1, op2, op3, op4, op5; machine_mode half_mode = VOIDmode; machine_mode quarter_mode = VOIDmode; + machine_mode int_inner_mode = VOIDmode; int n, i; switch (mode) @@ -16582,6 +16609,13 @@ quarter: ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); return; + case E_V4HFmode: + case E_V4BFmode: + case E_V2HFmode: + case E_V2BFmode: + int_inner_mode = HImode; + break; + case E_V4HImode: case E_V8QImode: @@ -16613,6 +16647,16 @@ quarter: 
for (j = 0; j < n_elt_per_word; ++j) { rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); + if (int_inner_mode != E_VOIDmode) + { + gcc_assert (TARGET_SSE2 && int_inner_mode == HImode); + rtx tmp = gen_reg_rtx (int_inner_mode); + elt = lowpart_subreg (int_inner_mode, + force_reg (inner_mode, elt), + inner_mode); + emit_move_insn (tmp, elt); + elt = tmp; + } elt = convert_modes (tmp_mode, inner_mode, elt, true); if (j == 0) @@ -16839,6 +16883,14 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) case E_V16SFmode: cmp_mode = V16SImode; break; + case E_V2HFmode: + case E_V2BFmode: + cmp_mode = V2HImode; + break; + case E_V4HFmode: + case E_V4BFmode: + cmp_mode = V4HImode; + break; case E_V8HFmode: cmp_mode = V8HImode; break; @@ -17085,9 +17137,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) case E_V8HFmode: case E_V8BFmode: case E_V2HImode: + case E_V2HFmode: + case E_V2BFmode: use_vec_merge = TARGET_SSE2; break; case E_V4HImode: + case E_V4HFmode: + case E_V4BFmode: use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); break; @@ -17428,9 +17484,13 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) case E_V8HFmode: case E_V8BFmode: case E_V2HImode: + case E_V2HFmode: + case E_V2BFmode: use_vec_extr = TARGET_SSE2; break; case E_V4HImode: + case E_V4HFmode: + case E_V4BFmode: use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); break; diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 50402b5b544..a3d08bb9d3b 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -112,11 +112,21 @@ (define_mode_attr mmxintvecmodelower ;; Mapping of vector modes to a vector mode of double size (define_mode_attr mmxdoublevecmode - [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")]) + [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI") + (V2HI "V4HI") (V2HF "V4HF") (V2BF "V4BF")]) ;; Mapping of vector modes back to the scalar modes (define_mode_attr mmxscalarmode - [(V2SI 
"SI") (V2SF "SF")]) + [(V2SI "SI") (V2SF "SF") + (V4HF "HF") (V4BF "BF") + (V2HF "HF") (V2BF "BF") + (V4HI "HI") (V2HI "HI")]) + +(define_mode_attr mmxscalarmodelower + [(V2SI "si") (V2SF "sf") + (V4HF "hf") (V4BF "bf") + (V2HF "hf") (V2BF "bf") + (V4HI "hi") (V2HI "hi")]) (define_mode_attr Yv_Yw [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) @@ -4882,11 +4892,11 @@ (define_insn "*mmx_pinsrd" (set_attr "mode" "TI")]) (define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW") - (vec_merge:V4HI - (vec_duplicate:V4HI - (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm")) - (match_operand:V4HI 1 "register_operand" "0,0,YW") + [(set (match_operand:V4FI_64 0 "register_operand" "=y,x,YW,&x") + (vec_merge:V4FI_64 + (vec_duplicate:V4FI_64 + (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,rm,x")) + (match_operand:V4FI_64 1 "register_operand" "0,0,YW,x") (match_operand:SI 3 "const_int_operand")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) @@ -4896,6 +4906,8 @@ (define_insn "*mmx_pinsrw" operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); switch (which_alternative) { + case 3: + return "#"; case 2: if (MEM_P (operands[2])) return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; @@ -4911,11 +4923,28 @@ (define_insn "*mmx_pinsrw" gcc_unreachable (); } } - [(set_attr "isa" "*,sse2_noavx,avx") - (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxcvt,sselog,sselog") + [(set_attr "isa" "*,sse2_noavx,avx,sse4") + (set_attr "mmx_isa" "native,*,*,*") + (set_attr "type" "mmxcvt,sselog,sselog,sselog") (set_attr "length_immediate" "1") - (set_attr "mode" "DI,TI,TI")]) + (set_attr "mode" "DI,TI,TI,TI")]) + +;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW. 
+(define_split + [(set (match_operand:V4FI_64 0 "sse_reg_operand") + (vec_merge:V4FI_64 + (vec_duplicate:V4FI_64 + (match_operand:<mmxscalarmode> 2 "sse_reg_operand")) + (match_operand:V4FI_64 1 "sse_reg_operand") + (match_operand:SI 3 "const_int_operand")))] + "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && reload_completed + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (<MODE>mode))" + [(set (match_dup 0) + (vec_duplicate:V4FI_64 (match_dup 2))) + (set (match_dup 0) + (vec_merge:V4FI_64 (match_dup 1) (match_dup 0) (match_dup 3)))] + "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0xf);") (define_insn "*mmx_pinsrb" [(set (match_operand:V8QI 0 "register_operand" "=x,YW") @@ -4973,6 +5002,41 @@ (define_insn "*mmx_pextrw" (set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex") (set_attr "mode" "DI,TI,TI,TI")]) +(define_insn "*mmx_pextrw<mode>" + [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,?r,jm,m,x,Yw") + (vec_select:<mmxscalarmode> + (match_operand:V4F_64 1 "register_operand" "y,YW,YW,YW,0,YW") + (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; + case 2: + case 3: + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}"; + case 4: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "psrldq\t{%2, %0|%0, %2}"; + case 5: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,sse2,sse4_noavx,avx,noavx,avx") + (set_attr "addr" "*,*,gpr16,*,*,*") + (set_attr "mmx_isa" "native,*,*,*,*,*") + (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,sseishft1,sseishft1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex,orig,maybe_evex") + (set_attr "mode" "DI,TI,TI,TI,TI,TI")]) + (define_insn 
"*mmx_pextrw_zext" [(set (match_operand:SWI48 0 "register_operand" "=r,r") (zero_extend:SWI48 @@ -5069,18 +5133,18 @@ (define_expand "mmx_pshufw" && (TARGET_SSE || TARGET_3DNOW_A)" { int mask = INTVAL (operands[2]); - emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1], - GEN_INT ((mask >> 0) & 3), - GEN_INT ((mask >> 2) & 3), - GEN_INT ((mask >> 4) & 3), - GEN_INT ((mask >> 6) & 3))); + emit_insn (gen_mmx_pshufwv4hi_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); DONE; }) -(define_insn "mmx_pshufw_1" - [(set (match_operand:V4HI 0 "register_operand" "=y,Yw") - (vec_select:V4HI - (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw") +(define_insn "mmx_pshufw<mode>_1" + [(set (match_operand:V4FI_64 0 "register_operand" "=y,Yw") + (vec_select:V4FI_64 + (match_operand:V4FI_64 1 "register_mmxmem_operand" "ym,Yw") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -5134,10 +5198,10 @@ (define_insn "*mmx_pshufd_1" (set_attr "mode" "TI")]) (define_insn "*mmx_pblendw64" - [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,x") - (vec_merge:V4HI - (match_operand:V4HI 2 "register_operand" "Yr,*x,x") - (match_operand:V4HI 1 "register_operand" "0,0,x") + [(set (match_operand:V4FI_64 0 "register_operand" "=Yr,*x,x") + (vec_merge:V4FI_64 + (match_operand:V4FI_64 2 "register_operand" "Yr,*x,x") + (match_operand:V4FI_64 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_0_to_15_operand")))] "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" "@ @@ -5152,10 +5216,10 @@ (define_insn "*mmx_pblendw64" (set_attr "mode" "TI")]) (define_insn "*mmx_pblendw32" - [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,x") - (vec_merge:V2HI - (match_operand:V2HI 2 "register_operand" "Yr,*x,x") - (match_operand:V2HI 1 "register_operand" "0,0,x") + [(set (match_operand:V2FI_32 0 "register_operand" "=Yr,*x,x") + 
(vec_merge:V2FI_32 + (match_operand:V2FI_32 2 "register_operand" "Yr,*x,x") + (match_operand:V2FI_32 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_0_to_7_operand")))] "TARGET_SSE4_1" "@ @@ -5212,6 +5276,16 @@ (define_insn "*vec_dupv4hi" (set_attr "length_immediate" "1") (set_attr "mode" "DI,TI")]) +(define_insn "*vec_dup<mode>" + [(set (match_operand:V4F_64 0 "register_operand" "=Yw") + (vec_duplicate:V4F_64 + (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))] + "TARGET_MMX_WITH_SSE" + "%vpshuflw\t{$0, %1, %0|%0, %1, 0}" + [(set_attr "isa" "sse2") + (set_attr "type" "sselog1") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) (define_insn "*vec_dupv2si" [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") @@ -5405,9 +5479,9 @@ (define_expand "vec_initv2sisi" DONE; }) -(define_expand "vec_setv4hi" - [(match_operand:V4HI 0 "register_operand") - (match_operand:HI 1 "register_operand") +(define_expand "vec_set<mode>" + [(match_operand:V4FI_64 0 "register_operand") + (match_operand:<mmxscalarmode> 1 "register_operand") (match_operand 2 "vec_setm_mmx_operand")] "TARGET_MMX || TARGET_MMX_WITH_SSE" { @@ -5419,9 +5493,9 @@ (define_expand "vec_setv4hi" DONE; }) -(define_expand "vec_extractv4hihi" - [(match_operand:HI 0 "register_operand") - (match_operand:V4HI 1 "register_operand") +(define_expand "vec_extract<mode><mmxscalarmodelower>" + [(match_operand:<mmxscalarmode> 0 "register_operand") + (match_operand:V4FI_64 1 "register_operand") (match_operand 2 "const_int_operand")] "TARGET_MMX || TARGET_MMX_WITH_SSE" { @@ -5440,6 +5514,16 @@ (define_expand "vec_initv4hihi" DONE; }) +(define_expand "vec_init<mode><mmxscalarmodelower>" + [(match_operand:V4F_64 0 "register_operand") + (match_operand 1)] + "TARGET_MMX_WITH_SSE" +{ + ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0], + operands[1]); + DONE; +}) + (define_expand "vec_setv8qi" [(match_operand:V8QI 0 "register_operand") (match_operand:QI 1 "register_operand") @@ -5476,11 
+5560,11 @@ (define_expand "vec_initv8qiqi" }) (define_insn "*pinsrw" - [(set (match_operand:V2HI 0 "register_operand" "=x,YW") - (vec_merge:V2HI - (vec_duplicate:V2HI - (match_operand:HI 2 "nonimmediate_operand" "rm,rm")) - (match_operand:V2HI 1 "register_operand" "0,YW") + [(set (match_operand:V2FI_32 0 "register_operand" "=x,YW,&x") + (vec_merge:V2FI_32 + (vec_duplicate:V2FI_32 + (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,x")) + (match_operand:V2FI_32 1 "register_operand" "0,YW,x") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE2 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -5489,6 +5573,8 @@ (define_insn "*pinsrw" operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); switch (which_alternative) { + case 2: + return "#"; case 1: if (MEM_P (operands[2])) return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; @@ -5503,11 +5589,29 @@ (define_insn "*pinsrw" gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,sse4") (set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) +;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW. 
+(define_split + [(set (match_operand:V2FI_32 0 "sse_reg_operand") + (vec_merge:V2FI_32 + (vec_duplicate:V2FI_32 + (match_operand:<mmxscalarmode> 2 "sse_reg_operand")) + (match_operand:V2FI_32 1 "sse_reg_operand") + (match_operand:SI 3 "const_int_operand")))] + "TARGET_SSE4_1 && reload_completed + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (<MODE>mode))" + [(set (match_dup 0) + (vec_duplicate:V2FI_32 (match_dup 2))) + (set (match_dup 0) + (vec_merge:V2FI_32 (match_dup 1) (match_dup 0) (match_dup 3)))] + "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0x3);") + + (define_insn "*pinsrb" [(set (match_operand:V4QI 0 "register_operand" "=x,YW") (vec_merge:V4QI @@ -5561,6 +5665,39 @@ (define_insn "*pextrw" (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "*pextrw<mode>" + [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw") + (vec_select:<mmxscalarmode> + (match_operand:V2F_32 1 "register_operand" "YW,YW,YW,0,YW") + (parallel [(match_operand:SI 2 "const_0_to_1_operand")])))] + "TARGET_SSE2" +{ + switch (which_alternative) + { + case 0: + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; + case 1: + return "pextrw\t{%2, %1, %0|%0, %1, %2}"; + case 2: + return "vpextrw\t{%2, %1, %0|%0, %1, %2}"; + case 3: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "psrldq\t{%2, %0|%0, %2}"; + case 4: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,sse4_noavx,avx,noavx,avx") + (set_attr "addr" "*,gpr16,*,*,*") + (set_attr "type" "sselog1,sselog1,sselog1,sseishft1,sseishft1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex") + (set_attr "mode" "TI")]) + (define_insn "*pextrw_zext" [(set (match_operand:SWI48 0 "register_operand" "=r") (zero_extend:SWI48 @@ -5608,9 +5745,9 @@ (define_insn "*pextrb_zext" (set_attr "prefix" "maybe_vex") 
(set_attr "mode" "TI")]) -(define_expand "vec_setv2hi" - [(match_operand:V2HI 0 "register_operand") - (match_operand:HI 1 "register_operand") +(define_expand "vec_set<mode>" + [(match_operand:V2FI_32 0 "register_operand") + (match_operand:<mmxscalarmode> 1 "register_operand") (match_operand 2 "vec_setm_sse41_operand")] "TARGET_SSE2" { @@ -5622,9 +5759,9 @@ (define_expand "vec_setv2hi" DONE; }) -(define_expand "vec_extractv2hihi" - [(match_operand:HI 0 "register_operand") - (match_operand:V2HI 1 "register_operand") +(define_expand "vec_extract<mode><mmxscalarmodelower>" + [(match_operand:<mmxscalarmode> 0 "register_operand") + (match_operand:V2FI_32 1 "register_operand") (match_operand 2 "const_int_operand")] "TARGET_SSE2" { @@ -5659,29 +5796,29 @@ (define_expand "vec_extractv4qiqi" }) (define_insn_and_split "*punpckwd" - [(set (match_operand:V2HI 0 "register_operand" "=x,Yw") - (vec_select:V2HI - (vec_concat:V4HI - (match_operand:V2HI 1 "register_operand" "0,Yw") - (match_operand:V2HI 2 "register_operand" "x,Yw")) + [(set (match_operand:V2FI_32 0 "register_operand" "=x,Yw") + (vec_select:V2FI_32 + (vec_concat:<mmxdoublevecmode> + (match_operand:V2FI_32 1 "register_operand" "0,Yw") + (match_operand:V2FI_32 2 "register_operand" "x,Yw")) (parallel [(match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand")])))] "TARGET_SSE2" "#" "&& reload_completed" [(set (match_dup 5) - (vec_select:V8HI + (vec_select:<mmxxmmmode> (match_dup 5) (parallel [(match_dup 3) (match_dup 4) (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] { - rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode); - rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode); - rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode); + rtx dest = lowpart_subreg (<mmxxmmmode>mode, operands[0], <MODE>mode); + rtx op1 = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode); + rtx op2 = lowpart_subreg (<mmxxmmmode>mode, operands[2], 
<MODE>mode); - emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2)); + emit_insn (gen_vec_interleave_low<mmxxmmmodelower> (dest, op1, op2)); static const int map[4] = { 0, 2, 1, 3 }; @@ -5699,10 +5836,10 @@ (define_insn_and_split "*punpckwd" (set_attr "type" "sselog") (set_attr "mode" "TI")]) -(define_insn "*pshufw_1" - [(set (match_operand:V2HI 0 "register_operand" "=Yw") - (vec_select:V2HI - (match_operand:V2HI 1 "register_operand" "Yw") +(define_insn "*pshufw<mode>_1" + [(set (match_operand:V2FI_32 0 "register_operand" "=Yw") + (vec_select:V2FI_32 + (match_operand:V2FI_32 1 "register_operand" "Yw") (parallel [(match_operand 2 "const_0_to_1_operand") (match_operand 3 "const_0_to_1_operand")])))] "TARGET_SSE2" @@ -5731,8 +5868,18 @@ (define_insn "*vec_dupv2hi" (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) -(define_expand "vec_initv2hihi" - [(match_operand:V2HI 0 "register_operand") +(define_insn "*vec_dup<mode>" + [(set (match_operand:V2F_32 0 "register_operand" "=Yw") + (vec_duplicate:V2F_32 + (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))] + "TARGET_SSE2" + "%vpshuflw\t{$0, %1, %0|%0, %1, 0}" + [(set_attr "type" "sselog1") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_expand "vec_init<mode><mmxscalarmodelower>" + [(match_operand:V2FI_32 0 "register_operand") (match_operand 1)] "TARGET_SSE2" { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 33198756bb0..48a9bd99576 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12372,9 +12372,9 @@ (define_insn_and_split "*vec_extract<mode>_0" "operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);") (define_insn "*vec_extract<mode>" - [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,v") + [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw") (vec_select:HFBF - (match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,v") + (match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,YW") 
(parallel [(match_operand:SI 2 "const_0_to_7_operand")])))] "TARGET_SSE2" diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-1.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-1.c new file mode 100644 index 00000000000..dba98aa4810 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-1.c @@ -0,0 +1,135 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O1 -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ + +#include "sse4_1-check.h" + +typedef _Float16 v4hf __attribute__((vector_size(8))); + +v4hf +__attribute__((noipa)) +vector_init_dupv4hf (_Float16 a) +{ + return __extension__(v4hf){a, a, a, a}; +} + +v4hf +__attribute__((noipa)) +vector_init_allzero (_Float16 a) +{ + return __extension__(v4hf){0, 0, 0, 0}; +} + +v4hf +__attribute__((noipa)) +vector_init_one_nonzero (_Float16 a) +{ + return __extension__(v4hf){0, 0, a, 0}; +} + +v4hf +__attribute__((noipa)) +vector_init_one_var (_Float16 a) +{ + return __extension__(v4hf){1, 2, a, 4}; +} + +v4hf +__attribute__((noipa)) +vector_init_general (_Float16 a, _Float16 a1, _Float16 a2, _Float16 a3) +{ + return __extension__(v4hf){a3, a2, a1, a}; +} + +v4hf +__attribute__((noipa)) +vec_set (_Float16 a, v4hf b) +{ + b[1] = a; + return b; +} + +v4hf +__attribute__((noipa)) +vec_set_var (_Float16 a, v4hf b, int c) +{ + b[c] = a; + return b; +} + +_Float16 +__attribute__((noipa)) +vec_extract (v4hf b) +{ + return b[2]; +} + +static void +sse4_1_test () +{ + typedef union { + _Float16 a[4]; + v4hf x;}union64hf; + union64hf res, exp, src; + + res.x = vector_init_dupv4hf (1.0f16); + for (int i = 0; i != 4; i++) + exp.a[i] = 1.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + res.x = vector_init_allzero (1.0f16); + for (int i = 0; i != 4; i++) + exp.a[i] = 0.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + res.x = vector_init_one_nonzero (1.0f16); + for (int i = 0; i != 4; i++) + exp.a[i] = 0.0f16; + 
exp.a[2] = 1.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + res.x = vector_init_one_var (3.0f16); + for (int i = 0; i != 4; i++) + exp.a[i] = i + 1; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + res.x = vector_init_general (4.0, 3.0f, 2.0f, 1.0); + for (int i = 0; i != 4; i++) + exp.a[i] = 1 + i; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + for (int i = 0; i != 4; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + res.x = vec_set (3.0f, src.x); + exp.a[1] = 3.0f; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + for (int i = 0; i != 4; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + res.x = vec_set_var (3.0f, src.x, 1); + exp.a[1] = 3.0f; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0) + __builtin_abort (); + + for (int i = 0; i != 4; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + _Float16 res_scalar = vec_extract (src.x); + if (res_scalar != 2.0f) + __builtin_abort (); + return ; +} diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-2.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-2.c new file mode 100644 index 00000000000..cc195638bff --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_elem-2.c @@ -0,0 +1,135 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O1 -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ + +#include "sse4_1-check.h" + +typedef _Float16 v2hf __attribute__((vector_size(4))); + +v2hf +__attribute__((noipa)) +vector_init_dupv2hf (_Float16 a) +{ + return __extension__(v2hf){a, a}; +} + +v2hf +__attribute__((noipa)) +vector_init_allzero (_Float16 a) +{ + return __extension__(v2hf){0, 0}; +} + +v2hf +__attribute__((noipa)) +vector_init_one_nonzero (_Float16 a) +{ + return __extension__(v2hf){0, a}; +} + +v2hf +__attribute__((noipa)) +vector_init_one_var (_Float16 a) +{ + return __extension__(v2hf){1, a}; +} + +v2hf +__attribute__((noipa)) +vector_init_general (_Float16 a1, _Float16 a2) +{ + return __extension__(v2hf){a2, a1}; +} + +v2hf +__attribute__((noipa)) +vec_set (_Float16 a, v2hf b) +{ + b[1] = a; + return b; +} + +v2hf +__attribute__((noipa)) +vec_set_var (_Float16 a, v2hf b, int c) +{ + b[c] = a; + return b; +} + +_Float16 +__attribute__((noipa)) +vec_extract (v2hf b) +{ + return b[1]; +} + +static void +sse4_1_test () +{ + typedef union { + _Float16 a[2]; + v2hf x;}union64hf; + union64hf res, exp, src; + + res.x = vector_init_dupv2hf (1.0f16); + for (int i = 0; i != 2; i++) + exp.a[i] = 1.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + res.x = vector_init_allzero (1.0f16); + for (int i = 0; i != 2; i++) + exp.a[i] = 0.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + res.x = vector_init_one_nonzero (1.0f16); + for (int i = 0; i != 2; i++) + exp.a[i] = 0.0f16; + exp.a[1] = 1.0f16; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + res.x = vector_init_one_var (3.0f16); + exp.a[0] = 1; + exp.a[1] = 3; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + res.x = vector_init_general (2.0f, 1.0); + for (int i = 0; i != 2; i++) + exp.a[i] = 1 + i; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + 
for (int i = 0; i != 2; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + res.x = vec_set (3.0f, src.x); + exp.a[1] = 3.0f; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + for (int i = 0; i != 2; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + res.x = vec_set_var (3.0f, src.x, 1); + exp.a[1] = 3.0f; + if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0) + __builtin_abort (); + + for (int i = 0; i != 2; i++) + { + src.a[i] = i; + exp.a[i] = i; + } + _Float16 res_scalar = vec_extract (src.x); + if (res_scalar != 1.0f) + __builtin_abort (); + return ; +}

Support vec_set/vec_extract/vec_init for V4HF/V2HF.

Commit Message

Patch