Message ID | 20190216224032.4889-39-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | V8: Emulate MMX intrinsics with SSE | expand |
On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > PR target/89021 > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > (ix86_expand_vector_init_one_nonzero): Likewise. > (ix86_expand_vector_init_one_var): Likewise. > (ix86_expand_vector_init_general): Likewise. > (ix86_expand_vector_init): Likewise. > (ix86_expand_vector_set): Likewise. > (ix86_expand_vector_extract): Likewise. Please move this initialization to .md files. We don't support V2SF, so (define_expand "vec_setv2sf" [(match_operand:V2SF 0 "register_operand") (match_operand:SF 1 "register_operand") (match_operand 2 "const_int_operand")] "TARGET_MMX" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); DONE; }) should remain false, while in: (define_expand "vec_setv2si" [(match_operand:V2SI 0 "register_operand") (match_operand:SI 1 "register_operand") (match_operand 2 "const_int_operand")] "TARGET_MMX" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); DONE; }) we should use TARGET_MMX_WITH_SSE instead of false in the call to ix86_expand_vector_set. The same change for the other MMX modes: V4HI and V8QI. Same for other exported vector initializers: ix86_expand_vector_init and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but not V2SF (non-MMX modes are agnostic to mmx_ok argument). Uros. > * config/i386/mmx.md (*vec_dupv2sf): Changed to > define_insn_and_split to support SSE emulation. > (*vec_extractv2sf_0): Likewise. > (*vec_extractv2sf_1): Likewise. > (*vec_extractv2si_0): Likewise. > (*vec_extractv2si_1): Likewise. > (*vec_extractv2si_zext_mem): Likewise. > (vec_setv2sf): Also allow TARGET_MMX_WITH_SSE. > (vec_extractv2sf_1 splitter): Likewise. > (vec_extractv2sfsf): Likewise. > (vec_setv2si): Likewise. > (vec_extractv2si_1 splitter): Likewise. > (vec_extractv2sisi): Likewise. > (vec_setv4hi): Likewise. > (vec_extractv4hihi): Likewise. 
> (vec_setv8qi): Likewise. > (vec_extractv8qiqi): Likewise. > --- > gcc/config/i386/i386.c | 8 +++++ > gcc/config/i386/mmx.md | 66 +++++++++++++++++++++++++++--------------- > 2 files changed, 50 insertions(+), 24 deletions(-) > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index a76c17beece..25e0dc43a9e 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, > { > bool ok; > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2SImode: > @@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, > bool use_vector_set = false; > rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2DImode: > @@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, > XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); > const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2DFmode: > @@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, > machine_mode quarter_mode = VOIDmode; > int n, i; > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2SFmode: > @@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > int i; > rtx x; > > + mmx_ok |= TARGET_MMX_WITH_SSE; > + > /* Handle first initialization from vector elts. 
*/ > if (n_elts != XVECLEN (vals, 0)) > { > @@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) > machine_mode mmode = VOIDmode; > rtx (*gen_blendm) (rtx, rtx, rtx, rtx); > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2SFmode: > @@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) > bool use_vec_extr = false; > rtx tmp; > > + mmx_ok |= TARGET_MMX_WITH_SSE; > switch (mode) > { > case E_V2SImode: > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index c612d6b9e5c..f98952fd8a0 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -555,14 +555,23 @@ > (set_attr "prefix_extra" "1") > (set_attr "mode" "V2SF")]) > > -(define_insn "*vec_dupv2sf" > - [(set (match_operand:V2SF 0 "register_operand" "=y") > +(define_insn_and_split "*vec_dupv2sf" > + [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") > (vec_duplicate:V2SF > - (match_operand:SF 1 "register_operand" "0")))] > - "TARGET_MMX" > - "punpckldq\t%0, %0" > - [(set_attr "type" "mmxcvt") > - (set_attr "mode" "DI")]) > + (match_operand:SF 1 "register_operand" "0,0,Yv")))] > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > + "@ > + punpckldq\t%0, %0 > + # > + #" > + "TARGET_MMX_WITH_SSE && reload_completed" > + [(set (match_dup 0) > + (vec_duplicate:V4SF (match_dup 1)))] > + "operands[0] = lowpart_subreg (V4SFmode, operands[0], > + GET_MODE (operands[0]));" > + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") > + (set_attr "type" "mmxcvt,ssemov,ssemov") > + (set_attr "mode" "DI,TI,TI")]) > > (define_insn "*mmx_concatv2sf" > [(set (match_operand:V2SF 0 "register_operand" "=y,y") > @@ -580,7 +589,7 @@ > [(match_operand:V2SF 0 "register_operand") > (match_operand:SF 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -594,11 +603,13 @@ > 
(vec_select:SF > (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m") > (parallel [(const_int 0)])))] > - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > "#" > "&& reload_completed" > [(set (match_dup 0) (match_dup 1))] > - "operands[1] = gen_lowpart (SFmode, operands[1]);") > + "operands[1] = gen_lowpart (SFmode, operands[1]);" > + [(set_attr "mmx_isa" "*,*,native,native,*,*")]) > > ;; Avoid combining registers from different units in a single alternative, > ;; see comment above inline_secondary_memory_needed function in i386.c > @@ -607,7 +618,8 @@ > (vec_select:SF > (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") > (parallel [(const_int 1)])))] > - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > "@ > punpckhdq\t%0, %0 > %vmovshdup\t{%1, %0|%0, %1} > @@ -617,6 +629,7 @@ > # > #" > [(set_attr "isa" "*,sse3,noavx,*,*,*,*") > + (set_attr "mmx_isa" "native,*,*,native,*,*,*") > (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") > (set (attr "length_immediate") > (if_then_else (eq_attr "alternative" "2") > @@ -634,7 +647,7 @@ > (vec_select:SF > (match_operand:V2SF 1 "memory_operand") > (parallel [(const_int 1)])))] > - "TARGET_MMX && reload_completed" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed" > [(set (match_dup 0) (match_dup 1))] > "operands[1] = adjust_address (operands[1], SFmode, 4);") > > @@ -642,7 +655,7 @@ > [(match_operand:SF 0 "register_operand") > (match_operand:V2SF 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_extract (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1526,7 +1539,7 @@ > [(match_operand:V2SI 0 "register_operand") > (match_operand:SI 1 "register_operand") > 
(match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1540,11 +1553,13 @@ > (vec_select:SI > (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m") > (parallel [(const_int 0)])))] > - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > "#" > "&& reload_completed" > [(set (match_dup 0) (match_dup 1))] > - "operands[1] = gen_lowpart (SImode, operands[1]);") > + "operands[1] = gen_lowpart (SImode, operands[1]);" > + [(set_attr "mmx_isa" "*,*,native,native,*")]) > > ;; Avoid combining registers from different units in a single alternative, > ;; see comment above inline_secondary_memory_needed function in i386.c > @@ -1553,7 +1568,8 @@ > (vec_select:SI > (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") > (parallel [(const_int 1)])))] > - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > "@ > punpckhdq\t%0, %0 > %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} > @@ -1562,6 +1578,7 @@ > # > #" > [(set_attr "isa" "*,sse2,noavx,*,*,*") > + (set_attr "mmx_isa" "native,*,*,native,*,*") > (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") > (set (attr "length_immediate") > (if_then_else (eq_attr "alternative" "1,2") > @@ -1575,7 +1592,7 @@ > (vec_select:SI > (match_operand:V2SI 1 "memory_operand") > (parallel [(const_int 1)])))] > - "TARGET_MMX && reload_completed" > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed" > [(set (match_dup 0) (match_dup 1))] > "operands[1] = adjust_address (operands[1], SImode, 4);") > > @@ -1592,13 +1609,14 @@ > { > operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4); > } > - [(set_attr "isa" "*,sse2,*")]) > + [(set_attr "isa" "*,sse2,*") > + 
(set_attr "mmx_isa" "native,*,*")]) > > (define_expand "vec_extractv2sisi" > [(match_operand:SI 0 "register_operand") > (match_operand:V2SI 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_extract (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1618,7 +1636,7 @@ > [(match_operand:V4HI 0 "register_operand") > (match_operand:HI 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1629,7 +1647,7 @@ > [(match_operand:HI 0 "register_operand") > (match_operand:V4HI 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_extract (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1649,7 +1667,7 @@ > [(match_operand:V8QI 0 "register_operand") > (match_operand:QI 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > @@ -1660,7 +1678,7 @@ > [(match_operand:QI 0 "register_operand") > (match_operand:V8QI 1 "register_operand") > (match_operand 2 "const_int_operand")] > - "TARGET_MMX" > + "TARGET_MMX || TARGET_MMX_WITH_SSE" > { > ix86_expand_vector_extract (false, operands[0], operands[1], > INTVAL (operands[2])); > -- > 2.20.1 >
On Sun, Feb 17, 2019 at 8:24 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > PR target/89021 > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > (ix86_expand_vector_init_one_nonzero): Likewise. > > (ix86_expand_vector_init_one_var): Likewise. > > (ix86_expand_vector_init_general): Likewise. > > (ix86_expand_vector_init): Likewise. > > (ix86_expand_vector_set): Likewise. > > (ix86_expand_vector_extract): Likewise. > > Please move this initialization to .md files. We don't support V2SF, so > > (define_expand "vec_setv2sf" > [(match_operand:V2SF 0 "register_operand") > (match_operand:SF 1 "register_operand") > (match_operand 2 "const_int_operand")] > "TARGET_MMX" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > DONE; > }) > > should remain false, while in: > > (define_expand "vec_setv2si" > [(match_operand:V2SI 0 "register_operand") > (match_operand:SI 1 "register_operand") > (match_operand 2 "const_int_operand")] > "TARGET_MMX" > { > ix86_expand_vector_set (false, operands[0], operands[1], > INTVAL (operands[2])); > DONE; > }) > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > and V8QI. > > Same for other exported vector initializers: ix86_expand_vector_init > and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > There is limited support for V2SF with MMX. My goal is to emulate all MMX features with SSE so that -msse2 -mno-mmx works for all MMX codes which don't use MMX inline asm. Leaving out V2SF emulation creates a hole in my implementation. Do you have testcases to show issues with V2SF emulation?
On Sun, Feb 17, 2019 at 6:03 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Sun, Feb 17, 2019 at 8:24 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > PR target/89021 > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > (ix86_expand_vector_init_one_var): Likewise. > > > (ix86_expand_vector_init_general): Likewise. > > > (ix86_expand_vector_init): Likewise. > > > (ix86_expand_vector_set): Likewise. > > > (ix86_expand_vector_extract): Likewise. > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > (define_expand "vec_setv2sf" > > [(match_operand:V2SF 0 "register_operand") > > (match_operand:SF 1 "register_operand") > > (match_operand 2 "const_int_operand")] > > "TARGET_MMX" > > { > > ix86_expand_vector_set (false, operands[0], operands[1], > > INTVAL (operands[2])); > > DONE; > > }) > > > > should remain false, while in: > > > > (define_expand "vec_setv2si" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:SI 1 "register_operand") > > (match_operand 2 "const_int_operand")] > > "TARGET_MMX" > > { > > ix86_expand_vector_set (false, operands[0], operands[1], > > INTVAL (operands[2])); > > DONE; > > }) > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > and V8QI. > > > > Same for other exported vector initializers: ix86_expand_vector_init > > and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > There is limited support for V2SF with MMX. My goal is to emulate > all MMX features with SSE so that -msse2 -mno-mmx works for all > MMX codes which don't use MMX inline asm. Leaving out V2SF > emulation creates a hole in my implementation. 
Do you have testcases > to show issues with V2SF emulation? No, but V2SF should be avoided from emulation. Uros.
On Sun, Feb 17, 2019 at 9:08 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Sun, Feb 17, 2019 at 6:03 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Sun, Feb 17, 2019 at 8:24 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > PR target/89021 > > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > > (ix86_expand_vector_init_one_var): Likewise. > > > > (ix86_expand_vector_init_general): Likewise. > > > > (ix86_expand_vector_init): Likewise. > > > > (ix86_expand_vector_set): Likewise. > > > > (ix86_expand_vector_extract): Likewise. > > > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > > > (define_expand "vec_setv2sf" > > > [(match_operand:V2SF 0 "register_operand") > > > (match_operand:SF 1 "register_operand") > > > (match_operand 2 "const_int_operand")] > > > "TARGET_MMX" > > > { > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > INTVAL (operands[2])); > > > DONE; > > > }) > > > > > > should remain false, while in: > > > > > > (define_expand "vec_setv2si" > > > [(match_operand:V2SI 0 "register_operand") > > > (match_operand:SI 1 "register_operand") > > > (match_operand 2 "const_int_operand")] > > > "TARGET_MMX" > > > { > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > INTVAL (operands[2])); > > > DONE; > > > }) > > > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > > and V8QI. > > > > > > Same for other exported vector initializers: ix86_expand_vector_init > > > and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but > > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > > > > There is limited support for V2SF with MMX. 
My goal is to emulate > > all MMX features with SSE so that -msse2 -mno-mmx works for all > > MMX codes which don't use MMX inline asm. Leaving out V2SF > > emulation creates a hole in my implementation. Do you have testcases > > to show issues with V2SF emulation? > > No, but V2SF should be avoided from emulation. Then -msse2 -mno-mmx won't work on some MMX codes like: #include <mmintrin.h> float foo (__m64 x) { return ((__v2sf) x)[0]; } Is there any reason why it shouldn't work for -msse2 -mno-mmx?
On Sun, Feb 17, 2019 at 6:15 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > PR target/89021 > > > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > > > (ix86_expand_vector_init_one_var): Likewise. > > > > > (ix86_expand_vector_init_general): Likewise. > > > > > (ix86_expand_vector_init): Likewise. > > > > > (ix86_expand_vector_set): Likewise. > > > > > (ix86_expand_vector_extract): Likewise. > > > > > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > > > > > (define_expand "vec_setv2sf" > > > > [(match_operand:V2SF 0 "register_operand") > > > > (match_operand:SF 1 "register_operand") > > > > (match_operand 2 "const_int_operand")] > > > > "TARGET_MMX" > > > > { > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > INTVAL (operands[2])); > > > > DONE; > > > > }) > > > > > > > > should remain false, while in: > > > > > > > > (define_expand "vec_setv2si" > > > > [(match_operand:V2SI 0 "register_operand") > > > > (match_operand:SI 1 "register_operand") > > > > (match_operand 2 "const_int_operand")] > > > > "TARGET_MMX" > > > > { > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > INTVAL (operands[2])); > > > > DONE; > > > > }) > > > > > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > > > and V8QI. > > > > > > > > Same for other exported vector initializers: ix86_expand_vector_init > > > > and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but > > > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > > > > > > > There is limited support for V2SF with MMX. 
My goal is to emulate > > > all MMX features with SSE so that -msse2 -mno-mmx works for all > > > MMX codes which don't use MMX inline asm. Leaving out V2SF > > > emulation creates a hole in my implementation. Do you have testcases > > > to show issues with V2SF emulation? > > > > No, but V2SF should be avoided from emulation. > > Then -msse2 -mno-mmx won't work on some MMX codes like: > > #include <mmintrin.h> > > float > foo (__m64 x) > { > return ((__v2sf) x)[0]; > } > > Is there any reason why it shouldn't work for -msse2 -mno-mmx? Does this require 3DNOW insn? If not, it is OK also for V2SF. Uros.
On Sun, Feb 17, 2019 at 9:22 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Sun, Feb 17, 2019 at 6:15 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > > > PR target/89021 > > > > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > > > > (ix86_expand_vector_init_one_var): Likewise. > > > > > > (ix86_expand_vector_init_general): Likewise. > > > > > > (ix86_expand_vector_init): Likewise. > > > > > > (ix86_expand_vector_set): Likewise. > > > > > > (ix86_expand_vector_extract): Likewise. > > > > > > > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > > > > > > > (define_expand "vec_setv2sf" > > > > > [(match_operand:V2SF 0 "register_operand") > > > > > (match_operand:SF 1 "register_operand") > > > > > (match_operand 2 "const_int_operand")] > > > > > "TARGET_MMX" > > > > > { > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > INTVAL (operands[2])); > > > > > DONE; > > > > > }) > > > > > > > > > > should remain false, while in: > > > > > > > > > > (define_expand "vec_setv2si" > > > > > [(match_operand:V2SI 0 "register_operand") > > > > > (match_operand:SI 1 "register_operand") > > > > > (match_operand 2 "const_int_operand")] > > > > > "TARGET_MMX" > > > > > { > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > INTVAL (operands[2])); > > > > > DONE; > > > > > }) > > > > > > > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > > > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > > > > and V8QI. > > > > > > > > > > Same for other exported vector initializers: ix86_expand_vector_init > > > > > and ix86_expand_vector_extract. 
We support V2SI, V4HI and V8QI, but > > > > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > > > > > > > > > > There is limited support for V2SF with MMX. My goal is to emulate > > > > all MMX features with SSE so that -msse2 -mno-mmx works for all > > > > MMX codes which don't use MMX inline asm. Leaving out V2SF > > > > emulation creates a hole in my implementation. Do you have testcases > > > > to show issues with V2SF emulation? > > > > > > No, but V2SF should be avoided from emulation. > > > > Then -msse2 -mno-mmx won't work on some MMX codes like: > > > > #include <mmintrin.h> > > > > float > > foo (__m64 x) > > { > > return ((__v2sf) x)[0]; > > } > > > > Is there any reason why it shouldn't work for -msse2 -mno-mmx? > > Does this require 3DNOW insn? If not, it is OK also for V2SF. V2SF vector manipulation, like init, dup, extract, insert, doesn't require 3DNOW. They are tested with -msse2 -mno-mmx in my tests for MMX intrinsic emulations with SSE.
On Sun, Feb 17, 2019 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > > > > > PR target/89021 > > > > > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > > > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > > > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > > > > > (ix86_expand_vector_init_one_var): Likewise. > > > > > > > (ix86_expand_vector_init_general): Likewise. > > > > > > > (ix86_expand_vector_init): Likewise. > > > > > > > (ix86_expand_vector_set): Likewise. > > > > > > > (ix86_expand_vector_extract): Likewise. > > > > > > > > > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > > > > > > > > > (define_expand "vec_setv2sf" > > > > > > [(match_operand:V2SF 0 "register_operand") > > > > > > (match_operand:SF 1 "register_operand") > > > > > > (match_operand 2 "const_int_operand")] > > > > > > "TARGET_MMX" > > > > > > { > > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > > INTVAL (operands[2])); > > > > > > DONE; > > > > > > }) > > > > > > > > > > > > should remain false, while in: > > > > > > > > > > > > (define_expand "vec_setv2si" > > > > > > [(match_operand:V2SI 0 "register_operand") > > > > > > (match_operand:SI 1 "register_operand") > > > > > > (match_operand 2 "const_int_operand")] > > > > > > "TARGET_MMX" > > > > > > { > > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > > INTVAL (operands[2])); > > > > > > DONE; > > > > > > }) > > > > > > > > > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > > > > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > > > > > and V8QI. > > > > > > > > > > > > Same for other exported vector initializers: ix86_expand_vector_init > > > > > > and ix86_expand_vector_extract. 
We support V2SI, V4HI and V8QI, but > > > > > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > > > > > > > > > > > > > There is limited support for V2SF with MMX. My goal is to emulate > > > > > all MMX features with SSE so that -msse2 -mno-mmx works for all > > > > > MMX codes which don't use MMX inline asm. Leaving out V2SF > > > > > emulation creates a hole in my implementation. Do you have testcases > > > > > to show issues with V2SF emulation? > > > > > > > > No, but V2SF should be avoided from emulation. > > > > > > Then -msse2 -mno-mmx won't work on some MMX codes like: > > > > > > #include <mmintrin.h> > > > > > > float > > > foo (__m64 x) > > > { > > > return ((__v2sf) x)[0]; > > > } > > > > > > Is there any reason why it shouldn't work for -msse2 -mno-mmx? > > > > Does this require 3DNOW insn? If not, it is OK also for V2SF. > > V2SF vector manipulation, like init, dup, extract, insert, doesn't > require 3DNOW. They are tested with -msse2 -mno-mmx in my > tests for MMX intrinsic emulations with SSE. That's OK then. Please move initialization to .md files, as suggested before. Uros.
On Sun, Feb 17, 2019 at 10:50 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Sun, Feb 17, 2019 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > On Sat, Feb 16, 2019 at 11:46 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > > > > > > > PR target/89021 > > > > > > > > * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set > > > > > > > > mmx_ok to true if TARGET_MMX_WITH_SSE is true. > > > > > > > > (ix86_expand_vector_init_one_nonzero): Likewise. > > > > > > > > (ix86_expand_vector_init_one_var): Likewise. > > > > > > > > (ix86_expand_vector_init_general): Likewise. > > > > > > > > (ix86_expand_vector_init): Likewise. > > > > > > > > (ix86_expand_vector_set): Likewise. > > > > > > > > (ix86_expand_vector_extract): Likewise. > > > > > > > > > > > > > > Please move this initialization to .md files. We don't support V2SF, so > > > > > > > > > > > > > > (define_expand "vec_setv2sf" > > > > > > > [(match_operand:V2SF 0 "register_operand") > > > > > > > (match_operand:SF 1 "register_operand") > > > > > > > (match_operand 2 "const_int_operand")] > > > > > > > "TARGET_MMX" > > > > > > > { > > > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > > > INTVAL (operands[2])); > > > > > > > DONE; > > > > > > > }) > > > > > > > > > > > > > > should remain false, while in: > > > > > > > > > > > > > > (define_expand "vec_setv2si" > > > > > > > [(match_operand:V2SI 0 "register_operand") > > > > > > > (match_operand:SI 1 "register_operand") > > > > > > > (match_operand 2 "const_int_operand")] > > > > > > > "TARGET_MMX" > > > > > > > { > > > > > > > ix86_expand_vector_set (false, operands[0], operands[1], > > > > > > > INTVAL (operands[2])); > > > > > > > DONE; > > > > > > > }) > > > > > > > > > > > > > > we should use TARGET_MMX_WITH_SSE instead of false in the call to > > > > > > > ix86_expand_vector_set. The same change for the other MMX modes: V4HI > > > > > > > and V8QI. 
> > > > > > > > > > > > > > Same for other exported vector initializers: ix86_expand_vector_init > > > > > > > and ix86_expand_vector_extract. We support V2SI, V4HI and V8QI, but > > > > > > > not V2SF (non-MMX modes are agnostic to mmx_ok argument). > > > > > > > > > > > > > > > > > > > There is limited support for V2SF with MMX. My goal is to emulate > > > > > > all MMX features with SSE so that -msse2 -mno-mmx works for all > > > > > > MMX codes which don't use MMX inline asm. Leaving out V2SF > > > > > > emulation creates a hole in my implementation. Do you have testcases > > > > > > to show issues with V2SF emulation? > > > > > > > > > > No, but V2SF should be avoided from emulation. > > > > > > > > Then -msse2 -mno-mmx won't work on some MMX codes like: > > > > > > > > #include <mmintrin.h> > > > > > > > > float > > > > foo (__m64 x) > > > > { > > > > return ((__v2sf) x)[0]; > > > > } > > > > > > > > Is there any reason why it shouldn't work for -msse2 -mno-mmx? > > > > > > Does this require 3DNOW insn? If not, it is OK also for V2SF. > > > > V2SF vector manipulation, like init, dup, extract, insert, doesn't > > require 3DNOW. They are tested with -msse2 -mo-mmx in my > > tests for MMX intrinsic emulations with SSE. > > That's OK then. Please move initialization to .md files, as suggested before. > I will give it a try.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a76c17beece..25e0dc43a9e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, { bool ok; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SImode: @@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, bool use_vector_set = false; rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2DImode: @@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2DFmode: @@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, machine_mode quarter_mode = VOIDmode; int n, i; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SFmode: @@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) int i; rtx x; + mmx_ok |= TARGET_MMX_WITH_SSE; + /* Handle first initialization from vector elts. 
*/ if (n_elts != XVECLEN (vals, 0)) { @@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) machine_mode mmode = VOIDmode; rtx (*gen_blendm) (rtx, rtx, rtx, rtx); + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SFmode: @@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) bool use_vec_extr = false; rtx tmp; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SImode: diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index c612d6b9e5c..f98952fd8a0 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -555,14 +555,23 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) -(define_insn "*vec_dupv2sf" - [(set (match_operand:V2SF 0 "register_operand" "=y") +(define_insn_and_split "*vec_dupv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") (vec_duplicate:V2SF - (match_operand:SF 1 "register_operand" "0")))] - "TARGET_MMX" - "punpckldq\t%0, %0" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + (match_operand:SF 1 "register_operand" "0,0,Yv")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + punpckldq\t%0, %0 + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) + (vec_duplicate:V4SF (match_dup 1)))] + "operands[0] = lowpart_subreg (V4SFmode, operands[0], + GET_MODE (operands[0]));" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxcvt,ssemov,ssemov") + (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=y,y") @@ -580,7 +589,7 @@ [(match_operand:V2SF 0 "register_operand") (match_operand:SF 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); @@ -594,11 +603,13 @@ (vec_select:SF (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m") (parallel [(const_int 0)])))] - "TARGET_MMX && 
!(MEM_P (operands[0]) && MEM_P (operands[1]))" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (SFmode, operands[1]);") + "operands[1] = gen_lowpart (SFmode, operands[1]);" + [(set_attr "mmx_isa" "*,*,native,native,*,*")]) ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c @@ -607,7 +618,8 @@ (vec_select:SF (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") (parallel [(const_int 1)])))] - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 %vmovshdup\t{%1, %0|%0, %1} @@ -617,6 +629,7 @@ # #" [(set_attr "isa" "*,sse3,noavx,*,*,*,*") + (set_attr "mmx_isa" "native,*,*,native,*,*,*") (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "2") @@ -634,7 +647,7 @@ (vec_select:SF (match_operand:V2SF 1 "memory_operand") (parallel [(const_int 1)])))] - "TARGET_MMX && reload_completed" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed" [(set (match_dup 0) (match_dup 1))] "operands[1] = adjust_address (operands[1], SFmode, 4);") @@ -642,7 +655,7 @@ [(match_operand:SF 0 "register_operand") (match_operand:V2SF 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_extract (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1526,7 +1539,7 @@ [(match_operand:V2SI 0 "register_operand") (match_operand:SI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1540,11 +1553,13 @@ (vec_select:SI (match_operand:V2SI 
1 "nonimmediate_operand" "xm,x,ym,y,m") (parallel [(const_int 0)])))] - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (SImode, operands[1]);") + "operands[1] = gen_lowpart (SImode, operands[1]);" + [(set_attr "mmx_isa" "*,*,native,native,*")]) ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c @@ -1553,7 +1568,8 @@ (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") (parallel [(const_int 1)])))] - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} @@ -1562,6 +1578,7 @@ # #" [(set_attr "isa" "*,sse2,noavx,*,*,*") + (set_attr "mmx_isa" "native,*,*,native,*,*") (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "1,2") @@ -1575,7 +1592,7 @@ (vec_select:SI (match_operand:V2SI 1 "memory_operand") (parallel [(const_int 1)])))] - "TARGET_MMX && reload_completed" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed" [(set (match_dup 0) (match_dup 1))] "operands[1] = adjust_address (operands[1], SImode, 4);") @@ -1592,13 +1609,14 @@ { operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4); } - [(set_attr "isa" "*,sse2,*")]) + [(set_attr "isa" "*,sse2,*") + (set_attr "mmx_isa" "native,*,*")]) (define_expand "vec_extractv2sisi" [(match_operand:SI 0 "register_operand") (match_operand:V2SI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_extract (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1618,7 
+1636,7 @@ [(match_operand:V4HI 0 "register_operand") (match_operand:HI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1629,7 +1647,7 @@ [(match_operand:HI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_extract (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1649,7 +1667,7 @@ [(match_operand:V8QI 0 "register_operand") (match_operand:QI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); @@ -1660,7 +1678,7 @@ [(match_operand:QI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_extract (false, operands[0], operands[1], INTVAL (operands[2]));