Message ID | Y3iZZpCSBrzTZVP4@tucnak |
---|---|
State | New |
Headers | show |
Series | i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628] | expand |
On Sat, Nov 19, 2022 at 9:53 AM Jakub Jelinek <jakub@redhat.com> wrote: > > On Fri, Oct 21, 2022 at 10:23:14AM +0200, Uros Bizjak wrote: > > OK, but now we have two more copies of a function that effectively > > extends BF to SF. Can you please split this utility function out and > > use it here and in cbranchbf4/cstorebf4? I'm talking about this part: > > > > + op = gen_lowpart (HImode, op1); > > + if (CONST_INT_P (op)) > > + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > > + op1, BFmode); > > + else > > + { > > + rtx t1 = gen_reg_rtx (SImode); > > + emit_insn (gen_zero_extendhisi2 (t1, op)); > > + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > > + op = gen_lowpart (SFmode, t1); > > + } > > > > Taking this a bit further, it looks like a generic function to extend > > BF to SF, when extendbfsf2 named function is not defined. > > > > The above could be a follow-up patch, the proposed patch is OK. > > Sorry for the delay, only got to this now. > And I'm fixing the sNaN handling in it too. If the argument is a BFmode sNaN > constant, we want in this case just a SFmode sNaN constant, but > simplify_const_unary_operation (FLOAT_EXTEND, ...) > in that case returns NULL (as normally conversions of a sNaN to some > other float type should raise an exception). In this case we want > to bypass that, as we know the sNaN will be used immediately in the SFmode > comparison a few instructions later. The patch fixes it by just > simplifying the lowpart to HImode and its zero extension to SImode, then > force into a pseudo and do the left shift and subreg to SFmode on the > pseudo. CSE or combine can handle it later. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2022-11-19 Jakub Jelinek <jakub@redhat.com> > > PR target/107628 > * config/i386/i386-protos.h (ix86_expand_fast_convert_bf_to_sf): > Declare. > * config/i386/i386-expand.cc (ix86_expand_fast_convert_bf_to_sf): New > function. > * config/i386/i386.md (cbranchbf4, cstorebf4): Use it. > > * gcc.target/i386/pr107628.c: New test. OK. Thanks, Uros. > > --- gcc/config/i386/i386-protos.h.jj 2022-10-10 09:31:57.234987578 +0200 > +++ gcc/config/i386/i386-protos.h 2022-11-18 12:21:26.975706528 +0100 > @@ -227,6 +227,7 @@ extern void ix86_expand_atomic_fetch_op_ > bool, bool); > extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, > bool, rtx_code_label *); > +extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); > > #ifdef TREE_CODE > extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); > --- gcc/config/i386/i386-expand.cc.jj 2022-11-11 08:15:45.452186618 +0100 > +++ gcc/config/i386/i386-expand.cc 2022-11-18 12:35:16.646193028 +0100 > @@ -24138,4 +24138,30 @@ ix86_expand_cmpxchg_loop (rtx *ptarget_b > *ptarget_bool = target_bool; > } > > +/* Convert a BFmode VAL to SFmode without signaling sNaNs. > + This is done by returning SF SUBREG of ((HI SUBREG) (VAL)) << 16. */ > + > +rtx > +ix86_expand_fast_convert_bf_to_sf (rtx val) > +{ > + rtx op = gen_lowpart (HImode, val), ret; > + if (CONST_INT_P (op)) > + { > + ret = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > + val, BFmode); > + if (ret) > + return ret; > + /* FLOAT_EXTEND simplification will fail if VAL is a sNaN. */ > + ret = gen_reg_rtx (SImode); > + emit_move_insn (ret, GEN_INT (INTVAL (op) & 0xffff)); > + } > + else > + { > + ret = gen_reg_rtx (SImode); > + emit_insn (gen_zero_extendhisi2 (ret, op)); > + } > + emit_insn (gen_ashlsi3 (ret, ret, GEN_INT (16))); > + return gen_lowpart (SFmode, ret); > +} > + > #include "gt-i386-expand.h" > --- gcc/config/i386/i386.md.jj 2022-11-07 10:30:42.727630162 +0100 > +++ gcc/config/i386/i386.md 2022-11-18 12:22:25.172898912 +0100 > @@ -1668,28 +1668,8 @@ (define_expand "cbranchbf4" > (pc)))] > "" > { > - rtx op1 = gen_lowpart (HImode, operands[1]); > - if (CONST_INT_P (op1)) > - op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[1], BFmode); > - else > - { > - rtx t1 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t1, op1)); > - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > - op1 = gen_lowpart (SFmode, t1); > - } > - rtx op2 = gen_lowpart (HImode, operands[2]); > - if (CONST_INT_P (op2)) > - op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[2], BFmode); > - else > - { > - rtx t2 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t2, op2)); > - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); > - op2 = gen_lowpart (SFmode, t2); > - } > + rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]); > + rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]); > do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0, > SFmode, NULL_RTX, NULL, > as_a <rtx_code_label *> (operands[3]), > @@ -1723,28 +1703,8 @@ (define_expand "cstorebf4" > (const_int 0)]))] > "" > { > - rtx op1 = gen_lowpart (HImode, operands[2]); > - if (CONST_INT_P (op1)) > - op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[2], BFmode); > - else > - { > - rtx t1 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t1, op1)); > - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > - op1 = gen_lowpart (SFmode, t1); > - } > - rtx op2 = gen_lowpart (HImode, operands[3]); > - if (CONST_INT_P (op2)) > - op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[3], BFmode); > - else > - { > - rtx t2 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t2, op2)); > - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); > - op2 = gen_lowpart (SFmode, t2); > - } > + rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]); > + rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]); > rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]), > op1, op2, SFmode, 0, 1); > if (!rtx_equal_p (res, operands[0])) > --- gcc/testsuite/gcc.target/i386/pr107628.c.jj 2022-11-18 13:15:06.859061627 +0100 > +++ gcc/testsuite/gcc.target/i386/pr107628.c 2022-11-18 13:14:51.797270220 +0100 > @@ -0,0 +1,11 @@ > +/* PR target/107628 */ > +/* { dg-do compile } */ > +/* { dg-options "-fsignaling-nans -msse2" } */ > + > +typedef __bf16 __attribute__((__vector_size__ (2))) V; > + > +void > +foo (V v) > +{ > + v < (V) (short) 65436; > +} > > > Jakub >
--- gcc/config/i386/i386-protos.h.jj 2022-10-10 09:31:57.234987578 +0200 +++ gcc/config/i386/i386-protos.h 2022-11-18 12:21:26.975706528 +0100 @@ -227,6 +227,7 @@ extern void ix86_expand_atomic_fetch_op_ bool, bool); extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, bool, rtx_code_label *); +extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); --- gcc/config/i386/i386-expand.cc.jj 2022-11-11 08:15:45.452186618 +0100 +++ gcc/config/i386/i386-expand.cc 2022-11-18 12:35:16.646193028 +0100 @@ -24138,4 +24138,30 @@ ix86_expand_cmpxchg_loop (rtx *ptarget_b *ptarget_bool = target_bool; } +/* Convert a BFmode VAL to SFmode without signaling sNaNs. + This is done by returning SF SUBREG of ((HI SUBREG) (VAL)) << 16. */ + +rtx +ix86_expand_fast_convert_bf_to_sf (rtx val) +{ + rtx op = gen_lowpart (HImode, val), ret; + if (CONST_INT_P (op)) + { + ret = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, + val, BFmode); + if (ret) + return ret; + /* FLOAT_EXTEND simplification will fail if VAL is a sNaN. */ + ret = gen_reg_rtx (SImode); + emit_move_insn (ret, GEN_INT (INTVAL (op) & 0xffff)); + } + else + { + ret = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (ret, op)); + } + emit_insn (gen_ashlsi3 (ret, ret, GEN_INT (16))); + return gen_lowpart (SFmode, ret); +} + #include "gt-i386-expand.h" --- gcc/config/i386/i386.md.jj 2022-11-07 10:30:42.727630162 +0100 +++ gcc/config/i386/i386.md 2022-11-18 12:22:25.172898912 +0100 @@ -1668,28 +1668,8 @@ (define_expand "cbranchbf4" (pc)))] "" { - rtx op1 = gen_lowpart (HImode, operands[1]); - if (CONST_INT_P (op1)) - op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[1], BFmode); - else - { - rtx t1 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t1, op1)); - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); - op1 = gen_lowpart (SFmode, t1); - } - rtx op2 = gen_lowpart (HImode, operands[2]); - if (CONST_INT_P (op2)) - op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[2], BFmode); - else - { - rtx t2 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t2, op2)); - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); - op2 = gen_lowpart (SFmode, t2); - } + rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]); + rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]); do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0, SFmode, NULL_RTX, NULL, as_a <rtx_code_label *> (operands[3]), @@ -1723,28 +1703,8 @@ (define_expand "cstorebf4" (const_int 0)]))] "" { - rtx op1 = gen_lowpart (HImode, operands[2]); - if (CONST_INT_P (op1)) - op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[2], BFmode); - else - { - rtx t1 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t1, op1)); - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); - op1 = gen_lowpart (SFmode, t1); - } - rtx op2 = gen_lowpart (HImode, operands[3]); - if (CONST_INT_P (op2)) - op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[3], BFmode); - else - { - rtx t2 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t2, op2)); - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); - op2 = gen_lowpart (SFmode, t2); - } + rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]); + rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]); rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]), op1, op2, SFmode, 0, 1); if (!rtx_equal_p (res, operands[0])) --- gcc/testsuite/gcc.target/i386/pr107628.c.jj 2022-11-18 13:15:06.859061627 +0100 +++ gcc/testsuite/gcc.target/i386/pr107628.c 2022-11-18 13:14:51.797270220 +0100 @@ -0,0 +1,11 @@ +/* PR target/107628 */ +/* { dg-do compile } */ +/* { dg-options "-fsignaling-nans -msse2" } */ + +typedef __bf16 __attribute__((__vector_size__ (2))) V; + +void +foo (V v) +{ + v < (V) (short) 65436; +}