Message ID | 20160420195359.GA3113@intel.com |
---|---|
State | New |
Headers | show |
On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: > Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is > standard AVX2 constants and all 1s in XImode is standard AVX512F constants, > pass mode to standard_sse_constant_p and standard_sse_constant_opcode > to check if all 1s is available for target. > > Tested on Linux/x86-64. OK for master? No. This patch should use "isa" attribute instead of adding even more similar patterns. Also, please leave MEM_P checks, the rare C->m move can be easily resolved by IRA. Also, the mode checks should be in the predicate, standard_sse_constant_p just validates if the constant is allowed by ISA. Uros. > BTW, it will be used to fix > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155 > > > H.J. > --- > * config/i386/i386-protos.h (standard_sse_constant_p): Take > machine_mode with VOIDmode as default. > * config/i386/i386.c (standard_sse_constant_p): Get mode if > it is VOIDmode. Return 2 for all 1s of integer in supported > modes. > (ix86_expand_vector_move): Pass mode to standard_sse_constant_p. > * config/i386/i386.md (*movxi_internal_avx512f): Replace > vector_move_operand with nonimmediate_or_sse_const_operand and > use BC instead of C in constraint. Check register_operand > instead of MEM_P. Pass mode to standard_sse_constant_opcode. > (*movoi_internal_avx): Disabled for TARGET_AVX2. Check > register_operand instead of MEM_P. > (*movoi_internal_avx2): New pattern. > (*movti_internal_sse): Likewise. > (*movti_internal): Renamed to ... > (*movti_internal_sse2): This. Require SSE2. Use BC instead of > C in constraint. Check register_operand instead of MEM_P in > 32-bit mode. > --- > gcc/config/i386/i386-protos.h | 2 +- > gcc/config/i386/i386.c | 27 ++++++++--- > gcc/config/i386/i386.md | 104 ++++++++++++++++++++++++++++++++++++++++-- > 3 files changed, 121 insertions(+), 12 deletions(-) > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index ff47bc1..cf54189 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void); > extern int standard_80387_constant_p (rtx); > extern const char *standard_80387_constant_opcode (rtx); > extern rtx standard_80387_constant_rtx (int); > -extern int standard_sse_constant_p (rtx); > +extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode); > extern const char *standard_sse_constant_opcode (rtx_insn *, rtx); > extern bool symbolic_reference_mentioned_p (rtx); > extern bool extended_reg_mentioned_p (rtx); > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 6379313..dd951c2 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx) > in supported SSE/AVX vector mode. */ > > int > -standard_sse_constant_p (rtx x) > +standard_sse_constant_p (rtx x, machine_mode mode) > { > - machine_mode mode; > - > if (!TARGET_SSE) > return 0; > > - mode = GET_MODE (x); > - > + if (mode == VOIDmode) > + mode = GET_MODE (x); > + > if (x == const0_rtx || x == CONST0_RTX (mode)) > return 1; > - if (vector_all_ones_operand (x, mode)) > + if (CONST_INT_P (x)) > + { > + /* If mode != VOIDmode, standard_sse_constant_p must be called: > + 1. On TImode with SSE2. > + 2. On OImode with AVX2. > + 3. On XImode with AVX512F. > + */ > + if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1 > + && (mode == VOIDmode > + || (mode == TImode && TARGET_SSE2) > + || (mode == OImode && TARGET_AVX2) > + || (mode == XImode && TARGET_AVX512F))) > + return 2; > + } > + else if (vector_all_ones_operand (x, mode)) > switch (mode) > { > case V16QImode: > @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) > && (CONSTANT_P (op1) > || (SUBREG_P (op1) > && CONSTANT_P (SUBREG_REG (op1)))) > - && !standard_sse_constant_p (op1)) > + && !standard_sse_constant_p (op1, mode)) > op1 = validize_mem (force_const_mem (mode, op1)); > > /* We need to check memory alignment for SSE mode since attribute > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index babd0a4..75227aa 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1971,8 +1971,10 @@ > > (define_insn "*movxi_internal_avx512f" > [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m") > - (match_operand:XI 1 "vector_move_operand" "C ,vm,v"))] > - "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] > + "TARGET_AVX512F > + && (register_operand (operands[0], XImode) > + || register_operand (operands[1], XImode))" > { > switch (which_alternative) > { > @@ -1996,7 +1998,10 @@ > (define_insn "*movoi_internal_avx" > [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m") > (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))] > - "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "TARGET_AVX > + && !TARGET_AVX2 > + && (register_operand (operands[0], OImode) > + || register_operand (operands[1], OImode))" > { > switch (get_attr_type (insn)) > { > @@ -2042,10 +2047,62 @@ > ] > (const_string "OI")))]) > > -(define_insn "*movti_internal" > +(define_insn "*movoi_internal_avx2" > + [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m") > + (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] > + "TARGET_AVX2 > + && (register_operand (operands[0], OImode) > + || register_operand (operands[1], OImode))" > +{ > + switch (get_attr_type (insn)) > + { > + case TYPE_SSELOG1: > + return standard_sse_constant_opcode (insn, operands[1]); > + > + case TYPE_SSEMOV: > + if (misaligned_operand (operands[0], OImode) > + || misaligned_operand (operands[1], OImode)) > + { > + if (get_attr_mode (insn) == MODE_V8SF) > + return "vmovups\t{%1, %0|%0, %1}"; > + else if (get_attr_mode (insn) == MODE_XI) > + return "vmovdqu32\t{%1, %0|%0, %1}"; > + else > + return "vmovdqu\t{%1, %0|%0, %1}"; > + } > + else > + { > + if (get_attr_mode (insn) == MODE_V8SF) > + return "vmovaps\t{%1, %0|%0, %1}"; > + else if (get_attr_mode (insn) == MODE_XI) > + return "vmovdqa32\t{%1, %0|%0, %1}"; > + else > + return "vmovdqa\t{%1, %0|%0, %1}"; > + } > + > + default: > + gcc_unreachable (); > + } > +} > + [(set_attr "type" "sselog1,ssemov,ssemov") > + (set_attr "prefix" "vex") > + (set (attr "mode") > + (cond [(ior (match_operand 0 "ext_sse_reg_operand") > + (match_operand 1 "ext_sse_reg_operand")) > + (const_string "XI") > + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > + (const_string "V8SF") > + (and (eq_attr "alternative" "2") > + (match_test "TARGET_SSE_TYPELESS_STORES")) > + (const_string "V8SF") > + ] > + (const_string "OI")))]) > + > +(define_insn "*movti_internal_sse" > [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m") > (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))] > "(TARGET_64BIT || TARGET_SSE) > + && !TARGET_SSE2 > && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > switch (get_attr_type (insn)) > @@ -2061,6 +2118,45 @@ > to stack may result in unaligned memory access. */ > if (misaligned_operand (operands[0], TImode) > || misaligned_operand (operands[1], TImode)) > + return "%vmovups\t{%1, %0|%0, %1}"; > + else > + return "%vmovaps\t{%1, %0|%0, %1}"; > + > + default: > + gcc_unreachable (); > + } > +} > + [(set_attr "isa" "x64,x64,*,*,*") > + (set_attr "type" "multi,multi,sselog1,ssemov,ssemov") > + (set_attr "prefix" "orig") > + (set (attr "mode") > + (cond [(eq_attr "alternative" "0,1") > + (const_string "DI")] > + (const_string "TI")))]) > + > +(define_insn "*movti_internal_sse2" > + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m") > + (match_operand:TI 1 "general_operand" "riFo,re,BC,vm,v"))] > + "TARGET_SSE2 > + && ((TARGET_64BIT > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))) > + || (!TARGET_64BIT > + && (register_operand (operands[0], TImode) > + || register_operand (operands[1], TImode))))" > +{ > + switch (get_attr_type (insn)) > + { > + case TYPE_MULTI: > + return "#"; > + > + case TYPE_SSELOG1: > + return standard_sse_constant_opcode (insn, operands[1]); > + > + case TYPE_SSEMOV: > + /* TDmode values are passed as TImode on the stack. Moving them > + to stack may result in unaligned memory access. */ > + if (misaligned_operand (operands[0], TImode) > + || misaligned_operand (operands[1], TImode)) > { > if (get_attr_mode (insn) == MODE_V4SF) > return "%vmovups\t{%1, %0|%0, %1}"; > -- > 2.5.5 >
On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote: > On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is >> standard AVX2 constants and all 1s in XImode is standard AVX512F constants, >> pass mode to standard_sse_constant_p and standard_sse_constant_opcode >> to check if all 1s is available for target. >> >> Tested on Linux/x86-64. OK for master? > > No. > > This patch should use "isa" attribute instead of adding even more > similar patterns. Also, please leave MEM_P checks, the rare C->m move > can be easily resolved by IRA. Actually, register_operand checks are indeed better, please disregard MEM_P recommendation. Uros.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ff47bc1..cf54189 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void); extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); extern rtx standard_80387_constant_rtx (int); -extern int standard_sse_constant_p (rtx); +extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode); extern const char *standard_sse_constant_opcode (rtx_insn *, rtx); extern bool symbolic_reference_mentioned_p (rtx); extern bool extended_reg_mentioned_p (rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6379313..dd951c2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx) in supported SSE/AVX vector mode. */ int -standard_sse_constant_p (rtx x) +standard_sse_constant_p (rtx x, machine_mode mode) { - machine_mode mode; - if (!TARGET_SSE) return 0; - mode = GET_MODE (x); - + if (mode == VOIDmode) + mode = GET_MODE (x); + if (x == const0_rtx || x == CONST0_RTX (mode)) return 1; - if (vector_all_ones_operand (x, mode)) + if (CONST_INT_P (x)) + { + /* If mode != VOIDmode, standard_sse_constant_p must be called: + 1. On TImode with SSE2. + 2. On OImode with AVX2. + 3. On XImode with AVX512F. + */ + if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1 + && (mode == VOIDmode + || (mode == TImode && TARGET_SSE2) + || (mode == OImode && TARGET_AVX2) + || (mode == XImode && TARGET_AVX512F))) + return 2; + } + else if (vector_all_ones_operand (x, mode)) switch (mode) { case V16QImode: @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) && (CONSTANT_P (op1) || (SUBREG_P (op1) && CONSTANT_P (SUBREG_REG (op1)))) - && !standard_sse_constant_p (op1)) + && !standard_sse_constant_p (op1, mode)) op1 = validize_mem (force_const_mem (mode, op1)); /* We need to check memory alignment for SSE mode since attribute diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index babd0a4..75227aa 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1971,8 +1971,10 @@ (define_insn "*movxi_internal_avx512f" [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m") - (match_operand:XI 1 "vector_move_operand" "C ,vm,v"))] - "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] + "TARGET_AVX512F + && (register_operand (operands[0], XImode) + || register_operand (operands[1], XImode))" { switch (which_alternative) { @@ -1996,7 +1998,10 @@ (define_insn "*movoi_internal_avx" [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m") (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))] - "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "TARGET_AVX + && !TARGET_AVX2 + && (register_operand (operands[0], OImode) + || register_operand (operands[1], OImode))" { switch (get_attr_type (insn)) { @@ -2042,10 +2047,62 @@ ] (const_string "OI")))]) -(define_insn "*movti_internal" +(define_insn "*movoi_internal_avx2" + [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m") + (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] + "TARGET_AVX2 + && (register_operand (operands[0], OImode) + || register_operand (operands[1], OImode))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + { + if (get_attr_mode (insn) == MODE_V8SF) + return "vmovups\t{%1, %0|%0, %1}"; + else if (get_attr_mode (insn) == MODE_XI) + return "vmovdqu32\t{%1, %0|%0, %1}"; + else + return "vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V8SF) + return "vmovaps\t{%1, %0|%0, %1}"; + else if (get_attr_mode (insn) == MODE_XI) + return "vmovdqa32\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set (attr "mode") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (const_string "V8SF") + (and (eq_attr "alternative" "2") + (match_test "TARGET_SSE_TYPELESS_STORES")) + (const_string "V8SF") + ] + (const_string "OI")))]) + +(define_insn "*movti_internal_sse" [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m") (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))] "(TARGET_64BIT || TARGET_SSE) + && !TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2061,6 +2118,45 @@ to stack may result in unaligned memory access. */ if (misaligned_operand (operands[0], TImode) || misaligned_operand (operands[1], TImode)) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovaps\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "x64,x64,*,*,*") + (set_attr "type" "multi,multi,sselog1,ssemov,ssemov") + (set_attr "prefix" "orig") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (const_string "DI")] + (const_string "TI")))]) + +(define_insn "*movti_internal_sse2" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m") + (match_operand:TI 1 "general_operand" "riFo,re,BC,vm,v"))] + "TARGET_SSE2 + && ((TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))) + || (!TARGET_64BIT + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_MULTI: + return "#"; + + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) { if (get_attr_mode (insn) == MODE_V4SF) return "%vmovups\t{%1, %0|%0, %1}";