Message ID | 20240224031848.3866630-2-quic_apinski@quicinc.com |
---|---|
State | New |
Headers | show |
Series | [1/2] aarch64: Use fmov s/d/hN, FP_CST for some vector CST [PR113856] | expand |
Andrew Pinski <quic_apinski@quicinc.com> writes: > This enables construction of V4SF CST like `{1.0f, 1.0f, 0.0f, 0.0f}` > (and other fp enabled CSTs) by using `fmov v0.2s, 1.0` as the instruction > is designed to zero out the other bits. > This is a small extension on top of the code that creates fmov for the case > where the all but the first element is non-zero. Similarly to the second reply to 1/2, I think we should handle this by detecting when only the low 64 bits are nonzero, and then try to construct a simd_immediate_info for the low 64 bits. The technique is more general than just floats. The same thing would work for SVE too (if TARGET_SIMD). Thanks, Richard > Built and tested for aarch64-linux-gnu with no regressions. > > PR target/113856 > > gcc/ChangeLog: > > * config/aarch64/aarch64.cc (simd_immediate_info): Add bool to the > float mode constructor. Document modifier field for FMOV_SDH. > (aarch64_simd_valid_immediate): Recognize where the first half > of the const float vect is the same. > (aarch64_output_simd_mov_immediate): Handle the case where insn is > FMOV_SDH and modifier is MSL. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/fmov-zero-cst-3.c: New test. 
> > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com> > --- > gcc/config/aarch64/aarch64.cc | 34 ++++++++++++++++--- > .../gcc.target/aarch64/fmov-zero-cst-3.c | 28 +++++++++++++++ > 2 files changed, 57 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index c4386591a9b..89bd0c5e5a6 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -130,7 +130,7 @@ struct simd_immediate_info > enum modifier_type { LSL, MSL }; > > simd_immediate_info () {} > - simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV); > + simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV, bool = false); > simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT, > insn_type = MOV, modifier_type = LSL, > unsigned int = 0); > @@ -153,6 +153,8 @@ struct simd_immediate_info > > /* The kind of shift modifier to use, and the number of bits to shift. > This is (LSL, 0) if no shift is needed. */ > + /* For FMOV_SDH, LSL says it is a single while MSL > + says if it is either .4h/.2s fmov. */ > modifier_type modifier; > unsigned int shift; > } mov; > @@ -173,12 +175,12 @@ struct simd_immediate_info > /* Construct a floating-point immediate in which each element has mode > ELT_MODE_IN and value VALUE_IN. */ > inline simd_immediate_info > -::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in) > +::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in, bool firsthalfsame) > : elt_mode (elt_mode_in), insn (insn_in) > { > gcc_assert (insn_in == MOV || insn_in == FMOV_SDH); > u.mov.value = value_in; > - u.mov.modifier = LSL; > + u.mov.modifier = firsthalfsame ? 
MSL : LSL; > u.mov.shift = 0; > } > > @@ -22944,10 +22946,23 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, > || aarch64_float_const_representable_p (elt)) > { > bool valid = true; > + bool firsthalfsame = false; > for (unsigned int i = 1; i < n_elts; i++) > { > rtx elt1 = CONST_VECTOR_ENCODED_ELT (op, i); > if (!aarch64_float_const_zero_rtx_p (elt1)) > + { > + if (i == 1) > + firsthalfsame = true; > + if (!firsthalfsame > + || i >= n_elts/2 > + || !rtx_equal_p (elt, elt1)) > + { > + valid = false; > + break; > + } > + } > + else if (firsthalfsame && i < n_elts/2) > { > valid = false; > break; > @@ -22957,7 +22972,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, > { > if (info) > *info = simd_immediate_info (elt_float_mode, elt, > - simd_immediate_info::FMOV_SDH); > + simd_immediate_info::FMOV_SDH, > + firsthalfsame); > return true; > } > } > @@ -25165,8 +25181,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, > real_to_decimal_for_mode (float_buf, > CONST_DOUBLE_REAL_VALUE (info.u.mov.value), > buf_size, buf_size, 1, info.elt_mode); > - if (info.insn == simd_immediate_info::FMOV_SDH) > + if (info.insn == simd_immediate_info::FMOV_SDH > + && info.u.mov.modifier == simd_immediate_info::LSL) > snprintf (templ, sizeof (templ), "fmov\t%%%c0, %s", element_char, float_buf); > + else if (info.insn == simd_immediate_info::FMOV_SDH > + && info.u.mov.modifier == simd_immediate_info::MSL) > + { > + gcc_assert (element_char != 'd'); > + gcc_assert (lane_count > 2); > + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", lane_count/2, element_char, float_buf); > + } > else if (lane_count == 1) > snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); > else > diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c > new file mode 100644 > index 00000000000..7a78b6d3caf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c 
> @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mcmodel=tiny" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > +/* PR target/113856 */ > + > +#define vect64 __attribute__((vector_size(8) )) > +#define vect128 __attribute__((vector_size(16) )) > + > +/* > +** f2: > +** fmov v0.2s, 1.0e\+0 > +** ret > +*/ > +vect128 float f2() > +{ > + return (vect128 float){1.0f, 1.0f, 0, 0}; > +} > + > +/* > +** f3: > +** ldr q0, \.LC[0-9]+ > +** ret > +*/ > +vect128 float f3() > +{ > + return (vect128 float){1.0f, 1.0f, 1.0f, 0.0}; > +} > +
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index c4386591a9b..89bd0c5e5a6 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -130,7 +130,7 @@ struct simd_immediate_info enum modifier_type { LSL, MSL }; simd_immediate_info () {} - simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV); + simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV, bool = false); simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT, insn_type = MOV, modifier_type = LSL, unsigned int = 0); @@ -153,6 +153,8 @@ struct simd_immediate_info /* The kind of shift modifier to use, and the number of bits to shift. This is (LSL, 0) if no shift is needed. */ + /* For FMOV_SDH, LSL says it is a single while MSL + says if it is either .4h/.2s fmov. */ modifier_type modifier; unsigned int shift; } mov; @@ -173,12 +175,12 @@ struct simd_immediate_info /* Construct a floating-point immediate in which each element has mode ELT_MODE_IN and value VALUE_IN. */ inline simd_immediate_info -::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in) +::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in, bool firsthalfsame) : elt_mode (elt_mode_in), insn (insn_in) { gcc_assert (insn_in == MOV || insn_in == FMOV_SDH); u.mov.value = value_in; - u.mov.modifier = LSL; + u.mov.modifier = firsthalfsame ? 
MSL : LSL; u.mov.shift = 0; } @@ -22944,10 +22946,23 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, || aarch64_float_const_representable_p (elt)) { bool valid = true; + bool firsthalfsame = false; for (unsigned int i = 1; i < n_elts; i++) { rtx elt1 = CONST_VECTOR_ENCODED_ELT (op, i); if (!aarch64_float_const_zero_rtx_p (elt1)) + { + if (i == 1) + firsthalfsame = true; + if (!firsthalfsame + || i >= n_elts/2 + || !rtx_equal_p (elt, elt1)) + { + valid = false; + break; + } + } + else if (firsthalfsame && i < n_elts/2) { valid = false; break; @@ -22957,7 +22972,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, { if (info) *info = simd_immediate_info (elt_float_mode, elt, - simd_immediate_info::FMOV_SDH); + simd_immediate_info::FMOV_SDH, + firsthalfsame); return true; } } @@ -25165,8 +25181,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, real_to_decimal_for_mode (float_buf, CONST_DOUBLE_REAL_VALUE (info.u.mov.value), buf_size, buf_size, 1, info.elt_mode); - if (info.insn == simd_immediate_info::FMOV_SDH) + if (info.insn == simd_immediate_info::FMOV_SDH + && info.u.mov.modifier == simd_immediate_info::LSL) snprintf (templ, sizeof (templ), "fmov\t%%%c0, %s", element_char, float_buf); + else if (info.insn == simd_immediate_info::FMOV_SDH + && info.u.mov.modifier == simd_immediate_info::MSL) + { + gcc_assert (element_char != 'd'); + gcc_assert (lane_count > 2); + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", lane_count/2, element_char, float_buf); + } else if (lane_count == 1) snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); else diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c new file mode 100644 index 00000000000..7a78b6d3caf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcmodel=tiny" } */ +/* { dg-final { 
check-function-bodies "**" "" "" } } */ +/* PR target/113856 */ + +#define vect64 __attribute__((vector_size(8) )) +#define vect128 __attribute__((vector_size(16) )) + +/* +** f2: +** fmov v0.2s, 1.0e\+0 +** ret +*/ +vect128 float f2() +{ + return (vect128 float){1.0f, 1.0f, 0, 0}; +} + +/* +** f3: +** ldr q0, \.LC[0-9]+ +** ret +*/ +vect128 float f3() +{ + return (vect128 float){1.0f, 1.0f, 1.0f, 0.0}; +} +
This enables construction of V4SF CST like `{1.0f, 1.0f, 0.0f, 0.0f}` (and other fp enabled CSTs) by using `fmov v0.2s, 1.0` as the instruction is designed to zero out the other bits. This is a small extension on top of the code that creates fmov for the case where all but the first element are zero. Built and tested for aarch64-linux-gnu with no regressions. PR target/113856 gcc/ChangeLog: * config/aarch64/aarch64.cc (simd_immediate_info): Add bool to the float mode constructor. Document modifier field for FMOV_SDH. (aarch64_simd_valid_immediate): Recognize where the first half of the const float vect is the same. (aarch64_output_simd_mov_immediate): Handle the case where insn is FMOV_SDH and modifier is MSL. gcc/testsuite/ChangeLog: * gcc.target/aarch64/fmov-zero-cst-3.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com> --- gcc/config/aarch64/aarch64.cc | 34 ++++++++++++++++--- .../gcc.target/aarch64/fmov-zero-cst-3.c | 28 +++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c