Message ID | 20240202014228.268164-1-liwei@loongson.cn |
---|---|
State | New |
Headers | show |
Series | [v1] LoongArch: testsuite: Fix gcc.dg/vect/vect-reduc-mul_{1, 2}.c FAIL. | expand |
Pushed to r14-8784. 在 2024/2/2 上午9:42, Li Wei 写道: > This FAIL was introduced from r14-6908. The reason is that when merging > constant vector permutation implementations, the 128-bit matching situation > was not fully considered. In fact, the expansion of 128-bit vectors after > merging only supports value-based 4 elements set shuffle, so this time is a > complete implementation of the entire 128-bit vector constant permutation, > and some structural adjustments have also been made to the code. > > gcc/ChangeLog: > > * config/loongarch/loongarch.cc (loongarch_expand_vselect): Adjust. > (loongarch_expand_vselect_vconcat): Ditto. > (loongarch_try_expand_lsx_vshuf_const): New, use vshuf to implement > all 128-bit constant permutation situations. > (loongarch_expand_lsx_shuffle): Adjust and rename function name. > (loongarch_is_imm_set_shuffle): Renamed function name. > (loongarch_expand_vec_perm_even_odd): Function forward declaration. > (loongarch_expand_vec_perm_even_odd_1): Add implement for 128-bit > extract-even and extract-odd permutations. > (loongarch_is_odd_extraction): Delete. > (loongarch_is_even_extraction): Ditto. > (loongarch_expand_vec_perm_const): Adjust. > --- > gcc/config/loongarch/loongarch.cc | 218 ++++++++++++++++++++++-------- > 1 file changed, 163 insertions(+), 55 deletions(-) > > diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc > index 8bc18448753..61723844756 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -8029,7 +8029,8 @@ struct expand_vec_perm_d > > static bool > loongarch_expand_vselect (rtx target, rtx op0, > - const unsigned char *perm, unsigned nelt) > + const unsigned char *perm, unsigned nelt, > + bool testing_p) > { > rtx rperm[MAX_VECT_LEN], x; > rtx_insn *insn; > @@ -8048,6 +8049,9 @@ loongarch_expand_vselect (rtx target, rtx op0, > remove_insn (insn); > return false; > } > + > + if (testing_p) > + remove_insn (insn); > return true; > } > > @@ -8055,7 +8059,8 @@ loongarch_expand_vselect (rtx target, rtx op0, > > static bool > loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, > - const unsigned char *perm, unsigned nelt) > + const unsigned char *perm, unsigned nelt, > + bool testing_p) > { > machine_mode v2mode; > rtx x; > @@ -8063,7 +8068,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, > if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) > return false; > x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); > - return loongarch_expand_vselect (target, x, perm, nelt); > + return loongarch_expand_vselect (target, x, perm, nelt, testing_p); > } > > static tree > @@ -8317,11 +8322,87 @@ loongarch_set_handled_components (sbitmap components) > #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" > #undef TARGET_ASM_ALIGNED_DI_OP > #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" > + > +/* Use the vshuf instruction to implement all 128-bit constant vector > + permuatation. */ > + > +static bool > +loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) > +{ > + int i; > + rtx target, op0, op1, sel, tmp; > + rtx rperm[MAX_VECT_LEN]; > + > + if (GET_MODE_SIZE (d->vmode) == 16) > + { > + target = d->target; > + op0 = d->op0; > + op1 = d->one_vector_p ? d->op0 : d->op1; > + > + if (GET_MODE (op0) != GET_MODE (op1) > + || GET_MODE (op0) != GET_MODE (target)) > + return false; > + > + if (d->testing_p) > + return true; > + > + for (i = 0; i < d->nelt; i += 1) > + rperm[i] = GEN_INT (d->perm[i]); > + > + if (d->vmode == E_V2DFmode) > + { > + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); > + tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); > + emit_move_insn (tmp, sel); > + } > + else if (d->vmode == E_V4SFmode) > + { > + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); > + tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); > + emit_move_insn (tmp, sel); > + } > + else > + { > + sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); > + emit_move_insn (d->target, sel); > + } > + > + switch (d->vmode) > + { > + case E_V2DFmode: > + emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); > + break; > + case E_V2DImode: > + emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); > + break; > + case E_V4SFmode: > + emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); > + break; > + case E_V4SImode: > + emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); > + break; > + case E_V8HImode: > + emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); > + break; > + case E_V16QImode: > + emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); > + break; > + default: > + break; > + } > + > + return true; > + } > + return false; > +} > + > /* Construct (set target (vec_select op0 (parallel selector))) and > - return true if that's a valid instruction in the active ISA. */ > + return true if that's a valid instruction in the active ISA. > + In fact, it matches the special constant vector with repeated > + 4-element sets. */ > > static bool > -loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) > +loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d) > { > rtx x, elts[MAX_VECT_LEN]; > rtvec v; > @@ -8340,6 +8421,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) > if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) > return false; > > + if (d->testing_p) > + return true; > + > x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); > x = gen_rtx_SET (d->target, x); > > @@ -8352,6 +8436,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) > return true; > } > > +static bool > +loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *); > + > +/* Try to match and expand all kinds of 128-bit const vector permutation > + cases. */ > + > +static bool > +loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) > +{ > + if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16) > + return false; > + > + if (loongarch_is_imm_set_shuffle (d)) > + return true; > + > + if (loongarch_expand_vec_perm_even_odd (d)) > + return true; > + > + return loongarch_try_expand_lsx_vshuf_const (d); > +} > + > /* Try to simplify a two vector permutation using 2 intra-lane interleave > insns and cross-lane shuffle for 32-byte vectors. */ > > @@ -8444,7 +8549,7 @@ loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) > return true; > } > > -/* Implement extract-even and extract-odd permutations. */ > +/* Implement 128-bit and 256-bit extract-even and extract-odd permutations. */ > > static bool > loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) > @@ -8459,6 +8564,50 @@ loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) > > switch (d->vmode) > { > + /* 128 bit. */ > + case E_V2DFmode: > + if (odd) > + emit_insn (gen_lsx_vilvh_d_f (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vilvl_d_f (d->target, d->op0, d->op1)); > + break; > + > + case E_V2DImode: > + if (odd) > + emit_insn (gen_lsx_vilvh_d (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vilvl_d (d->target, d->op0, d->op1)); > + break; > + > + case E_V4SFmode: > + if (odd) > + emit_insn (gen_lsx_vpickod_w_f (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vpickev_w_f (d->target, d->op0, d->op1)); > + break; > + > + case E_V4SImode: > + if (odd) > + emit_insn (gen_lsx_vpickod_w (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vpickev_w (d->target, d->op0, d->op1)); > + break; > + > + case E_V8HImode: > + if (odd) > + emit_insn (gen_lsx_vpickod_h (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vpickev_h (d->target, d->op0, d->op1)); > + break; > + > + case E_V16QImode: > + if (odd) > + emit_insn (gen_lsx_vpickod_b (d->target, d->op0, d->op1)); > + else > + emit_insn (gen_lsx_vpickev_b (d->target, d->op0, d->op1)); > + break; > + > + /* 256 bit. */ > case E_V4DFmode: > /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */ > if (odd) > @@ -8533,7 +8682,7 @@ static bool > loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) > { > unsigned i, odd, nelt = d->nelt; > - if (!ISA_HAS_LASX) > + if (!ISA_HAS_LASX && !ISA_HAS_LSX) > return false; > > odd = d->perm[0]; > @@ -8996,44 +9145,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) > return result; > } > > -static bool > -loongarch_is_odd_extraction (struct expand_vec_perm_d *d) > -{ > - bool result = true; > - unsigned char buf = 1; > - > - for (int i = 0; i < d->nelt; i += 1) > - { > - if (buf != d->perm[i]) > - { > - result = false; > - break; > - } > - buf += 2; > - } > - > - return result; > -} > - > -static bool > -loongarch_is_even_extraction (struct expand_vec_perm_d *d) > -{ > - bool result = true; > - unsigned char buf = 0; > - > - for (int i = 0; i < d->nelt; i += 1) > - { > - if (buf != d->perm[i]) > - { > - result = false; > - break; > - } > - buf += 2; > - } > - > - return result; > -} > - > static bool > loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) > { > @@ -9290,32 +9401,29 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) > for (i = 1; i < d->nelt; i += 2) > perm2[i] += d->nelt; > if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, > - perm2, d->nelt)) > + perm2, d->nelt, d->testing_p)) > return true; > } > else > { > if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, > - d->perm, d->nelt)) > + d->perm, d->nelt, > + d->testing_p)) > return true; > > /* Try again with swapped operands. */ > for (i = 0; i < d->nelt; ++i) > perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); > if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, > - perm2, d->nelt)) > + perm2, d->nelt, d->testing_p)) > return true; > } > > - if (loongarch_expand_lsx_shuffle (d)) > + if (loongarch_is_imm_set_shuffle (d)) > return true; > > - if (loongarch_is_odd_extraction (d) > - || loongarch_is_even_extraction (d)) > - { > - if (loongarch_expand_vec_perm_even_odd (d)) > - return true; > - } > + if (loongarch_expand_vec_perm_even_odd (d)) > + return true; > > if (loongarch_is_lasx_lowpart_interleave (d) > || loongarch_is_lasx_lowpart_interleave_2 (d)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 8bc18448753..61723844756 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -8029,7 +8029,8 @@ struct expand_vec_perm_d static bool loongarch_expand_vselect (rtx target, rtx op0, - const unsigned char *perm, unsigned nelt) + const unsigned char *perm, unsigned nelt, + bool testing_p) { rtx rperm[MAX_VECT_LEN], x; rtx_insn *insn; @@ -8048,6 +8049,9 @@ loongarch_expand_vselect (rtx target, rtx op0, remove_insn (insn); return false; } + + if (testing_p) + remove_insn (insn); return true; } @@ -8055,7 +8059,8 @@ loongarch_expand_vselect (rtx target, rtx op0, static bool loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, - const unsigned char *perm, unsigned nelt) + const unsigned char *perm, unsigned nelt, + bool testing_p) { machine_mode v2mode; rtx x; @@ -8063,7 +8068,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) return false; x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); - return loongarch_expand_vselect (target, x, perm, nelt); + return loongarch_expand_vselect (target, x, perm, nelt, testing_p); } static tree @@ -8317,11 +8322,87 @@ loongarch_set_handled_components (sbitmap components) #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" #undef TARGET_ASM_ALIGNED_DI_OP #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" + +/* Use the vshuf instruction to implement all 128-bit constant vector + permuatation. */ + +static bool +loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) +{ + int i; + rtx target, op0, op1, sel, tmp; + rtx rperm[MAX_VECT_LEN]; + + if (GET_MODE_SIZE (d->vmode) == 16) + { + target = d->target; + op0 = d->op0; + op1 = d->one_vector_p ? d->op0 : d->op1; + + if (GET_MODE (op0) != GET_MODE (op1) + || GET_MODE (op0) != GET_MODE (target)) + return false; + + if (d->testing_p) + return true; + + for (i = 0; i < d->nelt; i += 1) + rperm[i] = GEN_INT (d->perm[i]); + + if (d->vmode == E_V2DFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); + tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else if (d->vmode == E_V4SFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); + tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else + { + sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); + emit_move_insn (d->target, sel); + } + + switch (d->vmode) + { + case E_V2DFmode: + emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); + break; + case E_V2DImode: + emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); + break; + case E_V4SFmode: + emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); + break; + case E_V4SImode: + emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); + break; + case E_V8HImode: + emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); + break; + case E_V16QImode: + emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); + break; + default: + break; + } + + return true; + } + return false; +} + /* Construct (set target (vec_select op0 (parallel selector))) and - return true if that's a valid instruction in the active ISA. */ + return true if that's a valid instruction in the active ISA. + In fact, it matches the special constant vector with repeated + 4-element sets. */ static bool -loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) +loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d) { rtx x, elts[MAX_VECT_LEN]; rtvec v; @@ -8340,6 +8421,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) return false; + if (d->testing_p) + return true; + x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); x = gen_rtx_SET (d->target, x); @@ -8352,6 +8436,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) return true; } +static bool +loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *); + +/* Try to match and expand all kinds of 128-bit const vector permutation + cases. */ + +static bool +loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) +{ + if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16) + return false; + + if (loongarch_is_imm_set_shuffle (d)) + return true; + + if (loongarch_expand_vec_perm_even_odd (d)) + return true; + + return loongarch_try_expand_lsx_vshuf_const (d); +} + /* Try to simplify a two vector permutation using 2 intra-lane interleave insns and cross-lane shuffle for 32-byte vectors. */ @@ -8444,7 +8549,7 @@ loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) return true; } -/* Implement extract-even and extract-odd permutations. */ +/* Implement 128-bit and 256-bit extract-even and extract-odd permutations. */ static bool loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) @@ -8459,6 +8564,50 @@ loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) switch (d->vmode) { + /* 128 bit. */ + case E_V2DFmode: + if (odd) + emit_insn (gen_lsx_vilvh_d_f (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vilvl_d_f (d->target, d->op0, d->op1)); + break; + + case E_V2DImode: + if (odd) + emit_insn (gen_lsx_vilvh_d (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vilvl_d (d->target, d->op0, d->op1)); + break; + + case E_V4SFmode: + if (odd) + emit_insn (gen_lsx_vpickod_w_f (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vpickev_w_f (d->target, d->op0, d->op1)); + break; + + case E_V4SImode: + if (odd) + emit_insn (gen_lsx_vpickod_w (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vpickev_w (d->target, d->op0, d->op1)); + break; + + case E_V8HImode: + if (odd) + emit_insn (gen_lsx_vpickod_h (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vpickev_h (d->target, d->op0, d->op1)); + break; + + case E_V16QImode: + if (odd) + emit_insn (gen_lsx_vpickod_b (d->target, d->op0, d->op1)); + else + emit_insn (gen_lsx_vpickev_b (d->target, d->op0, d->op1)); + break; + + /* 256 bit. */ case E_V4DFmode: /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */ if (odd) @@ -8533,7 +8682,7 @@ static bool loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) { unsigned i, odd, nelt = d->nelt; - if (!ISA_HAS_LASX) + if (!ISA_HAS_LASX && !ISA_HAS_LSX) return false; odd = d->perm[0]; @@ -8996,44 +9145,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) return result; } -static bool -loongarch_is_odd_extraction (struct expand_vec_perm_d *d) -{ - bool result = true; - unsigned char buf = 1; - - for (int i = 0; i < d->nelt; i += 1) - { - if (buf != d->perm[i]) - { - result = false; - break; - } - buf += 2; - } - - return result; -} - -static bool -loongarch_is_even_extraction (struct expand_vec_perm_d *d) -{ - bool result = true; - unsigned char buf = 0; - - for (int i = 0; i < d->nelt; i += 1) - { - if (buf != d->perm[i]) - { - result = false; - break; - } - buf += 2; - } - - return result; -} - static bool loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) { @@ -9290,32 +9401,29 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) for (i = 1; i < d->nelt; i += 2) perm2[i] += d->nelt; if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, - perm2, d->nelt)) + perm2, d->nelt, d->testing_p)) return true; } else { if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, - d->perm, d->nelt)) + d->perm, d->nelt, + d->testing_p)) return true; /* Try again with swapped operands. */ for (i = 0; i < d->nelt; ++i) perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, - perm2, d->nelt)) + perm2, d->nelt, d->testing_p)) return true; } - if (loongarch_expand_lsx_shuffle (d)) + if (loongarch_is_imm_set_shuffle (d)) return true; - if (loongarch_is_odd_extraction (d) - || loongarch_is_even_extraction (d)) - { - if (loongarch_expand_vec_perm_even_odd (d)) - return true; - } + if (loongarch_expand_vec_perm_even_odd (d)) + return true; if (loongarch_is_lasx_lowpart_interleave (d) || loongarch_is_lasx_lowpart_interleave_2 (d)