From patchwork Fri Dec 23 17:41:00 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 133103 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 0100DB71C9 for ; Sat, 24 Dec 2011 04:41:39 +1100 (EST) Received: (qmail 18379 invoked by alias); 23 Dec 2011 17:41:29 -0000 Received: (qmail 18353 invoked by uid 22791); 23 Dec 2011 17:41:22 -0000 X-SWARE-Spam-Status: No, hits=-2.2 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, TW_CP X-Spam-Check-By: sourceware.org Received: from mail-vx0-f175.google.com (HELO mail-vx0-f175.google.com) (209.85.220.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 23 Dec 2011 17:41:05 +0000 Received: by vcbf1 with SMTP id f1so7078091vcb.20 for ; Fri, 23 Dec 2011 09:41:04 -0800 (PST) Received: by 10.220.155.19 with SMTP id q19mr10384836vcw.58.1324662064428; Fri, 23 Dec 2011 09:41:04 -0800 (PST) Received: from anchor.twiddle.home ([173.160.232.49]) by mx.google.com with ESMTPS id d1sm9917666vdj.22.2011.12.23.09.41.01 (version=TLSv1/SSLv3 cipher=OTHER); Fri, 23 Dec 2011 09:41:02 -0800 (PST) Message-ID: <4EF4BD2C.1070407@redhat.com> Date: Fri, 23 Dec 2011 09:41:00 -0800 From: Richard Henderson User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:8.0) Gecko/20111115 Thunderbird/8.0 MIME-Version: 1.0 To: mingjie.xing@gmail.com, gcc-patches@gcc.gnu.org, rdsandiford@googlemail.com Subject: Re: [PATCH v3 00/10] MIPS vectorization improvements References: <1324486822-18225-1-git-send-email-rth@redhat.com> <87hb0sz68y.fsf@firetop.home> In-Reply-To: <87hb0sz68y.fsf@firetop.home> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On 12/22/2011 12:44 PM, Richard Sandiford wrote: > Woah, thanks, that's quite some work. OK for the patches I didn't > respond to. Here's a combined follow-on patch that I believe addresses all of the comments you had. Ok? r~ commit 824b5ca31ea21bb02cedabf79bb98e4348c34366 Author: Richard Henderson Date: Thu Dec 22 12:23:03 2011 -0800 mips: Feedback from rsandiford. diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index 85861a9..187c651 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -26,15 +26,15 @@ RESET_FLOAT_FORMAT (DF, mips_double_format); FLOAT_MODE (TF, 16, mips_quad_format); /* Vector modes. */ -VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ -VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ -VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ /* Double-sized vector modes for vec_concat. */ -VECTOR_MODE (INT, QI, 16); -VECTOR_MODE (INT, HI, 8); -VECTOR_MODE (INT, SI, 4); -VECTOR_MODE (FLOAT, SF, 4); +VECTOR_MODE (INT, QI, 16); /* V16QI */ +VECTOR_MODE (INT, HI, 8); /* V8HI */ +VECTOR_MODE (INT, SI, 4); /* V4SI */ +VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index bc76078..94d2c2f 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -4638,7 +4638,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, /* The EABI conventions have traditionally been defined in terms of TYPE_MODE, regardless of the actual type. */ info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE); break; @@ -4653,7 +4653,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, || SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)) && (GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE); break; @@ -4666,7 +4666,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, && (type == 0 || FLOAT_TYPE_P (type)) && (GET_MODE_CLASS (mode) == MODE_FLOAT || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE); /* ??? According to the ABI documentation, the real and imaginary @@ -5103,7 +5103,7 @@ static bool mips_return_mode_in_fpr_p (enum machine_mode mode) { return ((GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || mode == V2SFmode || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE); } @@ -10786,8 +10786,14 @@ mips_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, enum reg_class rclass) { - /* There are several problems with changing the modes of values in - floating-point registers: + /* Allow conversions between different Loongson integer vectors, + and between those vectors and DImode. */ + if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8 + && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to)) + return false; + + /* Otherwise, there are several problems with changing the modes of + values in floating-point registers: - When a multi-word value is stored in paired floating-point registers, the first register always holds the low word. We @@ -10809,12 +10815,6 @@ mips_cannot_change_mode_class (enum machine_mode from, We therefore disallow all mode changes involving FPRs. */ - /* Except for Loongson and its integral vectors. We need to be able - to change between those modes easily. */ - if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8 - && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to)) - return false; - return reg_classes_intersect_p (FP_REGS, rclass); } @@ -16352,7 +16352,8 @@ struct expand_vec_perm_d return true if that's a valid instruction in the active ISA. */ static bool -expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) +mips_expand_vselect (rtx target, rtx op0, + const unsigned char *perm, unsigned nelt) { rtx rperm[MAX_VECT_LEN], x; unsigned i; @@ -16376,15 +16377,15 @@ expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) /* Similar, but generate a vec_concat from op0 and op1 as well. */ static bool -expand_vselect_vconcat (rtx target, rtx op0, rtx op1, - const unsigned char *perm, unsigned nelt) +mips_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) { enum machine_mode v2mode; rtx x; v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); - return expand_vselect (target, x, perm, nelt); + return mips_expand_vselect (target, x, perm, nelt); } /* Recognize patterns for even-odd extraction. */ @@ -16525,18 +16526,19 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) memcpy (perm2, d->perm, sizeof(perm2)); for (i = 1; i < nelt; i += 2) perm2[i] += nelt; - if (expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) + if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) return true; } else { - if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt)) + if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, + d->perm, nelt)) return true; /* Try again with swapped operands. */ for (i = 0; i < nelt; ++i) perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); - if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + if (mips_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) return true; } @@ -16556,7 +16558,9 @@ mips_expand_vec_perm_const (rtx operands[4]) { struct expand_vec_perm_d d; int i, nelt, which; + unsigned char orig_perm[MAX_VECT_LEN]; rtx sel; + bool ok; d.target = operands[0]; d.op0 = operands[1]; @@ -16573,8 +16577,9 @@ mips_expand_vec_perm_const (rtx operands[4]) rtx e = XVECEXP (sel, 0, i); int ei = INTVAL (e) & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); - d.perm[i] = ei; + orig_perm[i] = ei; } + memcpy (d.perm, orig_perm, MAX_VECT_LEN); switch (which) { @@ -16585,14 +16590,8 @@ mips_expand_vec_perm_const (rtx operands[4]) d.one_vector_p = false; if (!rtx_equal_p (d.op0, d.op1)) break; - - /* The backend (vec_select (vec_concat)) patterns are not duplicated - for single-operand. Try once with the original un-folded selector. */ - if (mips_expand_vec_perm_const_1 (&d)) - return true; - - /* Try again after folding the selector to a single operand. */ /* FALLTHRU */ + case 2: for (i = 0; i < nelt; ++i) d.perm[i] &= nelt - 1; @@ -16606,7 +16605,25 @@ mips_expand_vec_perm_const (rtx operands[4]) break; } - return mips_expand_vec_perm_const_1 (&d); + ok = mips_expand_vec_perm_const_1 (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector + permutation. There are several loongson patterns that are matched + via direct vec_select+vec_concat expansion, but we do not have + support in mips_expand_vec_perm_const_1 to guess the adjustment + that should be made for a single operand. Just try again with + the original permutation. */ + if (!ok && which == 3) + { + d.op0 = operands[1]; + d.op1 = operands[2]; + d.one_vector_p = false; + memcpy (d.perm, orig_perm, MAX_VECT_LEN); + ok = mips_expand_vec_perm_const_1 (&d); + } + + return ok; } /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ @@ -16732,7 +16749,7 @@ mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt) gcc_unreachable (); } - memset (&d, 0, sizeof(d)); + memset (&d, 0, sizeof (d)); d.target = target; d.op0 = t1; d.op1 = t1; @@ -16862,47 +16879,68 @@ mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx)) { enum machine_mode vmode = GET_MODE (in); unsigned char perm2[2]; - rtx tmp; + rtx last, next, fold, x; bool ok; - tmp = gen_reg_rtx (vmode); + last = in; + fold = gen_reg_rtx (vmode); switch (vmode) { case V2SFmode: /* Use PUL/PLU to produce { L, H } op { H, L }. - By reversing the pair order, rather a pure interleave high, - we don't produce erroneous exceptional conditions. */ + By reversing the pair order, rather than a pure interleave high, + we avoid erroneous exceptional conditions that we might otherwise + produce from the computation of H op H. */ perm2[0] = 1; perm2[1] = 2; - ok = expand_vselect_vconcat (tmp, in, in, perm2, 2); + ok = mips_expand_vselect_vconcat (fold, last, last, perm2, 2); gcc_assert (ok); break; case V2SImode: /* Use interleave to produce { H, L } op { H, H }. */ - emit_insn (gen_loongson_punpckhwd (tmp, in, in)); + emit_insn (gen_loongson_punpckhwd (fold, last, last)); break; case V4HImode: /* Perform the first reduction with interleave, and subsequent reductions with shifts. */ - emit_insn (gen_loongson_punpckhwd_hi (tmp, in, in)); - emit_insn (gen (in, in, tmp)); - emit_insn (gen_vec_shr_v4hi (tmp, in, force_reg (SImode, GEN_INT (16)))); + emit_insn (gen_loongson_punpckhwd_hi (fold, last, last)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (16)); + emit_insn (gen_vec_shr_v4hi (fold, last, x)); break; case V8QImode: - emit_insn (gen_loongson_punpckhwd_qi (tmp, in, in)); - emit_insn (gen (in, in, tmp)); - emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (16)))); - emit_insn (gen (in, in, tmp)); - emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (8)))); + emit_insn (gen_loongson_punpckhwd_qi (fold, last, last)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (16)); + emit_insn (gen_vec_shr_v8qi (fold, last, x)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (8)); + emit_insn (gen_vec_shr_v8qi (fold, last, x)); break; default: gcc_unreachable (); } - emit_insn (gen (target, in, tmp)); + + emit_insn (gen (target, last, fold)); } /* Expand a vector minimum/maximum. */