
Detect a pack-unpack pattern in the GCC vectorizer and optimize it.

Message ID CAK=A3=2ZcCUrMi60=ViCsr8qe0M8eMXiAsTjog3q1HBDuiBPLQ@mail.gmail.com
State New

Commit Message

Cong Hou April 24, 2014, 5:24 p.m. UTC
Given the following loop:

int a[N];
short b[N*2];

for (int i = 0; i < N; ++i)
  a[i] = b[i*2];


After being vectorized, the access to b[i*2] will be compiled into
several packing statements, while the type promotion from short to int
will be compiled into several unpacking statements. With this patch,
each pair of pack/unpack statements will be replaced by less expensive
statements (with shift or bit-and operations).

On x86_64, the loop above will be compiled into the following assembly
(with -O2 -ftree-vectorize):

movdqu 0x10(%rcx),%xmm3
movdqu -0x20(%rcx),%xmm0
movdqa %xmm0,%xmm2
punpcklwd %xmm3,%xmm0
punpckhwd %xmm3,%xmm2
movdqa %xmm0,%xmm3
punpcklwd %xmm2,%xmm0
punpckhwd %xmm2,%xmm3
movdqa %xmm1,%xmm2
punpcklwd %xmm3,%xmm0
pcmpgtw %xmm0,%xmm2
movdqa %xmm0,%xmm3
punpckhwd %xmm2,%xmm0
punpcklwd %xmm2,%xmm3
movups %xmm0,-0x10(%rdx)
movups %xmm3,-0x20(%rdx)


With this patch, the generated assembly is shown below:

movdqu 0x10(%rcx),%xmm0
movdqu -0x20(%rcx),%xmm1
pslld  $0x10,%xmm0
psrad  $0x10,%xmm0
pslld  $0x10,%xmm1
movups %xmm0,-0x10(%rdx)
psrad  $0x10,%xmm1
movups %xmm1,-0x20(%rdx)
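
That is, once the two short vectors are reinterpreted as int vectors,
the even/odd extraction plus promotion collapses into shifts (or a
bit-and).  A minimal sketch of the signed even-element case using GCC
vector extensions (only an illustration of the idea; it assumes
128-bit vectors and little-endian lane order, where the even short of
each int lane is the low half):

typedef short v8hi __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

/* Sign-extend the even-indexed shorts of X (b[0], b[2], ...) to ints:
   shift each low short into the high half, then shift back
   arithmetically -- the pslld/psrad pairs in the assembly above.  */
static inline v4si
even_shorts_to_ints (v8hi x)
{
  v4si t = (v4si) x;		/* reinterpret 8 shorts as 4 ints */
  return (t << 16) >> 16;
}

For unsigned even elements a single bit-and with 0xffff per lane is
enough, and for the odd elements a single right shift by 16 (logical
or arithmetic, depending on signedness) suffices.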


Bootstrapped and tested on x86-64. OK for trunk?


thanks,
Cong

Comments

Richard Biener April 28, 2014, 11:04 a.m. UTC | #1
On Thu, 24 Apr 2014, Cong Hou wrote:

> Given the following loop:
> 
> int a[N];
> short b[N*2];
> 
> for (int i = 0; i < N; ++i)
>   a[i] = b[i*2];
> 
> 
> After being vectorized, the access to b[i*2] will be compiled into
> several packing statements, while the type promotion from short to int
> will be compiled into several unpacking statements. With this patch,
> each pair of pack/unpack statements will be replaced by less expensive
> statements (with shift or bit-and operations).
> 
> On x86_64, the loop above will be compiled into the following assembly
> (with -O2 -ftree-vectorize):
> 
> movdqu 0x10(%rcx),%xmm3
> movdqu -0x20(%rcx),%xmm0
> movdqa %xmm0,%xmm2
> punpcklwd %xmm3,%xmm0
> punpckhwd %xmm3,%xmm2
> movdqa %xmm0,%xmm3
> punpcklwd %xmm2,%xmm0
> punpckhwd %xmm2,%xmm3
> movdqa %xmm1,%xmm2
> punpcklwd %xmm3,%xmm0
> pcmpgtw %xmm0,%xmm2
> movdqa %xmm0,%xmm3
> punpckhwd %xmm2,%xmm0
> punpcklwd %xmm2,%xmm3
> movups %xmm0,-0x10(%rdx)
> movups %xmm3,-0x20(%rdx)
> 
> 
> With this patch, the generated assembly is shown below:
> 
> movdqu 0x10(%rcx),%xmm0
> movdqu -0x20(%rcx),%xmm1
> pslld  $0x10,%xmm0
> psrad  $0x10,%xmm0
> pslld  $0x10,%xmm1
> movups %xmm0,-0x10(%rdx)
> psrad  $0x10,%xmm1
> movups %xmm1,-0x20(%rdx)
> 
> 
> Bootstrapped and tested on x86-64. OK for trunk?

This is an odd place to implement such a transform.  Also, whether it
is faster or not depends on the exact ISA you target - for example,
ppc has constraints on the maximum number of shifts carried out in
parallel, and the above has 4 in very short succession, especially
for the sign-extend path.

So this looks more like an opportunity for a post-vectorizer
transform on RTL or for the vectorizer special-casing
widening loads with a vectorizer pattern.

Richard.
Cong Hou May 3, 2014, 12:39 a.m. UTC | #2
On Mon, Apr 28, 2014 at 4:04 AM, Richard Biener <rguenther@suse.de> wrote:
> On Thu, 24 Apr 2014, Cong Hou wrote:
>
>> Given the following loop:
>>
>> int a[N];
>> short b[N*2];
>>
>> for (int i = 0; i < N; ++i)
>>   a[i] = b[i*2];
>>
>>
>> After being vectorized, the access to b[i*2] will be compiled into
>> several packing statements, while the type promotion from short to int
>> will be compiled into several unpacking statements. With this patch,
>> each pair of pack/unpack statements will be replaced by less expensive
>> statements (with shift or bit-and operations).
>>
>> On x86_64, the loop above will be compiled into the following assembly
>> (with -O2 -ftree-vectorize):
>>
>> movdqu 0x10(%rcx),%xmm3
>> movdqu -0x20(%rcx),%xmm0
>> movdqa %xmm0,%xmm2
>> punpcklwd %xmm3,%xmm0
>> punpckhwd %xmm3,%xmm2
>> movdqa %xmm0,%xmm3
>> punpcklwd %xmm2,%xmm0
>> punpckhwd %xmm2,%xmm3
>> movdqa %xmm1,%xmm2
>> punpcklwd %xmm3,%xmm0
>> pcmpgtw %xmm0,%xmm2
>> movdqa %xmm0,%xmm3
>> punpckhwd %xmm2,%xmm0
>> punpcklwd %xmm2,%xmm3
>> movups %xmm0,-0x10(%rdx)
>> movups %xmm3,-0x20(%rdx)
>>
>>
>> With this patch, the generated assembly is shown below:
>>
>> movdqu 0x10(%rcx),%xmm0
>> movdqu -0x20(%rcx),%xmm1
>> pslld  $0x10,%xmm0
>> psrad  $0x10,%xmm0
>> pslld  $0x10,%xmm1
>> movups %xmm0,-0x10(%rdx)
>> psrad  $0x10,%xmm1
>> movups %xmm1,-0x20(%rdx)
>>
>>
>> Bootstrapped and tested on x86-64. OK for trunk?
>
> This is an odd place to implement such transform.  Also if it
> is faster or not depends on the exact ISA you target - for
> example ppc has constraints on the maximum number of shifts
> carried out in parallel and the above has 4 in very short
> succession.  Esp. for the sign-extend path.

Thank you for the information about ppc. If this is an issue, I think
we can do it in a target-dependent way.


>
> So this looks more like an opportunity for a post-vectorizer
> transform on RTL or for the vectorizer special-casing
> widening loads with a vectorizer pattern.

I am not sure whether the RTL transform would be more difficult to
implement. I prefer the widening-load method, which can be detected
in a pattern recognizer. The target-related issue can be resolved by
expanding the widening load only on those targets where this pattern
is beneficial. But this requires new tree operations to be defined.
What is your suggestion?

I apologize for the delayed reply.


thanks,
Cong

>
> Richard.
Richard Biener June 24, 2014, 11:05 a.m. UTC | #3
On Sat, May 3, 2014 at 2:39 AM, Cong Hou <congh@google.com> wrote:
> On Mon, Apr 28, 2014 at 4:04 AM, Richard Biener <rguenther@suse.de> wrote:
>> On Thu, 24 Apr 2014, Cong Hou wrote:
>>
>>> Given the following loop:
>>>
>>> int a[N];
>>> short b[N*2];
>>>
>>> for (int i = 0; i < N; ++i)
>>>   a[i] = b[i*2];
>>>
>>>
>>> After being vectorized, the access to b[i*2] will be compiled into
>>> several packing statements, while the type promotion from short to int
>>> will be compiled into several unpacking statements. With this patch,
>>> each pair of pack/unpack statements will be replaced by less expensive
>>> statements (with shift or bit-and operations).
>>>
>>> On x86_64, the loop above will be compiled into the following assembly
>>> (with -O2 -ftree-vectorize):
>>>
>>> movdqu 0x10(%rcx),%xmm3
>>> movdqu -0x20(%rcx),%xmm0
>>> movdqa %xmm0,%xmm2
>>> punpcklwd %xmm3,%xmm0
>>> punpckhwd %xmm3,%xmm2
>>> movdqa %xmm0,%xmm3
>>> punpcklwd %xmm2,%xmm0
>>> punpckhwd %xmm2,%xmm3
>>> movdqa %xmm1,%xmm2
>>> punpcklwd %xmm3,%xmm0
>>> pcmpgtw %xmm0,%xmm2
>>> movdqa %xmm0,%xmm3
>>> punpckhwd %xmm2,%xmm0
>>> punpcklwd %xmm2,%xmm3
>>> movups %xmm0,-0x10(%rdx)
>>> movups %xmm3,-0x20(%rdx)
>>>
>>>
>>> With this patch, the generated assembly is shown below:
>>>
>>> movdqu 0x10(%rcx),%xmm0
>>> movdqu -0x20(%rcx),%xmm1
>>> pslld  $0x10,%xmm0
>>> psrad  $0x10,%xmm0
>>> pslld  $0x10,%xmm1
>>> movups %xmm0,-0x10(%rdx)
>>> psrad  $0x10,%xmm1
>>> movups %xmm1,-0x20(%rdx)
>>>
>>>
>>> Bootstrapped and tested on x86-64. OK for trunk?
>>
>> This is an odd place to implement such transform.  Also if it
>> is faster or not depends on the exact ISA you target - for
>> example ppc has constraints on the maximum number of shifts
>> carried out in parallel and the above has 4 in very short
>> succession.  Esp. for the sign-extend path.
>
> Thank you for the information about ppc. If this is an issue, I think
> we can do it in a target dependent way.
>
>
>>
>> So this looks more like an opportunity for a post-vectorizer
>> transform on RTL or for the vectorizer special-casing
>> widening loads with a vectorizer pattern.
>
> I am not sure if the RTL transform is more difficult to implement. I
> prefer the widening loads method, which can be detected in a pattern
> recognizer. The target related issue will be resolved by only
> expanding the widening load on those targets where this pattern is
> beneficial. But this requires new tree operations to be defined. What
> is your suggestion?
>
> I apologize for the delayed reply.

Likewise ;)

I suggest implementing this optimization during vector lowering in
tree-vect-generic.c.  For your example, that pass sees

  vect__5.7_32 = MEM[symbol: b, index: ivtmp.15_13, offset: 0B];
  vect__5.8_34 = MEM[symbol: b, index: ivtmp.15_13, offset: 16B];
  vect_perm_even_35 = VEC_PERM_EXPR <vect__5.7_32, vect__5.8_34, { 0,
2, 4, 6, 8, 10, 12, 14 }>;
  vect__6.9_37 = [vec_unpack_lo_expr] vect_perm_even_35;
  vect__6.9_38 = [vec_unpack_hi_expr] vect_perm_even_35;

where you can apply the pattern matching and transform (after checking
with the target, of course).
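
Concretely, the selector check needed here is the one the posted patch
already performs: the VEC_PERM_EXPR mask must pick only even or only
odd source elements.  A standalone sketch of that test (illustrative
names, not an actual GCC interface):

#include <stdbool.h>

/* SEL holds the NELTS selector indices of the half of the permutation
   mask that feeds the unpack, normalized to start at 0.  Return true
   if they select only even or only odd source elements, i.e.
   sel[i] == 2*i or sel[i] == 2*i + 1 for all i, and record which case
   applies in *ODD_P.  */
static bool
selector_is_even_or_odd (const int *sel, int nelts, bool *odd_p)
{
  int odd = 0;
  for (int i = 0; i < nelts; i++)
    {
      if (sel[i] / 2 != i)
	return false;
      odd += sel[i] & 1;
    }
  if (odd != 0 && odd != nelts)
    return false;	/* mixed even/odd elements: not this pattern */
  *odd_p = (odd == nelts);
  return true;
}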

Richard.

>
> thanks,
> Cong
>
>>
>> Richard.
Cong Hou June 25, 2014, 5:34 p.m. UTC | #4
On Tue, Jun 24, 2014 at 4:05 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Sat, May 3, 2014 at 2:39 AM, Cong Hou <congh@google.com> wrote:
>> On Mon, Apr 28, 2014 at 4:04 AM, Richard Biener <rguenther@suse.de> wrote:
>>> On Thu, 24 Apr 2014, Cong Hou wrote:
>>>
>>>> Given the following loop:
>>>>
>>>> int a[N];
>>>> short b[N*2];
>>>>
>>>> for (int i = 0; i < N; ++i)
>>>>   a[i] = b[i*2];
>>>>
>>>>
>>>> After being vectorized, the access to b[i*2] will be compiled into
>>>> several packing statements, while the type promotion from short to int
>>>> will be compiled into several unpacking statements. With this patch,
>>>> each pair of pack/unpack statements will be replaced by less expensive
>>>> statements (with shift or bit-and operations).
>>>>
>>>> On x86_64, the loop above will be compiled into the following assembly
>>>> (with -O2 -ftree-vectorize):
>>>>
>>>> movdqu 0x10(%rcx),%xmm3
>>>> movdqu -0x20(%rcx),%xmm0
>>>> movdqa %xmm0,%xmm2
>>>> punpcklwd %xmm3,%xmm0
>>>> punpckhwd %xmm3,%xmm2
>>>> movdqa %xmm0,%xmm3
>>>> punpcklwd %xmm2,%xmm0
>>>> punpckhwd %xmm2,%xmm3
>>>> movdqa %xmm1,%xmm2
>>>> punpcklwd %xmm3,%xmm0
>>>> pcmpgtw %xmm0,%xmm2
>>>> movdqa %xmm0,%xmm3
>>>> punpckhwd %xmm2,%xmm0
>>>> punpcklwd %xmm2,%xmm3
>>>> movups %xmm0,-0x10(%rdx)
>>>> movups %xmm3,-0x20(%rdx)
>>>>
>>>>
>>>> With this patch, the generated assembly is shown below:
>>>>
>>>> movdqu 0x10(%rcx),%xmm0
>>>> movdqu -0x20(%rcx),%xmm1
>>>> pslld  $0x10,%xmm0
>>>> psrad  $0x10,%xmm0
>>>> pslld  $0x10,%xmm1
>>>> movups %xmm0,-0x10(%rdx)
>>>> psrad  $0x10,%xmm1
>>>> movups %xmm1,-0x20(%rdx)
>>>>
>>>>
>>>> Bootstrapped and tested on x86-64. OK for trunk?
>>>
>>> This is an odd place to implement such transform.  Also if it
>>> is faster or not depends on the exact ISA you target - for
>>> example ppc has constraints on the maximum number of shifts
>>> carried out in parallel and the above has 4 in very short
>>> succession.  Esp. for the sign-extend path.
>>
>> Thank you for the information about ppc. If this is an issue, I think
>> we can do it in a target dependent way.
>>
>>
>>>
>>> So this looks more like an opportunity for a post-vectorizer
>>> transform on RTL or for the vectorizer special-casing
>>> widening loads with a vectorizer pattern.
>>
>> I am not sure if the RTL transform is more difficult to implement. I
>> prefer the widening loads method, which can be detected in a pattern
>> recognizer. The target related issue will be resolved by only
>> expanding the widening load on those targets where this pattern is
>> beneficial. But this requires new tree operations to be defined. What
>> is your suggestion?
>>
>> I apologize for the delayed reply.
>
> Likewise ;)
>
> I suggest to implement this optimization in vector lowering in
> tree-vect-generic.c.  This sees for your example
>
>   vect__5.7_32 = MEM[symbol: b, index: ivtmp.15_13, offset: 0B];
>   vect__5.8_34 = MEM[symbol: b, index: ivtmp.15_13, offset: 16B];
>   vect_perm_even_35 = VEC_PERM_EXPR <vect__5.7_32, vect__5.8_34, { 0,
> 2, 4, 6, 8, 10, 12, 14 }>;
>   vect__6.9_37 = [vec_unpack_lo_expr] vect_perm_even_35;
>   vect__6.9_38 = [vec_unpack_hi_expr] vect_perm_even_35;
>
> where you can apply the pattern matching and transform (after checking
> with the target, of course).

This sounds good to me! I'll try to make a patch following your suggestion.

Thank you!


Cong

>
> Richard.
>
>>
>> thanks,
>> Cong
>>
>>>
>>> Richard.
Evgeny Stupachenko Nov. 21, 2014, 11:50 a.m. UTC | #5
Hi,

Please note that currently the test:

int a[N];
short b[N*2];

for (int i = 0; i < N; ++i)
  a[i] = b[i*2];

is compiled to (with -march=corei7 -O2 -ftree-vectorize):

        movdqa  b(%rax), %xmm0
        movdqa  b-16(%rax), %xmm2
        pand    %xmm1, %xmm0
        pand    %xmm1, %xmm2
        packusdw        %xmm2, %xmm0
        pmovsxwd        %xmm0, %xmm2
        psrldq  $8, %xmm0
        pmovsxwd        %xmm0, %xmm0
        movaps  %xmm2, a-32(%rax)
        movaps  %xmm0, a-16(%rax)

This is closer to the requested sequence.

Thanks,
Evgeny


On Wed, Jun 25, 2014 at 8:34 PM, Cong Hou <congh@google.com> wrote:
> On Tue, Jun 24, 2014 at 4:05 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Sat, May 3, 2014 at 2:39 AM, Cong Hou <congh@google.com> wrote:
>>> On Mon, Apr 28, 2014 at 4:04 AM, Richard Biener <rguenther@suse.de> wrote:
>>>> On Thu, 24 Apr 2014, Cong Hou wrote:
>>>>
>>>>> Given the following loop:
>>>>>
>>>>> int a[N];
>>>>> short b[N*2];
>>>>>
>>>>> for (int i = 0; i < N; ++i)
>>>>>   a[i] = b[i*2];
>>>>>
>>>>>
>>>>> After being vectorized, the access to b[i*2] will be compiled into
>>>>> several packing statements, while the type promotion from short to int
>>>>> will be compiled into several unpacking statements. With this patch,
>>>>> each pair of pack/unpack statements will be replaced by less expensive
>>>>> statements (with shift or bit-and operations).
>>>>>
>>>>> On x86_64, the loop above will be compiled into the following assembly
>>>>> (with -O2 -ftree-vectorize):
>>>>>
>>>>> movdqu 0x10(%rcx),%xmm3
>>>>> movdqu -0x20(%rcx),%xmm0
>>>>> movdqa %xmm0,%xmm2
>>>>> punpcklwd %xmm3,%xmm0
>>>>> punpckhwd %xmm3,%xmm2
>>>>> movdqa %xmm0,%xmm3
>>>>> punpcklwd %xmm2,%xmm0
>>>>> punpckhwd %xmm2,%xmm3
>>>>> movdqa %xmm1,%xmm2
>>>>> punpcklwd %xmm3,%xmm0
>>>>> pcmpgtw %xmm0,%xmm2
>>>>> movdqa %xmm0,%xmm3
>>>>> punpckhwd %xmm2,%xmm0
>>>>> punpcklwd %xmm2,%xmm3
>>>>> movups %xmm0,-0x10(%rdx)
>>>>> movups %xmm3,-0x20(%rdx)
>>>>>
>>>>>
>>>>> With this patch, the generated assembly is shown below:
>>>>>
>>>>> movdqu 0x10(%rcx),%xmm0
>>>>> movdqu -0x20(%rcx),%xmm1
>>>>> pslld  $0x10,%xmm0
>>>>> psrad  $0x10,%xmm0
>>>>> pslld  $0x10,%xmm1
>>>>> movups %xmm0,-0x10(%rdx)
>>>>> psrad  $0x10,%xmm1
>>>>> movups %xmm1,-0x20(%rdx)
>>>>>
>>>>>
>>>>> Bootstrapped and tested on x86-64. OK for trunk?
>>>>
>>>> This is an odd place to implement such transform.  Also if it
>>>> is faster or not depends on the exact ISA you target - for
>>>> example ppc has constraints on the maximum number of shifts
>>>> carried out in parallel and the above has 4 in very short
>>>> succession.  Esp. for the sign-extend path.
>>>
>>> Thank you for the information about ppc. If this is an issue, I think
>>> we can do it in a target dependent way.
>>>
>>>
>>>>
>>>> So this looks more like an opportunity for a post-vectorizer
>>>> transform on RTL or for the vectorizer special-casing
>>>> widening loads with a vectorizer pattern.
>>>
>>> I am not sure if the RTL transform is more difficult to implement. I
>>> prefer the widening loads method, which can be detected in a pattern
>>> recognizer. The target related issue will be resolved by only
>>> expanding the widening load on those targets where this pattern is
>>> beneficial. But this requires new tree operations to be defined. What
>>> is your suggestion?
>>>
>>> I apologize for the delayed reply.
>>
>> Likewise ;)
>>
>> I suggest to implement this optimization in vector lowering in
>> tree-vect-generic.c.  This sees for your example
>>
>>   vect__5.7_32 = MEM[symbol: b, index: ivtmp.15_13, offset: 0B];
>>   vect__5.8_34 = MEM[symbol: b, index: ivtmp.15_13, offset: 16B];
>>   vect_perm_even_35 = VEC_PERM_EXPR <vect__5.7_32, vect__5.8_34, { 0,
>> 2, 4, 6, 8, 10, 12, 14 }>;
>>   vect__6.9_37 = [vec_unpack_lo_expr] vect_perm_even_35;
>>   vect__6.9_38 = [vec_unpack_hi_expr] vect_perm_even_35;
>>
>> where you can apply the pattern matching and transform (after checking
>> with the target, of course).
>
> This sounds good to me! I'll try to make a patch following your suggestion.
>
> Thank you!
>
>
> Cong
>
>>
>> Richard.
>>
>>>
>>> thanks,
>>> Cong
>>>
>>>>
>>>> Richard.

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 117cdd0..e7143f1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@ 
+2014-04-23  Cong Hou  <congh@google.com>
+
+	* tree-vect-stmts.c (detect_pack_unpack_pattern): New function.
+	(vect_gen_widened_results_half): Call detect_pack_unpack_pattern.
+
 2014-04-23  David Malcolm  <dmalcolm@redhat.com>
 
 	* is-a.h: Update comments to reflect the following changes to the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 62b07f4..a8755b3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@ 
+2014-04-23  Cong Hou  <congh@google.com>
+
+	* gcc.dg/vect/vect-125.c: New test.
+
 2014-04-23  Jeff Law  <law@redhat.com>
 
 	PR tree-optimization/60902
diff --git a/gcc/testsuite/gcc.dg/vect/vect-125.c b/gcc/testsuite/gcc.dg/vect/vect-125.c
new file mode 100644
index 0000000..988dea6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-125.c
@@ -0,0 +1,122 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <limits.h>
+#include "tree-vect.h"
+
+#define N 64
+
+char b[N];
+unsigned char c[N];
+short d[N];
+unsigned short e[N];
+
+__attribute__((noinline)) void
+test1 ()
+{
+  int a[N];
+  int i;
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = b[i*2];
+      a[i+N/2] = b[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != b[i*2] || a[i+N/2] != b[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = c[i*2];
+      a[i+N/2] = c[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != c[i*2] || a[i+N/2] != c[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = d[i*2];
+      a[i+N/2] = d[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != d[i*2] || a[i+N/2] != d[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = e[i*2];
+      a[i+N/2] = e[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != e[i*2] || a[i+N/2] != e[i*2+1])
+      abort ();
+}
+
+__attribute__((noinline)) void
+test2 ()
+{
+  unsigned int a[N];
+  int i;
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = b[i*2];
+      a[i+N/2] = b[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != b[i*2] || a[i+N/2] != b[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = c[i*2];
+      a[i+N/2] = c[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != c[i*2] || a[i+N/2] != c[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = d[i*2];
+      a[i+N/2] = d[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != d[i*2] || a[i+N/2] != d[i*2+1])
+      abort ();
+
+  for (i = 0; i < N/2; i++)
+    {
+      a[i] = e[i*2];
+      a[i+N/2] = e[i*2+1];
+    }
+  for (i = 0; i < N/2; i++)
+    if (a[i] != e[i*2] || a[i+N/2] != e[i*2+1])
+      abort ();
+}
+
+int
+main ()
+{
+  b[0] = CHAR_MIN;
+  c[0] = UCHAR_MAX;
+  d[0] = SHRT_MIN;
+  e[0] = USHRT_MAX;
+
+  int i;
+  for (i = 1; i < N; i++)
+    {
+      b[i] = b[i-1] + 1;
+      c[i] = c[i-1] - 1;
+      d[i] = d[i-1] + 1;
+      e[i] = e[i-1] - 1;
+    }
+
+  test1 ();
+  test2 ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "A pack-unpack pattern is recognized" 32 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1a51d6d..d0cf1f4 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3191,6 +3191,174 @@  vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
 }
 
 
+/* Function detect_pack_unpack_pattern
+
+   Detect the following pattern:
+
+   S1  vect3 = VEC_PERM_EXPR <vect1, vect2, { 0, 2, 4, ... }>;
+   or
+   S1  vect3 = VEC_PERM_EXPR <vect1, vect2, { 1, 3, 5, ... }>;
+
+   S2  vect4 = [vec_unpack_lo_expr] vect3;
+   or/and
+   S3  vect5 = [vec_unpack_hi_expr] vect3;
+
+   S1 is usually generated from accessing even/odd elements of an array
+   and S2/S3 are generated from type promotion.  In this case S1 and S2/S3 can
+   be replaced by less expensive statements.  */
+
+static gimple
+detect_pack_unpack_pattern (enum tree_code code, tree vec_oprnd, int op_type,
+			    tree vec_dest, gimple_stmt_iterator *gsi,
+			    gimple stmt)
+{
+  if ((code != VEC_UNPACK_LO_EXPR && code != VEC_UNPACK_HI_EXPR)
+      || op_type != unary_op)
+    return NULL;
+
+  if (TREE_CODE (vec_oprnd) != SSA_NAME)
+    return NULL;
+
+  gimple def_stmt = SSA_NAME_DEF_STMT (vec_oprnd);
+  if (!is_gimple_assign (def_stmt))
+    return NULL;
+  enum tree_code def_code = gimple_assign_rhs_code (def_stmt);
+  if (def_code != VEC_PERM_EXPR)
+    return NULL;
+
+  bool unpack_lo = (code == VEC_UNPACK_LO_EXPR);
+  tree perm_vec = gimple_assign_rhs3 (def_stmt);
+  gcc_assert (TREE_CODE (perm_vec) == VECTOR_CST);
+
+  /* Check if VEC_PERM_EXPR is generated from accessing even/odd elements of
+     an array.  */
+  int nelts = VECTOR_CST_NELTS (perm_vec);
+  int odd_num = 0;
+  for (int i = 0; i < nelts / 2; i++)
+    {
+      tree elt = VECTOR_CST_ELT (perm_vec,
+				 unpack_lo ? i : i + nelts / 2);
+      int elt_val = (HOST_WIDE_INT) TREE_INT_CST_LOW (elt);
+      if (!unpack_lo)
+	elt_val -= nelts;
+
+      if (elt_val / 2 != i)
+	return NULL;
+      odd_num += elt_val & 1;
+    }
+
+  bool even_elem = false;
+  if (odd_num == 0)
+    even_elem = true;
+  else if (odd_num != nelts / 2)
+    return NULL;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "A pack-unpack pattern is recognized.\n");
+
+  tree vec_oprnd0 = unpack_lo ? gimple_assign_rhs1 (def_stmt)
+			      : gimple_assign_rhs2 (def_stmt);
+  tree vec_type = TREE_TYPE (vec_oprnd0);
+  tree scalar_type = lang_hooks.types.type_for_mode (
+		       GET_MODE_INNER (TYPE_MODE (vec_type)),
+		       TYPE_UNSIGNED (vec_type));
+  tree vectype_out = TREE_TYPE (vec_dest);
+
+  if (even_elem)
+    {
+      /* If even elements are packed, and if the original values are unsigned,
+	 build a bit_and statement to replace the pack-unpack statements.
+	 If the original values are signed, build a lshift statement and a
+	 rshift statement to replace the pack-unpack statements.  */
+
+      if (TYPE_UNSIGNED (vec_type))
+	{
+	  tree *mask = XALLOCAVEC (tree, nelts);
+	  unsigned HOST_WIDE_INT max_val = TREE_INT_CST_LOW (
+					     TYPE_MAX_VALUE (scalar_type));
+	  for (int k = 0; k < nelts; ++k)
+	    mask[k] = build_int_cst (scalar_type, (k & 1) ? 0 : max_val);
+	  tree vec_oprnd1 = build_vector (TREE_TYPE (vec_oprnd0), mask);
+	  tree temp = make_temp_ssa_name (vec_type, NULL, "vect_temp");
+
+	  gimple new_stmt1, new_stmt2;
+	  new_stmt1 = gimple_build_assign_with_ops (BIT_AND_EXPR, temp,
+						    vec_oprnd0, vec_oprnd1);
+	  new_stmt2 = gimple_build_assign_with_ops (NOP_EXPR, vec_dest,
+						    temp, NULL_TREE);
+	  gimple_assign_set_lhs (new_stmt2,
+				 make_ssa_name (vec_dest, new_stmt2));
+
+	  vect_finish_stmt_generation (stmt, new_stmt1, gsi);
+	  vect_finish_stmt_generation (stmt, new_stmt2, gsi);
+
+	  return new_stmt2;
+	}
+      else
+	{
+	  tree scalar_itype = lang_hooks.types.type_for_mode (
+				GET_MODE_INNER (TYPE_MODE (vectype_out)), 0);
+	  tree vecitype = get_vectype_for_scalar_type (scalar_itype);
+	  tree shift_oprnd = build_int_cst (scalar_type,
+					    TYPE_PRECISION (scalar_type));
+
+	  tree temp1 = make_temp_ssa_name (vecitype, NULL, "vect_temp");
+	  tree temp2 = make_temp_ssa_name (vecitype, NULL, "vect_temp");
+	  tree temp3 = make_temp_ssa_name (vecitype, NULL, "vect_temp");
+
+	  gimple new_stmt1, new_stmt2, new_stmt3, new_stmt4;
+	  new_stmt1 = gimple_build_assign_with_ops (NOP_EXPR, temp1,
+						    vec_oprnd0, NULL);
+	  new_stmt2 = gimple_build_assign_with_ops (LSHIFT_EXPR, temp2,
+						    temp1, shift_oprnd);
+	  new_stmt3 = gimple_build_assign_with_ops (RSHIFT_EXPR, temp3,
+						    temp2, shift_oprnd);
+	  new_stmt4 = gimple_build_assign_with_ops (NOP_EXPR, vec_dest,
+						    temp3, NULL);
+	  gimple_assign_set_lhs (new_stmt4,
+				 make_ssa_name (vec_dest, new_stmt4));
+
+	  vect_finish_stmt_generation (stmt, new_stmt1, gsi);
+	  vect_finish_stmt_generation (stmt, new_stmt2, gsi);
+	  vect_finish_stmt_generation (stmt, new_stmt3, gsi);
+	  vect_finish_stmt_generation (stmt, new_stmt4, gsi);
+
+	  return new_stmt4;
+	}
+    }
+  else
+    {
+      /* If odd elements are packed, build a rshift statement to replace
+	 the pack-unpack statements.  */
+
+      gimple new_stmt1, new_stmt2, new_stmt3;
+      tree scalar_itype = lang_hooks.types.type_for_mode (
+			    GET_MODE_INNER (TYPE_MODE (vectype_out)),
+			    TYPE_UNSIGNED (scalar_type));
+      tree vecitype = get_vectype_for_scalar_type (scalar_itype);
+      tree temp1 = make_temp_ssa_name (vecitype, NULL, "vect_temp");
+      tree temp2 = make_temp_ssa_name (vecitype, NULL, "vect_temp");
+      tree shift_oprnd = build_int_cst (scalar_type,
+					TYPE_PRECISION (scalar_type));
+
+      new_stmt1 = gimple_build_assign_with_ops (NOP_EXPR, temp1,
+						vec_oprnd0, NULL);
+      new_stmt2 = gimple_build_assign_with_ops (RSHIFT_EXPR, temp2,
+						temp1, shift_oprnd);
+      new_stmt3 = gimple_build_assign_with_ops (NOP_EXPR, vec_dest,
+						temp2, NULL);
+      gimple_assign_set_lhs (new_stmt3,
+			     make_ssa_name (vec_dest, new_stmt3));
+
+      vect_finish_stmt_generation (stmt, new_stmt1, gsi);
+      vect_finish_stmt_generation (stmt, new_stmt2, gsi);
+      vect_finish_stmt_generation (stmt, new_stmt3, gsi);
+
+      return new_stmt3;
+    }
+}
+
 /* Function vect_gen_widened_results_half
 
    Create a vector stmt whose code, type, number of arguments, and result
@@ -3223,10 +3391,15 @@  vect_gen_widened_results_half (enum tree_code code,
     }
   else
     {
+      if ((new_stmt = detect_pack_unpack_pattern (
+			  code, vec_oprnd0, op_type, vec_dest, gsi, stmt)))
+        return new_stmt;
+
       /* Generic support */
       gcc_assert (op_type == TREE_CODE_LENGTH (code));
       if (op_type != binary_op)
 	vec_oprnd1 = NULL;
+
       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
 					       vec_oprnd1);
       new_temp = make_ssa_name (vec_dest, new_stmt);