| Message ID | mpt1ry0dmhq.fsf@arm.com |
|---|---|
| State | New |
| Headers | show |
| Series | Fold MASK_LOAD/STORE with an all-true mask (expand) |
On Mon, Aug 5, 2019 at 10:50 AM Richard Sandiford <richard.sandiford@arm.com> wrote: > > This patch folds IFN_MASK_LOAD and IFN_MASK_STOREs to normal accesses > if the mask is all-true. This can happen for fully-masked loops that > didn't actually need to be (which could be handled by the vectoriser > instead), or for unrolled fully-masked loops whose first iteration is > guaranteed to operate on a full vector. It's also useful when the > accesses are generated directly by intrinsics (to follow for SVE). > > Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu. > OK to install? OK. > Richard > > > 2019-08-05 Richard Sandiford <richard.sandiford@arm.com> > > gcc/ > * gimple-fold.c (gimple_fold_mask_load_store_mem_ref) > (gimple_fold_mask_load, gimple_fold_mask_store): New functions. > (gimple_fold_call): Use them to fold IFN_MASK_LOAD and > IFN_MASK_STORE. > > gcc/testsuite/ > * gcc.target/aarch64/sve/mask_load_1.c: New test. > > Index: gcc/gimple-fold.c > =================================================================== > --- gcc/gimple-fold.c 2019-08-05 09:47:38.821896600 +0100 > +++ gcc/gimple-fold.c 2019-08-05 09:49:29.233091006 +0100 > @@ -4180,6 +4180,63 @@ arith_overflowed_p (enum tree_code code, > return wi::min_precision (wres, sign) > TYPE_PRECISION (type); > } > > +/* If IFN_MASK_LOAD/STORE call CALL is unconditional, return a MEM_REF > + for the memory it references, otherwise return null. VECTYPE is the > + type of the memory vector. 
*/ > + > +static tree > +gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype) > +{ > + tree ptr = gimple_call_arg (call, 0); > + tree alias_align = gimple_call_arg (call, 1); > + tree mask = gimple_call_arg (call, 2); > + if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask)) > + return NULL_TREE; > + > + unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align) * BITS_PER_UNIT; > + if (TYPE_ALIGN (vectype) != align) > + vectype = build_aligned_type (vectype, align); > + tree offset = build_zero_cst (TREE_TYPE (alias_align)); > + return fold_build2 (MEM_REF, vectype, ptr, offset); > +} > + > +/* Try to fold IFN_MASK_LOAD call CALL. Return true on success. */ > + > +static bool > +gimple_fold_mask_load (gimple_stmt_iterator *gsi, gcall *call) > +{ > + tree lhs = gimple_call_lhs (call); > + if (!lhs) > + return false; > + > + if (tree rhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (lhs))) > + { > + gassign *new_stmt = gimple_build_assign (lhs, rhs); > + gimple_set_location (new_stmt, gimple_location (call)); > + gimple_move_vops (new_stmt, call); > + gsi_replace (gsi, new_stmt, false); > + return true; > + } > + return false; > +} > + > +/* Try to fold IFN_MASK_STORE call CALL. Return true on success. */ > + > +static bool > +gimple_fold_mask_store (gimple_stmt_iterator *gsi, gcall *call) > +{ > + tree rhs = gimple_call_arg (call, 3); > + if (tree lhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (rhs))) > + { > + gassign *new_stmt = gimple_build_assign (lhs, rhs); > + gimple_set_location (new_stmt, gimple_location (call)); > + gimple_move_vops (new_stmt, call); > + gsi_replace (gsi, new_stmt, false); > + return true; > + } > + return false; > +} > + > /* Attempt to fold a call statement referenced by the statement iterator GSI. > The statement may be replaced by another statement, e.g., if the call > simplifies to a constant value. Return true if any changes were made. 
> @@ -4409,6 +4466,12 @@ gimple_fold_call (gimple_stmt_iterator * > subcode = MULT_EXPR; > cplx_result = true; > break; > + case IFN_MASK_LOAD: > + changed |= gimple_fold_mask_load (gsi, stmt); > + break; > + case IFN_MASK_STORE: > + changed |= gimple_fold_mask_store (gsi, stmt); > + break; > default: > break; > } > Index: gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c > =================================================================== > --- /dev/null 2019-07-30 08:53:31.317691683 +0100 > +++ gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c 2019-08-05 09:49:29.233091006 +0100 > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -fdump-tree-optimized" } */ > + > +void > +f (int *x) > +{ > + for (int i = 0; i < 8; ++i) > + x[i] += 1; > +} > + > +/* { dg-final { scan-tree-dump { = MEM <vector\(8\) int>} "optimized" } } */ > +/* { dg-final { scan-tree-dump { MEM <vector\(8\) int> \[[^]]*\] = } "optimized" } } */
Index: gcc/gimple-fold.c =================================================================== --- gcc/gimple-fold.c 2019-08-05 09:47:38.821896600 +0100 +++ gcc/gimple-fold.c 2019-08-05 09:49:29.233091006 +0100 @@ -4180,6 +4180,63 @@ arith_overflowed_p (enum tree_code code, return wi::min_precision (wres, sign) > TYPE_PRECISION (type); } +/* If IFN_MASK_LOAD/STORE call CALL is unconditional, return a MEM_REF + for the memory it references, otherwise return null. VECTYPE is the + type of the memory vector. */ + +static tree +gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype) +{ + tree ptr = gimple_call_arg (call, 0); + tree alias_align = gimple_call_arg (call, 1); + tree mask = gimple_call_arg (call, 2); + if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask)) + return NULL_TREE; + + unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align) * BITS_PER_UNIT; + if (TYPE_ALIGN (vectype) != align) + vectype = build_aligned_type (vectype, align); + tree offset = build_zero_cst (TREE_TYPE (alias_align)); + return fold_build2 (MEM_REF, vectype, ptr, offset); +} + +/* Try to fold IFN_MASK_LOAD call CALL. Return true on success. */ + +static bool +gimple_fold_mask_load (gimple_stmt_iterator *gsi, gcall *call) +{ + tree lhs = gimple_call_lhs (call); + if (!lhs) + return false; + + if (tree rhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (lhs))) + { + gassign *new_stmt = gimple_build_assign (lhs, rhs); + gimple_set_location (new_stmt, gimple_location (call)); + gimple_move_vops (new_stmt, call); + gsi_replace (gsi, new_stmt, false); + return true; + } + return false; +} + +/* Try to fold IFN_MASK_STORE call CALL. Return true on success. 
*/ + +static bool +gimple_fold_mask_store (gimple_stmt_iterator *gsi, gcall *call) +{ + tree rhs = gimple_call_arg (call, 3); + if (tree lhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (rhs))) + { + gassign *new_stmt = gimple_build_assign (lhs, rhs); + gimple_set_location (new_stmt, gimple_location (call)); + gimple_move_vops (new_stmt, call); + gsi_replace (gsi, new_stmt, false); + return true; + } + return false; +} + /* Attempt to fold a call statement referenced by the statement iterator GSI. The statement may be replaced by another statement, e.g., if the call simplifies to a constant value. Return true if any changes were made. @@ -4409,6 +4466,12 @@ gimple_fold_call (gimple_stmt_iterator * subcode = MULT_EXPR; cplx_result = true; break; + case IFN_MASK_LOAD: + changed |= gimple_fold_mask_load (gsi, stmt); + break; + case IFN_MASK_STORE: + changed |= gimple_fold_mask_store (gsi, stmt); + break; default: break; } Index: gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c =================================================================== --- /dev/null 2019-07-30 08:53:31.317691683 +0100 +++ gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c 2019-08-05 09:49:29.233091006 +0100 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -fdump-tree-optimized" } */ + +void +f (int *x) +{ + for (int i = 0; i < 8; ++i) + x[i] += 1; +} + +/* { dg-final { scan-tree-dump { = MEM <vector\(8\) int>} "optimized" } } */ +/* { dg-final { scan-tree-dump { MEM <vector\(8\) int> \[[^]]*\] = } "optimized" } } */