commit ffd2fc57dd0a8048334349aa2e923cd921e1dc31
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue Jul 12 12:30:47 2016 +0100
GIMPLE store merging pass
@@ -1295,6 +1295,7 @@ OBJS = \
gimple-ssa-isolate-paths.o \
gimple-ssa-nonnull-compare.o \
gimple-ssa-split-paths.o \
+ gimple-ssa-store-merging.o \
gimple-ssa-strength-reduction.o \
gimple-streamer-in.o \
gimple-streamer-out.o \
@@ -1452,6 +1452,10 @@ fstrict-volatile-bitfields
Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width.
+fstore-merging
+Common Var(flag_store_merging) Optimization
+Use the tree store merging pass.
+
fguess-branch-probability
Common Report Var(flag_guess_branch_prob) Optimization
Enable guessing of branch probabilities.
@@ -401,7 +401,7 @@ Objective-C and Objective-C++ Dialects}.
-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
-fsplit-paths @gol
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
--fstdarg-opt -fstrict-aliasing @gol
+-fstdarg-opt -fstore-merging -fstrict-aliasing @gol
-fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
-ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
-ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts @gol
@@ -412,8 +412,8 @@ Objective-C and Objective-C++ Dialects}.
-ftree-loop-vectorize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
--ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vectorize -ftree-vrp -funconstrained-commons @gol
+-ftree-switch-conversion -ftree-tail-merge @gol
+-ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-math-optimizations -funswitch-loops @gol
-fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
@@ -6325,6 +6325,7 @@ compilation time.
-fsplit-wide-types @gol
-fssa-backprop @gol
-fssa-phiopt @gol
+-fstore-merging @gol
-ftree-bit-ccp @gol
-ftree-ccp @gol
-ftree-ch @gol
@@ -7652,6 +7653,13 @@ Perform scalar replacement of aggregates. This pass replaces structure
references with scalars to prevent committing structures to memory too
early. This flag is enabled by default at @option{-O} and higher.
+@item -fstore-merging
+@opindex fstore-merging
+Perform merging of narrow stores to consecutive memory addresses. This pass
+merges contigous stores of immediate values narrower than a word into fewer
+wider stores to reduce the number of instructions. This is enabled by default
+at @option{-O} and higher.
+
@item -ftree-ter
@opindex ftree-ter
Perform temporary expression replacement during the SSA->normal phase. Single
new file mode 100644
@@ -0,0 +1,963 @@
+/* GIMPLE store merging pass.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The purpose of this pass is to combine multiple memory stores of
+ constant values to consecutive memory locations into fewer wider stores.
+ For example, if we have a sequence peforming four byte stores to
+ consecutive memory locations:
+ [p ] := imm1;
+ [p + 1B] := imm2;
+ [p + 2B] := imm3;
+ [p + 3B] := imm4;
+ we can transform this into a single 4-byte store if the target supports it:
+ [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
+
+ The algorithm is applied to each basic block in three phases:
+
+ 1) Scan through the basic block recording constant assignments to
+ destinations that can be expressed as a store to memory of a certain size
+ at a certain bit offset. Record store chains to different bases in a
+ hash_map (m_stores) and make sure to terminate such chains when appropriate
+ (for example when when the stored values get used subsequently).
+ These stores can be a result of structure element initializers, array stores
+ etc. A store_immediate_info object is recorded for every such store.
+ Record as many such assignments to a single base as possible until a
+ statement that interferes with the store sequence is encountered.
+
+ 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
+ store_immediate_info objects) and coalesce contiguous stores into
+ merged_store_group objects.
+
+ For example, given the stores:
+ [p ] := 0;
+ [p + 1B] := 1;
+ [p + 3B] := 0;
+ [p + 4B] := 1;
+ [p + 5B] := 0;
+ [p + 6B] := 0;
+ This phase would produce two merged_store_group objects, one recording the
+ two bytes stored in the memory region [p : p + 1] and another
+ recording the four bytes stored in the memory region [p + 3 : p + 6].
+
+ 3) The merged_store_group objects produced in phase 2) are processed
+ to generate the sequence of wider stores that set the contiguous memory
+ regions to the sequence of bytes that correspond to it. This may emit
+ multiple stores per store group to handle contiguous stores that are not
+ of a size that is a power of 2. For example it can try to emit a 40-bit
+ store as a 32-bit store followed by an 8-bit store.
+ We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
+ SLOW_UNALIGNED_ACCESS rules.
+
+ Note on endianness and example:
+ The bitposition that is written in each access is determined by
+ get_inner_reference which calculates the position in the object rather than
+ the memory itself so some care must be taken when coalescing stores and
+ synthesizing the result in steps 2) and 3) respectively to handle
+ byte-endianness correctly.
+ Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
+ [p ] := 0x1234;
+ [p + 2B] := 0x5678;
+ [p + 4B] := 0xab;
+ [p + 5B] := 0xcd;
+
+ The memory layout for little-endian (LE) and big-endian (BE) must be:
+ p |LE|BE|
+ ---------
+ 0 |34|12|
+ 1 |12|34|
+ 2 |78|56|
+ 3 |56|78|
+ 4 |ab|ab|
+ 5 |cd|cd|
+
+ To merge these into a single 48-bit merged value 'val' in phase 2)
+ on little-endian we insert stores to higher (consecutive) bitpositions
+ into the most significant bits of the merged value.
+ For the example above that would be:
+ val = 0x1234;
+ val = val | (0x5678 << 16);
+ val = val | (0xab << 32);
+ val = val | (0xcd << 40);
+ To produce a wide_int containing: 0xcdab56781234
+
+ For big-endian we insert stores to higher bitpositions into the least
+ significant bits of the merged value. So for the above example the
+ operations would be:
+ val = 0x1234;
+ val = 0x5678 | (val << 16);
+ val = 0xab | (val << 8);
+ val = 0xcd | (val << 8);
+ to produce a wide_int containing: 0x12345678abcd
+
+ Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
+ followed by a 16-bit store. Again, we must consider endianness when
+ breaking down the 48-bit value 'val' computed above.
+ For little endian we emit:
+ [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
+ [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
+
+ Whereas for big-endian we emit:
+ [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
+ [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff;
+
+ The same effect could be achieved without changing the insertion order
+ during phase 2) and changing the extraction logic in phase 3) by
+ byte-swapping the values when they are being merged in phase 2) and again
+ after extracting them in phase 3). But that approach would be harder to
+ extend to deal with potential future improvements including bitfields that
+ are not a multiple of BITS_PER_UNIT or non-constant contiguous stores. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "builtins.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "params.h"
+#include "print-tree.h"
+#include "tree-hash-traits.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+
+/* The maximum size (in bits) of the stores this pass should generate. */
+#define MAX_STORE_BITSIZE (BITS_PER_WORD)
+
+namespace {
+
+/* Struct recording the information about a single store of an immediate
+ to memory. These are created in the first phase and coalesced into
+ merged_store_group objects in the second phase. */
+
+struct store_immediate_info
+{
+ HOST_WIDE_INT bitsize;
+ HOST_WIDE_INT bitpos;
+ tree val;
+ tree dest;
+ gimple *stmt;
+ unsigned int order;
+ store_immediate_info (HOST_WIDE_INT, HOST_WIDE_INT, tree, tree, gimple *,
+ unsigned int);
+};
+
+store_immediate_info::store_immediate_info (HOST_WIDE_INT bs, HOST_WIDE_INT bp,
+ tree v, tree d, gimple *st,
+ unsigned int ord)
+ : bitsize (bs), bitpos (bp), val (v), dest (d), stmt (st), order (ord)
+{
+}
+
+/* Struct representing a group of stores to contiguous memory locations.
+ These are produced by the second phase (coalescing) and consumed in the
+ third phase that outputs the widened stores. */
+
+struct merged_store_group
+{
+ HOST_WIDE_INT start;
+ HOST_WIDE_INT width;
+ wide_int val;
+ unsigned int align;
+ auto_vec<gimple *> stmts;
+ /* We record the first and last original statements in the sequence because
+ because we'll need their vuse/vdef and replacement position. */
+ gimple *last_stmt;
+ unsigned int last_order;
+
+ gimple *first_stmt;
+ unsigned int first_order;
+ merged_store_group (store_immediate_info *);
+ void merge_into (store_immediate_info *);
+ bool merge_overlapping (store_immediate_info *);
+};
+
+/* Initialize a merged_store_group object from a store_immediate_info
+ object. Initialize the wide_int recording the value and the vector
+ of statements that will be replaced by this store. */
+
+merged_store_group::merged_store_group (store_immediate_info *info)
+{
+ start = info->bitpos;
+ width = info->bitsize;
+
+ val = wide_int::from (info->val, MAX_STORE_BITSIZE, UNSIGNED);
+ align = get_object_alignment (info->dest);
+ stmts.create (1);
+ stmts.safe_push (info->stmt);
+ last_stmt = info->stmt;
+ last_order = info->order;
+ first_stmt = last_stmt;
+ first_order = last_order;
+}
+
+/* Merge a store recorded by INFO into this merged store.
+ Insert the value stored into the val field in the appropriate
+ position (byte-swapping as appropriate for endianness).
+ Record the original statement that produced the store.
+ See note on endianness at top of file for an explanation of the
+ BYTES_BIG_ENDIAN logic. */
+
+void
+merged_store_group::merge_into (store_immediate_info *info)
+{
+ HOST_WIDE_INT wid = info->bitsize;
+ wide_int tmp = wide_int::from (info->val, MAX_STORE_BITSIZE, UNSIGNED);
+
+ /* Make sure we're inserting in the position we think we're inserting. */
+ gcc_assert (info->bitpos == start + width);
+ if (BYTES_BIG_ENDIAN)
+ val = wi::insert (tmp, val, wid, width);
+ else
+ val = wi::insert (val, tmp, width, wid);
+
+ width += wid;
+ gimple *stmt = info->stmt;
+ stmts.safe_push (stmt);
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+}
+
+/* Merge a store described by INFO into this merged store.
+ INFO overlaps in some way with the current store (i.e. it's not contiguous
+ which is handled by merged_store_group::merge_into). Return true
+ iff it was possible to perform the merge. */
+
+bool
+merged_store_group::merge_overlapping (store_immediate_info *info)
+{
+ /* Currently only deal with cases where we know the value we're inserting
+ overwrites part of the existing value or is overwritten by it. */
+ if (IN_RANGE (info->order, first_order, last_order))
+ return false;
+
+ bool over_p = info->order > last_order;
+ gimple *stmt = info->stmt;
+
+ HOST_WIDE_INT wid = info->bitsize;
+ gcc_assert (info->bitpos >= start);
+ HOST_WIDE_INT pos = info->bitpos - start;
+ wide_int tmp = wide_int::from (info->val, MAX_STORE_BITSIZE, UNSIGNED);
+
+ /* Don't handle situations where part of the store 'sticks out'.
+ It's unlikely to come up often. */
+ if (pos + wid > width)
+ return false;
+
+ if (!over_p)
+ {
+ /* New value is completely overwritten by existing value. */
+ first_order = info->order;
+ first_stmt = stmt;
+ stmts.safe_push (stmt);
+ return true;
+ }
+
+ val = wi::insert (val, tmp, pos, wid);
+ last_order = info->order;
+ last_stmt = stmt;
+ stmts.safe_push (stmt);
+ return true;
+}
+
+/* Structure describing the store chain. */
+
+struct imm_store_chain_info
+{
+ auto_vec<struct store_immediate_info *> m_store_info;
+ auto_vec<merged_store_group *> m_merged_store_groups;
+
+ bool terminate_and_process_chain (tree);
+ bool coalesce_immediate_stores ();
+ bool output_merged_store (tree, merged_store_group *);
+ bool output_merged_stores (tree);
+};
+
+const pass_data pass_data_tree_store_merging = {
+ GIMPLE_PASS, /* type */
+ "store-merging", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_store_merging : public gimple_opt_pass
+{
+public:
+ pass_store_merging (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_tree_store_merging, ctxt)
+ {
+ }
+
+ virtual bool
+ gate (function *)
+ {
+ return flag_store_merging && optimize;
+ }
+
+ virtual unsigned int execute (function *);
+
+private:
+ hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
+
+ bool terminate_and_process_all_chains ();
+ bool terminate_all_aliasing_chains (tree, tree, gimple *);
+ bool terminate_and_release_chain (tree);
+}; // class pass_store_merging
+
+/* Terminate and process all recorded chains. Return true if any changes
+ were made. */
+
+bool
+pass_store_merging::terminate_and_process_all_chains ()
+{
+ hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter
+ = m_stores.begin ();
+ bool ret = false;
+ for (; iter != m_stores.end (); ++iter)
+ ret |= terminate_and_release_chain ((*iter).first);
+
+ return ret;
+}
+
+/* Terminate all chains that are affected by the assignment to DEST, appearing
+ in statement STMT and ultimately points to the object BASE. Return true if
+ at least one aliasing chain was terminated. */
+
+bool
+pass_store_merging::terminate_all_aliasing_chains (tree dest, tree base,
+ gimple *stmt)
+{
+ bool ret = false;
+ struct imm_store_chain_info **chain_info = m_stores.get (base);
+ if (chain_info)
+ {
+ struct store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
+ {
+ if (refs_may_alias_p (info->dest, dest))
+ {
+ terminate_and_release_chain (base);
+ ret = true;
+ break;
+ }
+ }
+ }
+
+ hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter
+ = m_stores.begin ();
+
+ for (; iter != m_stores.end (); ++iter)
+ {
+ if (chain_info && (*chain_info) == (*iter).second)
+ continue;
+
+ tree key = (*iter).first;
+ if (ref_maybe_used_by_stmt_p (stmt, key)
+ || stmt_may_clobber_ref_p (stmt, key))
+ {
+ terminate_and_release_chain (key);
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+/* Helper function. Terminate the recorded chain storing to base object
+ BASE. Return true if the merging and output was successful. The m_stores
+ entry is removed after the processing in any case. */
+
+bool
+pass_store_merging::terminate_and_release_chain (tree base)
+{
+ struct imm_store_chain_info **chain_info = m_stores.get (base);
+
+ gcc_assert (chain_info);
+ gcc_assert (*chain_info);
+
+ bool ret = (*chain_info)->terminate_and_process_chain (base);
+ delete *chain_info;
+ m_stores.remove (base);
+ return ret;
+}
+
+/* Sorting function for store_immediate_info objects.
+ Sorts them by bitposition. */
+
+static int
+sort_by_bitpos (const void *x, const void *y)
+{
+ store_immediate_info *const *tmp = (store_immediate_info * const *) x;
+ store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
+
+ if ((*tmp)->bitpos <= (*tmp2)->bitpos)
+ return -1;
+ else if ((*tmp)->bitpos > (*tmp2)->bitpos)
+ return 1;
+
+ gcc_unreachable ();
+ return 0;
+}
+
+/* Go through the candidate stores recorded in m_store_info and merge them
+ into merged_store_group objects recorded into m_merged_store_groups
+ representing the widened stores. Return true if coalescing was successful
+ and the number of widened stores is fewer than the original number
+ of stores. */
+
+bool
+imm_store_chain_info::coalesce_immediate_stores ()
+{
+ /* Anything less can't be processed. */
+ if (m_store_info.length () < 2)
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
+ m_store_info.length ());
+
+ store_immediate_info *info;
+ unsigned int i;
+
+ /* Order the stores by the bitposition they write to. */
+ m_store_info.qsort (sort_by_bitpos);
+
+ info = m_store_info[0];
+ merged_store_group *merged_store = new merged_store_group (info);
+ m_merged_store_groups.safe_push (merged_store);
+
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
+ " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
+ i, info->bitsize, info->bitpos);
+ print_generic_expr (dump_file, info->val, 0);
+ fprintf (dump_file, "\n------------\n");
+ }
+
+ if (i == 0)
+ continue;
+
+ /* |---store 1---|
+ |---store 2---|
+ Partially overlapping stores are not handled at the moment. */
+ HOST_WIDE_INT start = info->bitpos;
+ if (IN_RANGE (start, merged_store->start,
+ merged_store->start + merged_store->width - 1))
+ {
+ if (merged_store->merge_overlapping (info))
+ continue;
+
+ return false;
+ }
+
+ /* |---store 1---| <gap> |---store 2---|.
+ Gap between stores. Start a new group. */
+ if (start != merged_store->start + merged_store->width)
+ {
+ merged_store = new merged_store_group (info);
+ m_merged_store_groups.safe_push (merged_store);
+ continue;
+ }
+
+ /* |---store 1---||---store 2---|
+ This store is consecutive to the previous one. */
+
+ unsigned int prev_size = merged_store->width;
+ /* If we will be exceeding the maximum store size start a new group but
+ record the alignment of the new store appropriately. */
+ if (prev_size + info->bitsize > MAX_STORE_BITSIZE)
+ {
+ merged_store_group *new_merged_store = new merged_store_group (info);
+ new_merged_store->align
+ = MIN (merged_store->align, merged_store->width);
+ merged_store = new_merged_store;
+ m_merged_store_groups.safe_push (merged_store);
+ continue;
+ }
+ /* Merge it into the current store group. */
+ else
+ {
+ merged_store->merge_into (info);
+ continue;
+ }
+
+ gcc_unreachable ();
+ return false;
+ }
+
+ gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
+ bool success = m_merged_store_groups.length () < m_store_info.length ();
+ if (dump_file)
+ {
+ if (success)
+ fprintf (dump_file, "Coalescing successful!\n"
+ "Merged into %u stores\n",
+ m_merged_store_groups.length ());
+ }
+
+ return success;
+}
+
+/* Return the type to use for the merged stores described by GROUP.
+ This is needed to get the alias sets right. */
+
+static tree
+get_type_for_merged_store (merged_store_group *group)
+{
+ gimple *stmt;
+ unsigned int i;
+ tree lhs = gimple_assign_lhs (group->stmts[0]);
+ tree type = reference_alias_ptr_type (lhs);
+ FOR_EACH_VEC_ELT (group->stmts, i, stmt)
+ {
+ if (i == 0)
+ continue;
+
+ lhs = gimple_assign_lhs (stmt);
+ tree type1 = reference_alias_ptr_type (lhs);
+ if (!alias_ptr_types_compatible_p (type, type1))
+ return ptr_type_node;
+ }
+ return type;
+}
+
+/* Return the location_t information we can find among the statements
+ in GROUP. */
+
+static location_t
+get_merged_store_location (merged_store_group *group)
+{
+ gimple *stmt;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (group->stmts, i, stmt)
+ {
+ if (gimple_has_location (stmt))
+ return gimple_location (stmt);
+ }
+ return UNKNOWN_LOCATION;
+}
+
+/* Return true if storing an integer of bitsize SIZE using an unaligned
+ access if prohibitively slow. */
+
+static bool
+slow_unaligned_size_access (unsigned int size)
+{
+ machine_mode mode = mode_for_size (size, MODE_INT, 0);
+ gcc_assert (mode != BLKmode);
+ return SLOW_UNALIGNED_ACCESS (mode, GET_MODE_ALIGNMENT (mode));
+}
+
+/* Given a merged store group GROUP output the widened version of it.
+ The store chain is against the base object BASE.
+ Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
+ unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
+ Make sure that the number of statements output is less than the number of
+ original statements. If a better sequence is possible emit it and
+ return true. See note on endianness at top of file for an explanation of
+ the BYTES_BIG_ENDIAN logic. */
+
+bool
+imm_store_chain_info::output_merged_store (tree base, merged_store_group *group)
+{
+ HOST_WIDE_INT pos = group->start;
+ HOST_WIDE_INT size = group->width;
+ HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
+ unsigned int align = group->align;
+ gcc_assert (IN_RANGE (size, 0, MAX_STORE_BITSIZE));
+
+ unsigned int orig_num_stmts = group->stmts.length ();
+ if (orig_num_stmts < 2)
+ return false;
+
+ /* We don't handle partial bitfields for now. */
+ if ((size % BITS_PER_UNIT != 0) || (pos % BITS_PER_UNIT != 0))
+ return false;
+
+ bool allow_unaligned
+ = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+ unsigned int try_size = MAX_STORE_BITSIZE;
+ while (try_size > size
+ || ((!allow_unaligned || slow_unaligned_size_access (try_size))
+ && try_size > align))
+ {
+ try_size /= 2;
+ if (try_size < BITS_PER_UNIT)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Failed to output merged store.\n"
+ "Failed to find starting size meeting"
+ " alignment requirements.\n");
+ }
+ return false;
+ }
+ }
+
+ HOST_WIDE_INT try_pos = bytepos;
+ int wi_offset = BYTES_BIG_ENDIAN ? size - try_size : 0;
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Trying to output merged store at pos " HOST_WIDE_INT_PRINT_DEC
+ ", of size " HOST_WIDE_INT_PRINT_DEC ", "
+ "starting size %u and alignment %u\n",
+ pos, size, try_size, align);
+ }
+
+ gimple_seq seq = NULL;
+ unsigned int num_stmts = 0;
+ tree offset_type = get_type_for_merged_store (group);
+ tree last_vdef, new_vuse;
+ last_vdef = gimple_vdef (group->last_stmt);
+ new_vuse = gimple_vuse (group->last_stmt);
+ location_t loc = get_merged_store_location (group);
+ gimple *stmt = NULL;
+ gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
+
+ while (size > 0)
+ {
+ tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
+ SET_TYPE_ALIGN (int_type, align);
+ tree addr = build_fold_addr_expr (base);
+ tree dest = fold_build2 (MEM_REF, int_type, addr,
+ build_int_cst (offset_type, try_pos));
+
+ wide_int wi_val = group->val;
+
+ wi_val
+ = wi::bit_and (wi_val, wi::shifted_mask (wi_offset, try_size, false,
+ MAX_STORE_BITSIZE));
+
+ wi_val
+ = wi::rshift (wi_val, wide_int::from (wi_offset, MAX_STORE_BITSIZE,
+ UNSIGNED),
+ UNSIGNED);
+
+ tree src = wide_int_to_tree (int_type, wi_val);
+ stmt = gimple_build_assign (dest, src);
+ gimple_set_vuse (stmt, new_vuse);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ gimple_set_bb (stmt, gsi_bb (last_gsi));
+
+ num_stmts++;
+
+ try_pos += try_size / BITS_PER_UNIT;
+
+ if (!BYTES_BIG_ENDIAN)
+ wi_offset += try_size;
+ size -= try_size;
+ align = try_size;
+ while (size < try_size)
+ try_size /= 2;
+
+ if (BYTES_BIG_ENDIAN)
+ wi_offset -= try_size;
+
+ if (num_stmts >= orig_num_stmts)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ }
+ return false;
+ }
+
+ tree new_vdef
+ = size ? make_ssa_name (gimple_vop (cfun), stmt) : last_vdef;
+ gimple_set_vdef (stmt, new_vdef);
+ SSA_NAME_DEF_STMT (new_vdef) = stmt;
+ new_vuse = new_vdef;
+ }
+ gcc_assert (size == 0);
+ gcc_assert (seq);
+ annotate_all_with_location (seq, loc);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "New sequence of %u stmts to replace old one of %u stmts\n",
+ num_stmts, orig_num_stmts);
+ if (dump_flags & TDF_DETAILS)
+ print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
+ }
+ gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ return true;
+}
+
+/* Process the merged_store_group objects created in the coalescing phase.
+ The stores are all against the base object BASE.
+ Try to output the widened stores and delete the original statements if
+ successful. Return true iff any changes were made. */
+
+bool
+imm_store_chain_info::output_merged_stores (tree base)
+{
+ unsigned int i;
+ merged_store_group *merged_store;
+ bool ret = false;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
+ {
+ bool successful_p = output_merged_store (base, merged_store);
+ if (successful_p)
+ {
+ gimple *stmt;
+ unsigned int j;
+ FOR_EACH_VEC_ELT (merged_store->stmts, j, stmt)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_remove (&gsi, true);
+ if (stmt != merged_store->last_stmt)
+ {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ }
+ }
+ }
+ ret |= successful_p;
+ }
+ if (ret && dump_file)
+ fprintf (dump_file, "Merging successful!\n");
+
+ return ret;
+}
+
+/* Coalesce the store_immediate_info objects recorded against the base object
+ BASE in the first phase and output them.
+ Delete the allocated structures.
+ Return true if any changes were made. */
+
+bool
+imm_store_chain_info::terminate_and_process_chain (tree base)
+{
+ /* Process store chain. */
+ bool ret = false;
+ if (m_store_info.length () > 1)
+ {
+ ret = coalesce_immediate_stores ();
+ if (ret)
+ ret = output_merged_stores (base);
+ }
+
+ /* Delete all the entries we allocated ourselves. */
+ store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ delete info;
+
+ merged_store_group *merged_info;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
+ delete merged_info;
+
+ return ret;
+}
+
+/* Return true iff LHS is a destination potentially interesting for
+ store merging. */
+
+static bool
+lhs_code_valid_for_store_merging_p (tree lhs)
+{
+ tree_code code = TREE_CODE (lhs);
+ if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
+ || code == COMPONENT_REF || code == BIT_FIELD_REF)
+ return true;
+
+ return false;
+}
+
+/* Entry point for the pass. Go over each basic block recording chains of
+ immediate stores. Upon encountering a terminating statement (as defined
+ by stmt_terminates_chain_p) process the recorded stores and emit the widened
+ variants. */
+
+unsigned int
+pass_store_merging::execute (function *fun)
+{
+ basic_block bb;
+ hash_set<gimple *> orig_stmts;
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ HOST_WIDE_INT num_statements = 0;
+ /* Record the original statements so that we can keep track of
+ statements emitted in this pass and not re-process new
+ statements. */
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple_set_visited (gsi_stmt (gsi), false);
+ num_statements++;
+ }
+
+ if (num_statements < 2)
+ continue;
+
+ if (dump_file)
+ fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (gimple_has_volatile_ops (stmt))
+ {
+ /* Terminate all chains. */
+ if (dump_file)
+ fprintf (dump_file, "Volatile access terminates "
+ "all chains\n");
+ terminate_and_process_all_chains ();
+ continue;
+ }
+
+ if (is_gimple_assign (stmt))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+
+ HOST_WIDE_INT bitsize, bitpos;
+ machine_mode mode;
+ int unsignedp = 0, reversep = 0, volatilep = 0;
+ tree offset, base_addr;
+ base_addr
+ = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ /* The ultimate base object may be volatile so catch it here. */
+ if (TREE_THIS_VOLATILE (base_addr))
+ {
+ /* Terminate all chains. */
+ if (dump_file)
+ fprintf (dump_file, "Volatile access terminates "
+ "all chains\n");
+ terminate_and_process_all_chains ();
+ continue;
+ }
+
+ gcc_assert (!volatilep);
+ bool invalid = offset || reversep || bitsize > MAX_STORE_BITSIZE
+ || gimple_assign_rhs_code (stmt) != INTEGER_CST
+ || !lhs_code_valid_for_store_merging_p (
+ gimple_assign_lhs (stmt));
+ struct imm_store_chain_info **chain_info
+ = m_stores.get (base_addr);
+ if (invalid)
+ {
+ if (gimple_vuse (stmt) || gimple_vdef (stmt))
+ terminate_all_aliasing_chains (lhs, base_addr, stmt);
+
+ continue;
+ }
+ else
+ {
+
+ store_immediate_info *info;
+ if (chain_info)
+ {
+ info = new store_immediate_info (
+ bitsize, bitpos, rhs, lhs, stmt,
+ (*chain_info)->m_store_info.length ());
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Recording immediate store from stmt:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ (*chain_info)->m_store_info.safe_push (info);
+ continue;
+ }
+ else if (invalid)
+ {
+ terminate_all_aliasing_chains (lhs, base_addr, stmt);
+ continue;
+ }
+
+ /* Store aliases any existing chain? */
+ terminate_all_aliasing_chains (lhs, base_addr, stmt);
+ /* Start a new chain. */
+ struct imm_store_chain_info *new_chain
+ = new imm_store_chain_info;
+ info = new store_immediate_info (bitsize, bitpos, rhs, lhs,
+ stmt, 0);
+ new_chain->m_store_info.safe_push (info);
+ m_stores.put (base_addr, new_chain);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Starting new chain with statement:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ continue;
+ }
+
+ continue;
+ }
+ /* Check if load/store aliases any existing chains. */
+ hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator
+ iter
+ = m_stores.begin ();
+ for (; iter != m_stores.end (); ++iter)
+ {
+ tree key = (*iter).first;
+ if (stmt_may_clobber_ref_p (stmt, key)
+ || ref_maybe_used_by_stmt_p (stmt, key))
+ terminate_and_release_chain (key);
+ }
+ }
+ terminate_and_process_all_chains ();
+ }
+ return 0;
+}
+
+} // anon namespace
+
+/* Construct and return a store merging pass object. */
+
+gimple_opt_pass *
+make_pass_store_merging (gcc::context *ctxt)
+{
+ return new pass_store_merging (ctxt);
+}
@@ -463,6 +463,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_ter, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
+ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fstore_merging, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 },
@@ -1100,6 +1100,12 @@ DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
"Maximum amount of similar bbs to compare a bb with.",
10, 0, 0)
+DEFPARAM (PARAM_STORE_MERGING_ALLOW_UNALIGNED,
+ "store-merging-allow-unaligned",
+ "Allow the store merging pass to introduce unaligned stores "
+ "if it is legal to do so",
+ 1, 0, 1)
+
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
"max-tail-merge-iterations",
"Maximum amount of iterations of the pass over a function.",
@@ -325,6 +325,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_fold_builtins);
NEXT_PASS (pass_optimize_widening_mul);
+ NEXT_PASS (pass_store_merging);
NEXT_PASS (pass_tail_calls);
/* If DCE is not run before checking for uninitialized uses,
we may get false warnings (e.g., testsuite/gcc.dg/uninit-5.c).
new file mode 100644
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u __attribute__((aligned));
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v __attribute__((aligned));
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v __attribute__((aligned));
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w __attribute__((aligned));
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foo1 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+void
+foo2 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 1;
+ p->d = 0;
+ p->e = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+struct bar
+{
+ int a;
+ unsigned char b;
+ unsigned char c;
+ short d;
+ unsigned char e;
+ unsigned char f;
+ unsigned char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 1;
+ p->a = 2;
+ p->c = 3;
+ p->d = 4;
+ p->e = 5;
+ p->f = 0;
+ p->g = 0xff;
+}
+
+__attribute__ ((noinline)) void
+foo2 (struct bar *p, struct bar *p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+int
+main (void)
+{
+ struct bar b1, b2;
+ foozero (&b1);
+ foozero (&b2);
+
+ foo1 (&b1);
+ if (b1.b != 1 || b1.a != 2 || b1.c != 3 || b1.d != 4 || b1.e != 5
+ || b1.f != 0 || b1.g != 0xff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ /* Make sure writes to aliasing struct pointers preserve the
+ correct order. */
+ foo2 (&b1, &b1);
+ if (b1.b != 0xa || b1.a != 0xfffff || b1.c != 0xff || b1.d != 0xfff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ foo2 (&b1, &b2);
+ if (b1.a != 0xfffff || b1.b != 0xff || b1.c != 0xff || b1.d != 0xfff
+ || b2.b != 0xa || b2.c != 0xc || b2.d != 0xbf)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Make sure stores to volatile addresses don't get combined with
+ other accesses. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ volatile short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *p)
+{
+ p->b = 0xa;
+ p->a = 0xb;
+ p->c = 0xc;
+ p->d = 0;
+ p->e = 0xd;
+ p->f = 0xe;
+ p->g = 0xf;
+}
+
+/* { dg-final { scan-tree-dump "Volatile access terminates all chains" "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "=\{v\} 0;" 1 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Check that we can merge interleaving stores that are guaranteed
+ to be non-aliasing. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *restrict p, struct bar *restrict p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->a = 0xab;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Make sure that non-aliasing non-constant interspersed stores do not
+ stop chains. */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char g;
+};
+
+void
+foo1 (struct bar *p, char tmp)
+{
+ p->a = 0;
+ p->b = 0;
+ p->g = tmp;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[.*\\\]" 1 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Check that we can widen accesses to bitfields. */
+
+struct bar {
+ int a : 3;
+ unsigned char b : 4;
+ unsigned char c : 1;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 3;
+ p->a = 2;
+ p->c = 1;
+ p->d = 4;
+ p->e = 5;
+}
+
+int
+main (void)
+{
+ struct bar p;
+ foozero (&p);
+ foo1 (&p);
+ if (p.a != 2 || p.b != 3 || p.c != 1 || p.d != 4 || p.e != 5
+ || p.f != 0 || p.g != 0)
+ __builtin_abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
@@ -3,22 +3,22 @@
int arr[4][4];
void
-foo ()
+foo (int x, int y)
{
- arr[0][1] = 1;
- arr[1][0] = -1;
- arr[2][0] = 1;
- arr[1][1] = -1;
- arr[0][2] = 1;
- arr[0][3] = -1;
- arr[1][2] = 1;
- arr[2][1] = -1;
- arr[3][0] = 1;
- arr[3][1] = -1;
- arr[2][2] = 1;
- arr[1][3] = -1;
- arr[2][3] = 1;
- arr[3][2] = -1;
+ arr[0][1] = x;
+ arr[1][0] = y;
+ arr[2][0] = x;
+ arr[1][1] = y;
+ arr[0][2] = x;
+ arr[0][3] = y;
+ arr[1][2] = x;
+ arr[2][1] = y;
+ arr[3][0] = x;
+ arr[3][1] = y;
+ arr[2][2] = x;
+ arr[1][3] = y;
+ arr[2][3] = x;
+ arr[3][2] = y;
}
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
new file mode 100644
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
@@ -424,6 +424,7 @@ extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);