@@ -1,3 +1,94 @@
+2015-02-16 H.J. Lu <hongjiu.lu@intel.com>
+
+ Backported from 4.9 branch
+ 2015-01-13 Jakub Jelinek <jakub@redhat.com>
+
+ PR rtl-optimization/64286
+ * ree.c (combine_reaching_defs): Move part of comment earlier,
+ remove !SCALAR_INT_MODE_P check.
+ (add_removable_extension): Don't add vector mode
+ extensions if all uses of the source register aren't the same
+ vector extensions.
+
+2015-02-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ Backport from mainline:
+ 2014-06-13 Jeff Law <law@redhat.com>
+
+ PR rtl-optimization/61094
+ PR rtl-optimization/61446
+ * ree.c (combine_reaching_defs): Get the mode for the copy from
+ the extension insn rather than the defining insn.
+
+ 2014-06-02 Jeff Law <law@redhat.com>
+
+ PR rtl-optimization/61094
+ * ree.c (combine_reaching_defs): Do not reextend an insn if it
+ was marked as do_no_reextend. If a copy is needed to eliminate
+ an extension, then mark it as do_not_reextend.
+
+ 2014-02-14 Jeff Law <law@redhat.com>
+
+ PR rtl-optimization/60131
+ * ree.c (get_extended_src_reg): New function.
+ (combine_reaching_defs): Use it rather than assuming location
+ of REG.
+ (find_and_remove_re): Verify first operand of extension is
+ a REG before adding the insns to the copy list.
+
+ 2014-01-17 Jeff Law <law@redhat.com>
+
+ * ree.c (combine_set_extension): Temporarily disable test for
+ changing number of hard registers.
+
+ 2014-01-15 Jeff Law <law@redhat.com>
+
+ PR tree-optimization/59747
+ * ree.c (find_and_remove_re): Properly handle case where a second
+ eliminated extension requires widening a copy created for elimination
+ of a prior extension.
+ (combine_set_extension): Ensure that the number of hard regs needed
+ for a destination register does not change when we widen it.
+
+ 2014-01-10 Jeff Law <law@redhat.com>
+
+ PR middle-end/59743
+ * ree.c (combine_reaching_defs): Ensure the defining statement
+ occurs before the extension when optimizing extensions with
+ different source and destination hard registers.
+
+ 2014-01-10 Jakub Jelinek <jakub@redhat.com>
+
+ PR rtl-optimization/59754
+ * ree.c (combine_reaching_defs): Disallow !SCALAR_INT_MODE_P
+ modes in the REGNO != REGNO case.
+
+ 2014-01-08 Jeff Law <law@redhat.com>
+
+ * ree.c (get_sub_rtx): New function, extracted from...
+ (merge_def_and_ext): Here.
+ (combine_reaching_defs): Use get_sub_rtx.
+
+ 2014-01-07 Jeff Law <law@redhat.com>
+
+ PR middle-end/53623
+ * ree.c (combine_set_extension): Handle case where source
+ and destination registers in an extension insn are different.
+ (combine_reaching_defs): Allow source and destination
+ registers in extension to be different under limited
+ circumstances.
+ (add_removable_extension): Remove restriction that the
+ source and destination registers in the extension are the
+ same.
+ (find_and_remove_re): Emit a copy from the extension's
+ destination to its source after the defining insn if
+ the source and destination registers are different.
+
+ 2013-12-12 Jeff Law <law@redhat.com>
+
+ * i386.md (simple LEA peephole2): Add missing mode to zero_extend
+ for zero-extended MULT simple LEA pattern.
+
2015-02-12 Jakub Jelinek <jakub@redhat.com>
Backported from mainline
@@ -17265,7 +17265,7 @@
&& REGNO (operands[0]) == REGNO (operands[1])
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0)
- (zero_extend (ashift:SI (match_dup 1) (match_dup 2))))
+ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
@@ -327,8 +327,30 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
{
rtx orig_src = SET_SRC (*orig_set);
enum machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
- rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
rtx new_set;
+ rtx cand_pat = PATTERN (cand->insn);
+
+ /* If the extension's source/destination registers are not the same
+ then we need to change the original load to reference the destination
+ of the extension. Then we need to emit a copy from that destination
+ to the original destination of the load. */
+ rtx new_reg;
+ bool copy_needed
+ = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
+ if (copy_needed)
+ new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
+ else
+ new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
+
+#if 0
+ /* Rethinking test. Temporarily disabled. */
+ /* We're going to be widening the result of DEF_INSN, ensure that doing so
+ doesn't change the number of hard registers needed for the result. */
+ if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
+ != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
+ GET_MODE (SET_DEST (*orig_set))))
+ return false;
+#endif
/* Merge constants by directly moving the constant into the register under
some conditions. Recall that RTL constants are sign-extended. */
@@ -387,7 +409,8 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
if (dump_file)
{
fprintf (dump_file,
- "Tentatively merged extension with definition:\n");
+ "Tentatively merged extension with definition %s:\n",
+ (copy_needed) ? "(copy needed)" : "");
print_rtl_single (dump_file, curr_insn);
}
return true;
@@ -531,6 +554,8 @@ struct ATTRIBUTE_PACKED ext_modified
/* Kind of modification of the insn. */
ENUM_BITFIELD(ext_modified_kind) kind : 2;
+ unsigned int do_not_reextend : 1;
+
/* True if the insn is scheduled to be deleted. */
unsigned int deleted : 1;
};
@@ -614,27 +639,21 @@ make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
return ret;
}
-/* Merge the DEF_INSN with an extension. Calls combine_set_extension
- on the SET pattern. */
-
-static bool
-merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+/* If DEF_INSN has single SET expression, possibly buried inside
+ a PARALLEL, return the address of the SET expression, else
+ return NULL. This is similar to single_set, except that
+ single_set allows multiple SETs when all but one is dead. */
+static rtx *
+get_sub_rtx (rtx def_insn)
{
- enum machine_mode ext_src_mode;
- enum rtx_code code;
- rtx *sub_rtx;
- rtx s_expr;
- int i;
-
- ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
- code = GET_CODE (PATTERN (def_insn));
- sub_rtx = NULL;
+ enum rtx_code code = GET_CODE (PATTERN (def_insn));
+ rtx *sub_rtx = NULL;
if (code == PARALLEL)
{
- for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
+ for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
{
- s_expr = XVECEXP (PATTERN (def_insn), 0, i);
+ rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
if (GET_CODE (s_expr) != SET)
continue;
@@ -643,7 +662,7 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
else
{
/* PARALLEL with multiple SETs. */
- return false;
+ return NULL;
}
}
}
@@ -652,10 +671,27 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
else
{
/* It is not a PARALLEL or a SET, what could it be ? */
- return false;
+ return NULL;
}
gcc_assert (sub_rtx != NULL);
+ return sub_rtx;
+}
+
+/* Merge the DEF_INSN with an extension. Calls combine_set_extension
+ on the SET pattern. */
+
+static bool
+merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+{
+ enum machine_mode ext_src_mode;
+ rtx *sub_rtx;
+
+ ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
+ sub_rtx = get_sub_rtx (def_insn);
+
+ if (sub_rtx == NULL)
+ return false;
if (REG_P (SET_DEST (*sub_rtx))
&& (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
@@ -683,6 +719,18 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
return false;
}
+/* Given SRC, which should be one or more extensions of a REG, strip
+ away the extensions and return the REG. */
+
+static inline rtx
+get_extended_src_reg (rtx src)
+{
+ while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+ src = XEXP (src, 0);
+ gcc_assert (REG_P (src));
+ return src;
+}
+
/* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction. The changes
@@ -709,6 +757,107 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
if (!outcome)
return false;
+ /* If the destination operand of the extension is a different
+ register than the source operand, then additional restrictions
+ are needed. Note we have to handle cases where we have nested
+ extensions in the source operand. */
+ bool copy_needed
+ = (REGNO (SET_DEST (PATTERN (cand->insn)))
+ != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
+ if (copy_needed)
+ {
+ /* Considering transformation of
+ (set (reg1) (expression))
+ ...
+ (set (reg2) (any_extend (reg1)))
+
+ into
+
+ (set (reg2) (any_extend (expression)))
+ (set (reg1) (reg2))
+ ... */
+
+ /* In theory we could handle more than one reaching def, it
+ just makes the code to update the insn stream more complex. */
+ if (state->defs_list.length () != 1)
+ return false;
+
+ /* We require the candidate not already be modified. It may,
+ for example have been changed from a (sign_extend (reg))
+ into (zero_extend (sign_extend (reg))).
+
+ Handling that case shouldn't be terribly difficult, but the code
+ here and the code to emit copies would need auditing. Until
+ we see a need, this is the safe thing to do. */
+ if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
+ return false;
+
+ /* There's only one reaching def. */
+ rtx def_insn = state->defs_list[0];
+
+ /* The defining statement must not have been modified either. */
+ if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
+ return false;
+
+ /* The defining statement and candidate insn must be in the same block.
+ This is merely to keep the test for safety and updating the insn
+ stream simple. Also ensure that within the block the candidate
+ follows the defining insn. */
+ if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn)
+ || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
+ return false;
+
+ /* If there is an overlap between the destination of DEF_INSN and
+ CAND->insn, then this transformation is not safe. Note we have
+ to test in the widened mode. */
+ rtx *dest_sub_rtx = get_sub_rtx (def_insn);
+ if (dest_sub_rtx == NULL
+ || !REG_P (SET_DEST (*dest_sub_rtx)))
+ return false;
+
+ rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
+ REGNO (SET_DEST (*dest_sub_rtx)));
+ if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
+ return false;
+
+ /* The destination register of the extension insn must not be
+ used or set between the def_insn and cand->insn exclusive. */
+ if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
+ def_insn, cand->insn)
+ || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
+ def_insn, cand->insn))
+ return false;
+
+ /* We must be able to copy between the two registers. Generate,
+ recognize and verify constraints of the copy. Also fail if this
+ generated more than one insn.
+
+ This generates garbage since we throw away the insn when we're
+ done, only to recreate it later if this test was successful.
+
+ Make sure to get the mode from the extension (cand->insn). This
+ is different than in the code to emit the copy as we have not
+ modified the defining insn yet. */
+ start_sequence ();
+ rtx pat = PATTERN (cand->insn);
+ rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+ REGNO (XEXP (SET_SRC (pat), 0)));
+ rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+ REGNO (SET_DEST (pat)));
+ emit_move_insn (new_dst, new_src);
+
+ rtx insn = get_insns();
+ end_sequence ();
+ if (NEXT_INSN (insn))
+ return false;
+ if (recog_memoized (insn) == -1)
+ return false;
+ extract_insn (insn);
+ if (!constrain_operands (1))
+ return false;
+ }
+
+
/* If cand->insn has been already modified, update cand->mode to a wider
mode if possible, or punt. */
if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
@@ -772,11 +921,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
fprintf (dump_file, "All merges were successful.\n");
FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
- if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
- state->modified[INSN_UID (def_insn)].kind
- = (cand->code == ZERO_EXTEND
- ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
+ {
+ ext_modified *modified = &state->modified[INSN_UID (def_insn)];
+ if (modified->kind == EXT_MODIFIED_NONE)
+ modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
+ : EXT_MODIFIED_SEXT);
+ if (copy_needed)
+ modified->do_not_reextend = 1;
+ }
return true;
}
else
@@ -825,8 +978,7 @@ add_removable_extension (const_rtx expr, rtx insn,
if (REG_P (dest)
&& (code == SIGN_EXTEND || code == ZERO_EXTEND)
- && REG_P (XEXP (src, 0))
- && REGNO (dest) == REGNO (XEXP (src, 0)))
+ && REG_P (XEXP (src, 0)))
{
struct df_link *defs, *def;
ext_cand *cand;
@@ -848,6 +1000,7 @@ add_removable_extension (const_rtx expr, rtx insn,
different extension. FIXME: this obviously can be improved. */
for (def = defs; def; def = def->next)
if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
+ && idx != -1U
&& (cand = &(*insn_list)[idx - 1])
&& cand->code != code)
{
@@ -859,6 +1012,57 @@ add_removable_extension (const_rtx expr, rtx insn,
}
return;
}
+ /* For vector mode extensions, ensure that all uses of the
+ XEXP (src, 0) register are the same extension (both code
+ and to which mode), as unlike integral extensions lowpart
+ subreg of the sign/zero extended register are not equal
+ to the original register, so we have to change all uses or
+ none. */
+ else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
+ {
+ if (idx == 0)
+ {
+ struct df_link *ref_chain, *ref_link;
+
+ ref_chain = DF_REF_CHAIN (def->ref);
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ {
+ if (ref_link->ref == NULL
+ || DF_REF_INSN_INFO (ref_link->ref) == NULL)
+ {
+ idx = -1U;
+ break;
+ }
+ rtx use_insn = DF_REF_INSN (ref_link->ref);
+ const_rtx use_set;
+ if (use_insn == insn || DEBUG_INSN_P (use_insn))
+ continue;
+ if (!(use_set = single_set (use_insn))
+ || !REG_P (SET_DEST (use_set))
+ || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
+ || GET_CODE (SET_SRC (use_set)) != code
+ || !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
+ XEXP (src, 0)))
+ {
+ idx = -1U;
+ break;
+ }
+ }
+ if (idx == -1U)
+ def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
+ }
+ if (idx == -1U)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Cannot eliminate extension:\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file,
+ " because some vector uses aren't extension\n");
+ }
+ return;
+ }
+ }
/* Then add the candidate to the list and insert the reaching definitions
into the definition map. */
@@ -910,6 +1114,7 @@ find_and_remove_re (void)
int num_re_opportunities = 0, num_realized = 0, i;
vec<ext_cand> reinsn_list;
vec<rtx> reinsn_del_list;
+ vec<rtx> reinsn_copy_list;
ext_state state;
/* Construct DU chain to get all reaching definitions of each
@@ -921,6 +1126,7 @@ find_and_remove_re (void)
max_insn_uid = get_max_uid ();
reinsn_del_list.create (0);
+ reinsn_copy_list.create (0);
reinsn_list = find_removable_extensions ();
state.defs_list.create (0);
state.copies_list.create (0);
@@ -947,17 +1153,62 @@ find_and_remove_re (void)
if (dump_file)
fprintf (dump_file, "Eliminated the extension.\n");
num_realized++;
- reinsn_del_list.safe_push (curr_cand->insn);
+ /* If the RHS of the current candidate is not (extend (reg)), then
+ we do not allow the optimization of extensions where
+ the source and destination registers do not match. Thus
+ checking REG_P here is correct. */
+ if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
+ && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
+ != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
+ {
+ reinsn_copy_list.safe_push (curr_cand->insn);
+ reinsn_copy_list.safe_push (state.defs_list[0]);
+ }
+ reinsn_del_list.safe_push (curr_cand->insn);
state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
}
}
+ /* The copy list contains pairs of insns which describe copies we
+ need to insert into the INSN stream.
+
+ The first insn in each pair is the extension insn, from which
+ we derive the source and destination of the copy.
+
+ The second insn in each pair is the memory reference where the
+ extension will ultimately happen. We emit the new copy
+ immediately after this insn.
+
+ It may first appear that the arguments for the copy are reversed.
+ Remember that the memory reference will be changed to refer to the
+ destination of the extention. So we're actually emitting a copy
+ from the new destination to the old destination. */
+ for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
+ {
+ rtx curr_insn = reinsn_copy_list[i];
+ rtx def_insn = reinsn_copy_list[i + 1];
+
+ /* Use the mode of the destination of the defining insn
+ for the mode of the copy. This is necessary if the
+ defining insn was used to eliminate a second extension
+ that was wider than the first. */
+ rtx sub_rtx = *get_sub_rtx (def_insn);
+ rtx pat = PATTERN (curr_insn);
+ rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+ REGNO (XEXP (SET_SRC (pat), 0)));
+ rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+ REGNO (SET_DEST (pat)));
+ rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
+ emit_insn_after (set, def_insn);
+ }
+
/* Delete all useless extensions here in one sweep. */
FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
delete_insn (curr_insn);
reinsn_list.release ();
reinsn_del_list.release ();
+ reinsn_copy_list.release ();
state.defs_list.release ();
state.copies_list.release ();
state.modified_list.release ();
@@ -1,3 +1,40 @@
+2015-02-16 H.J. Lu <hongjiu.lu@intel.com>
+
+ Backported from 4.9 branch
+ PR rtl-optimization/64286
+ 2015-01-13 Jakub Jelinek <jakub@redhat.com>
+
+ * gcc.target/i386/avx2-pr64286.c: New test.
+
+2015-02-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ Backport from mainline:
+ 2014-06-02 Jeff Law <law@redhat.com>
+
+ PR rtl-optimization/61094
+ * g++.dg/pr61094: New test.
+
+ 2014-02-14 Jeff Law <law@redhat.com>
+
+ PR rtl-optimization/60131
+ * g++.dg/torture/pr60131.C: New test.
+
+ 2014-01-15 Jeff Law <law@redhat.com>
+
+ PR tree-optimization/59747
+ * gcc.c-torture/execute/pr59747.c: New test.
+
+ 2014-01-10 Jeff Law <law@redhat.com>
+
+ PR middle-end/59743
+ * gcc.c-torture/compile/pr59743.c: New test.
+
+ Backport from mainline:
+ 2014-01-07 Jeff Law <law@redhat.com>
+
+ PR middle-end/53623
+ * gcc.target/i386/pr53623.c: New test.
+
2015-02-13 Mikael Morin <mikael@gcc.gnu.org>
PR fortran/63744
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+template <typename> struct A {
+ unsigned _width, _height, _depth, _spectrum;
+ template <typename t> A(t p1) {
+ int a = p1.size();
+ if (a) {
+ _width = p1._width;
+ _depth = _height = _spectrum = p1._spectrum;
+ }
+ }
+ long size() { return (long)_width * _height * _depth * _spectrum; }
+};
+
+int d;
+void fn1(void *);
+A<int> *fn2();
+void fn3() {
+ int b;
+ for (;;) {
+ A<char> c(*fn2());
+ fn1(&c);
+ if (d || !b)
+ throw;
+ }
+}
+
+
+
+
new file mode 100644
@@ -0,0 +1,23 @@
+// { dg-do compile }
+struct A { short a; };
+int **b;
+unsigned long c;
+
+bool foo ();
+unsigned bar (unsigned i);
+extern void baz () __attribute__((noreturn));
+
+int *
+test (unsigned x, struct A *y)
+{
+ unsigned v;
+ if (foo () || y[x].a == -1)
+ {
+ c = bar (x);
+ return 0;
+ }
+ v = y[x].a;
+ if (v >= 23)
+ baz ();
+ return b[v];
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* PR middle-end/59743 */
+
+typedef union {
+ long all;
+ struct {
+ int low;
+ int high;
+ } s;
+} udwords;
+int a, b, c, d;
+void __udivmoddi4() {
+ udwords r;
+ d = __builtin_clz(0);
+ r.s.low = 0;
+ for (; d; --d) {
+ r.s.high = r.s.high << 1 | r.s.low >> a;
+ r.s.low = r.s.low << b >> 1;
+ int s = -r.all;
+ c = s;
+ r.all--;
+ }
+}
+
new file mode 100644
@@ -0,0 +1,27 @@
+extern void abort (void);
+extern void exit (int);
+
+int a[6], b, c = 1, d;
+short e;
+
+int __attribute__ ((noinline))
+fn1 (int p)
+{
+ b = a[p];
+}
+
+int
+main ()
+{
+ if (sizeof (long long) != 8)
+ exit (0);
+
+ a[0] = 1;
+ if (c)
+ e--;
+ d = e;
+ long long f = e;
+ if (fn1 ((f >> 56) & 1) != 0)
+ abort ();
+ exit (0);
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* PR rtl-optimization/64286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <x86intrin.h>
+#include "avx2-check.h"
+
+__m128i v;
+__m256i w;
+
+__attribute__((noinline, noclone)) void
+foo (__m128i *p, __m128i *q)
+{
+ __m128i a = _mm_loadu_si128 (p);
+ __m128i b = _mm_xor_si128 (a, v);
+ w = _mm256_cvtepu8_epi16 (a);
+ *q = b;
+}
+
+static void
+avx2_test (void)
+{
+ v = _mm_set1_epi8 (0x40);
+ __m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+ __m128i d;
+ foo (&c, &d);
+ __m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49,
+ 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41);
+ __m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1);
+ if (memcmp (&w, &f, sizeof (w)) != 0
+ || memcmp (&d, &e, sizeof (d)) != 0)
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {! ia32 } } } */
+/* { dg-options "-O2 -fdump-rtl-ree" } */
+
+
+#include <stdint.h>
+
+typedef (*inst_t)(int64_t rdi, int64_t rsi, int64_t rdx);
+
+int16_t code[256];
+inst_t dispatch[256];
+
+void an_inst(int64_t rdi, int64_t rsi, int64_t rdx) {
+ rdx = code[rdx];
+ uint8_t inst = (uint8_t) rdx;
+ rdx >>= 8;
+ dispatch[inst](rdi, rsi, rdx);
+}
+
+int main(void) {
+ return 0;
+}
+
+/* { dg-final { scan-rtl-dump "copy needed" "ree" } } */
+/* { dg-final { cleanup-rtl-dump "ree" } } */
+