diff mbox

[4.8,branch] PATCH: PR middle-end/53623: [4.7/4.8 Regression] sign extension is effectively split into two x86-64 instructions

Message ID CAMe9rOqNO3Qbh_cVD1rRZFB84q+mpkgya-rREKyf9kcSsy+Rzg@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu Feb. 25, 2015, 5:28 p.m. UTC
On Tue, Feb 17, 2015 at 4:47 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Feb 16, 2015 at 5:24 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Feb 16, 2015 at 5:18 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> On Mon, Feb 16, 2015 at 05:15:02AM -0800, H.J. Lu wrote:
>>>> On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> > On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>>>> >> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>>>> >>> This is a backport of the patch for PR middle-end/53623 plus all bug
>>>> >>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>>>> >>> for 4.8 branch?
>>>> >>
>>>> >> What about PR64286 and PR63659, are you sure those aren't related?
>>>> >> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
>>>> >> the 4.8 backport.
>>>> >>
>>>> >>         Jakub
>>>> >
>>>> > Fix for PR 63659 has been backported to 4.8 branch.  I will check if
>>>> > fix for PR 64286 is needed.
>>>> >
>>>> > --
>>>> > H.J.
>>>>
>>>> The fix for PR 64286 is an updated fix for PR 59754 which is caused by
>>>> the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
>>>> fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
>>>> I suggest
>>>>
>>>> 1. We go without my current backport and backport the fix for PR 64286
>>>> in a separate patch.  Or
>>>> 2. We go without my backport minus the backport of the PR 59754
>>>> fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch
>>>
>>> I think keeping the branch broken is bad, even if we don't have a testcase
>>> that really fails, pressumably the issue is just latent.
>>> So I'd strongly prefer
>>> 3. Add the PR64286 fix to the patch being tested and commit only when it as
>>> whole is tested, as one commit.
>>>
>>
>> I will do that and restart the testing.
>>
>
> I tested it on x86-32, x86-64 and x32.  There are no regressions.
>

Here is the patch.  OK for 4.8 branch?

Thanks.
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 469ee31..01749de 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,94 @@ 
+2015-02-16  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backported from 4.9 branch
+	2015-01-13  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/64286
+	* ree.c (combine_reaching_defs): Move part of comment earlier,
+	remove !SCALAR_INT_MODE_P check.
+	(add_removable_extension): Don't add vector mode
+	extensions if all uses of the source register aren't the same
+	vector extensions.
+
+2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2014-06-13  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	PR rtl-optimization/61446
+	* ree.c (combine_reaching_defs): Get the mode for the copy from
+	the extension insn rather than the defining insn.
+
+	2014-06-02  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	* ree.c (combine_reaching_defs): Do not reextend an insn if it
+	was marked as do_no_reextend.  If a copy is needed to eliminate
+	an extension, then mark it as do_not_reextend.
+
+	2014-02-14  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/60131
+	* ree.c (get_extended_src_reg): New function.
+	(combine_reaching_defs): Use it rather than assuming location
+	of REG.
+	(find_and_remove_re): Verify first operand of extension is
+	a REG before adding the insns to the copy list.
+
+	2014-01-17  Jeff Law  <law@redhat.com>
+
+	* ree.c (combine_set_extension): Temporarily disable test for
+	changing number of hard registers.
+
+	2014-01-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/59747
+	* ree.c (find_and_remove_re): Properly handle case where a second
+	eliminated extension requires widening a copy created for elimination
+	of a prior extension.
+	(combine_set_extension): Ensure that the number of hard regs needed
+	for a destination register does not change when we widen it.
+
+	2014-01-10  Jeff Law  <law@redhat.com>
+
+	PR middle-end/59743
+	* ree.c (combine_reaching_defs): Ensure the defining statement
+	occurs before the extension when optimizing extensions with
+	different source and destination hard registers.
+
+	2014-01-10  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/59754
+	* ree.c (combine_reaching_defs): Disallow !SCALAR_INT_MODE_P
+	modes in the REGNO != REGNO case.
+
+	2014-01-08  Jeff Law  <law@redhat.com>
+
+	* ree.c (get_sub_rtx): New function, extracted from...
+	(merge_def_and_ext): Here.
+	(combine_reaching_defs): Use get_sub_rtx.
+
+	2014-01-07  Jeff Law  <law@redhat.com>
+
+	PR middle-end/53623
+	* ree.c (combine_set_extension): Handle case where source
+	and destination registers in an extension insn are different.
+	(combine_reaching_defs): Allow source and destination
+	registers in extension to be different under limited
+	circumstances.
+	(add_removable_extension): Remove restriction that the
+	source and destination registers in the extension are the
+	same.
+	(find_and_remove_re): Emit a copy from the extension's
+	destination to its source after the defining insn if
+	the source and destination registers are different.
+
+	2013-12-12  Jeff Law  <law@redhat.com>
+
+	* i386.md (simple LEA peephole2): Add missing mode to zero_extend
+	for zero-extended MULT simple LEA pattern.
+
 2015-02-12  Jakub Jelinek  <jakub@redhat.com>
 
 	Backported from mainline
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 372ae63..aabd6ec 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17265,7 +17265,7 @@ 
    && REGNO (operands[0]) == REGNO (operands[1])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
-		   (zero_extend (ashift:SI (match_dup 1) (match_dup 2))))
+		   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
 
diff --git a/gcc/ree.c b/gcc/ree.c
index c7e106f..2db7b10 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -327,8 +327,30 @@  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   enum machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
   rtx new_set;
+  rtx cand_pat = PATTERN (cand->insn);
+
+  /* If the extension's source/destination registers are not the same
+     then we need to change the original load to reference the destination
+     of the extension.  Then we need to emit a copy from that destination
+     to the original destination of the load.  */
+  rtx new_reg;
+  bool copy_needed
+    = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
+  if (copy_needed)
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
+  else
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
+
+#if 0
+  /* Rethinking test.  Temporarily disabled.  */
+  /* We're going to be widening the result of DEF_INSN, ensure that doing so
+     doesn't change the number of hard registers needed for the result.  */
+  if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
+      != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
+			   GET_MODE (SET_DEST (*orig_set))))
+	return false;
+#endif
 
   /* Merge constants by directly moving the constant into the register under
      some conditions.  Recall that RTL constants are sign-extended.  */
@@ -387,7 +409,8 @@  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
       if (dump_file)
         {
           fprintf (dump_file,
-		   "Tentatively merged extension with definition:\n");
+		   "Tentatively merged extension with definition %s:\n",
+		   (copy_needed) ? "(copy needed)" : "");
           print_rtl_single (dump_file, curr_insn);
         }
       return true;
@@ -531,6 +554,8 @@  struct ATTRIBUTE_PACKED ext_modified
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
 
+  unsigned int do_not_reextend : 1;
+
   /* True if the insn is scheduled to be deleted.  */
   unsigned int deleted : 1;
 };
@@ -614,27 +639,21 @@  make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
   return ret;
 }
 
-/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
-   on the SET pattern.  */
-
-static bool
-merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+/* If DEF_INSN has single SET expression, possibly buried inside
+   a PARALLEL, return the address of the SET expression, else
+   return NULL.  This is similar to single_set, except that
+   single_set allows multiple SETs when all but one is dead.  */
+static rtx *
+get_sub_rtx (rtx def_insn)
 {
-  enum machine_mode ext_src_mode;
-  enum rtx_code code;
-  rtx *sub_rtx;
-  rtx s_expr;
-  int i;
-
-  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
-  code = GET_CODE (PATTERN (def_insn));
-  sub_rtx = NULL;
+  enum rtx_code code = GET_CODE (PATTERN (def_insn));
+  rtx *sub_rtx = NULL;
 
   if (code == PARALLEL)
     {
-      for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
+      for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
         {
-          s_expr = XVECEXP (PATTERN (def_insn), 0, i);
+          rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
           if (GET_CODE (s_expr) != SET)
             continue;
 
@@ -643,7 +662,7 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
           else
             {
               /* PARALLEL with multiple SETs.  */
-              return false;
+              return NULL;
             }
         }
     }
@@ -652,10 +671,27 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
   else
     {
       /* It is not a PARALLEL or a SET, what could it be ? */
-      return false;
+      return NULL;
     }
 
   gcc_assert (sub_rtx != NULL);
+  return sub_rtx;
+}
+
+/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
+   on the SET pattern.  */
+
+static bool
+merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+{
+  enum machine_mode ext_src_mode;
+  rtx *sub_rtx;
+
+  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
+  sub_rtx = get_sub_rtx (def_insn);
+
+  if (sub_rtx == NULL)
+    return false;
 
   if (REG_P (SET_DEST (*sub_rtx))
       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
@@ -683,6 +719,18 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
   return false;
 }
 
+/* Given SRC, which should be one or more extensions of a REG, strip
+   away the extensions and return the REG.  */
+
+static inline rtx
+get_extended_src_reg (rtx src)
+{
+  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+    src = XEXP (src, 0);
+  gcc_assert (REG_P (src));
+  return src;
+}
+
 /* This function goes through all reaching defs of the source
    of the candidate for elimination (CAND) and tries to combine
    the extension with the definition instruction.  The changes
@@ -709,6 +757,107 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
   if (!outcome)
     return false;
 
+  /* If the destination operand of the extension is a different
+     register than the source operand, then additional restrictions
+     are needed.  Note we have to handle cases where we have nested
+     extensions in the source operand.  */
+  bool copy_needed
+    = (REGNO (SET_DEST (PATTERN (cand->insn)))
+       != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
+  if (copy_needed)
+    {
+      /* Considering transformation of
+	 (set (reg1) (expression))
+	 ...
+	 (set (reg2) (any_extend (reg1)))
+
+	 into
+
+	 (set (reg2) (any_extend (expression)))
+	 (set (reg1) (reg2))
+	 ...  */
+
+      /* In theory we could handle more than one reaching def, it
+	 just makes the code to update the insn stream more complex.  */
+      if (state->defs_list.length () != 1)
+	return false;
+
+      /* We require the candidate not already be modified.  It may,
+	 for example have been changed from a (sign_extend (reg))
+	 into (zero_extend (sign_extend (reg))).
+
+	 Handling that case shouldn't be terribly difficult, but the code
+	 here and the code to emit copies would need auditing.  Until
+	 we see a need, this is the safe thing to do.  */
+      if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* There's only one reaching def.  */
+      rtx def_insn = state->defs_list[0];
+
+      /* The defining statement must not have been modified either.  */
+      if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* The defining statement and candidate insn must be in the same block.
+	 This is merely to keep the test for safety and updating the insn
+	 stream simple.  Also ensure that within the block the candidate
+	 follows the defining insn.  */
+      if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn)
+	  || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
+	return false;
+
+      /* If there is an overlap between the destination of DEF_INSN and
+	 CAND->insn, then this transformation is not safe.  Note we have
+	 to test in the widened mode.  */
+      rtx *dest_sub_rtx = get_sub_rtx (def_insn);
+      if (dest_sub_rtx == NULL
+	  || !REG_P (SET_DEST (*dest_sub_rtx)))
+	return false;
+
+      rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
+				 REGNO (SET_DEST (*dest_sub_rtx)));
+      if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
+	return false;
+
+      /* The destination register of the extension insn must not be
+	 used or set between the def_insn and cand->insn exclusive.  */
+      if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
+			      def_insn, cand->insn)
+	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
+				def_insn, cand->insn))
+	return false;
+
+      /* We must be able to copy between the two registers.   Generate,
+	 recognize and verify constraints of the copy.  Also fail if this
+	 generated more than one insn.
+
+         This generates garbage since we throw away the insn when we're
+	 done, only to recreate it later if this test was successful. 
+
+	 Make sure to get the mode from the extension (cand->insn).  This
+	 is different than in the code to emit the copy as we have not
+	 modified the defining insn yet.  */
+      start_sequence ();
+      rtx pat = PATTERN (cand->insn);
+      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+                                 REGNO (XEXP (SET_SRC (pat), 0)));
+      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+                                 REGNO (SET_DEST (pat)));
+      emit_move_insn (new_dst, new_src);
+
+      rtx insn = get_insns();
+      end_sequence ();
+      if (NEXT_INSN (insn))
+	return false;
+      if (recog_memoized (insn) == -1)
+	return false;
+      extract_insn (insn);
+      if (!constrain_operands (1))
+	return false;
+    }
+
+
   /* If cand->insn has been already modified, update cand->mode to a wider
      mode if possible, or punt.  */
   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
@@ -772,11 +921,15 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
             fprintf (dump_file, "All merges were successful.\n");
 
 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
-	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
-	      state->modified[INSN_UID (def_insn)].kind
-		= (cand->code == ZERO_EXTEND
-		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
+	    {
+	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
+	      if (modified->kind == EXT_MODIFIED_NONE)
+		modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
+						            : EXT_MODIFIED_SEXT);
 
+	      if (copy_needed)
+		modified->do_not_reextend = 1;
+	    }
           return true;
         }
       else
@@ -825,8 +978,7 @@  add_removable_extension (const_rtx expr, rtx insn,
 
   if (REG_P (dest)
       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
-      && REG_P (XEXP (src, 0))
-      && REGNO (dest) == REGNO (XEXP (src, 0)))
+      && REG_P (XEXP (src, 0)))
     {
       struct df_link *defs, *def;
       ext_cand *cand;
@@ -848,6 +1000,7 @@  add_removable_extension (const_rtx expr, rtx insn,
 	 different extension.  FIXME: this obviously can be improved.  */
       for (def = defs; def; def = def->next)
 	if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
+	    && idx != -1U
 	    && (cand = &(*insn_list)[idx - 1])
 	    && cand->code != code)
 	  {
@@ -859,6 +1012,57 @@  add_removable_extension (const_rtx expr, rtx insn,
 	      }
 	    return;
 	  }
+	/* For vector mode extensions, ensure that all uses of the
+	   XEXP (src, 0) register are the same extension (both code
+	   and to which mode), as unlike integral extensions lowpart
+	   subreg of the sign/zero extended register are not equal
+	   to the original register, so we have to change all uses or
+	   none.  */
+	else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
+	  {
+	    if (idx == 0)
+	      {
+		struct df_link *ref_chain, *ref_link;
+
+		ref_chain = DF_REF_CHAIN (def->ref);
+		for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+		  {
+		    if (ref_link->ref == NULL
+			|| DF_REF_INSN_INFO (ref_link->ref) == NULL)
+		      {
+			idx = -1U;
+			break;
+		      }
+		    rtx use_insn = DF_REF_INSN (ref_link->ref);
+		    const_rtx use_set;
+		    if (use_insn == insn || DEBUG_INSN_P (use_insn))
+		      continue;
+		    if (!(use_set = single_set (use_insn))
+			|| !REG_P (SET_DEST (use_set))
+			|| GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
+			|| GET_CODE (SET_SRC (use_set)) != code
+			|| !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
+					 XEXP (src, 0)))
+		      {
+			idx = -1U;
+			break;
+		      }
+		  }
+		if (idx == -1U)
+		  def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
+	      }
+	    if (idx == -1U)
+	      {
+		if (dump_file)
+		  {
+		    fprintf (dump_file, "Cannot eliminate extension:\n");
+		    print_rtl_single (dump_file, insn);
+		    fprintf (dump_file,
+			     " because some vector uses aren't extension\n");
+		  }
+		return;
+	      }
+	  }
 
       /* Then add the candidate to the list and insert the reaching definitions
          into the definition map.  */
@@ -910,6 +1114,7 @@  find_and_remove_re (void)
   int num_re_opportunities = 0, num_realized = 0, i;
   vec<ext_cand> reinsn_list;
   vec<rtx> reinsn_del_list;
+  vec<rtx> reinsn_copy_list;
   ext_state state;
 
   /* Construct DU chain to get all reaching definitions of each
@@ -921,6 +1126,7 @@  find_and_remove_re (void)
 
   max_insn_uid = get_max_uid ();
   reinsn_del_list.create (0);
+  reinsn_copy_list.create (0);
   reinsn_list = find_removable_extensions ();
   state.defs_list.create (0);
   state.copies_list.create (0);
@@ -947,17 +1153,62 @@  find_and_remove_re (void)
           if (dump_file)
             fprintf (dump_file, "Eliminated the extension.\n");
           num_realized++;
-          reinsn_del_list.safe_push (curr_cand->insn);
+	  /* If the RHS of the current candidate is not (extend (reg)), then
+	     we do not allow the optimization of extensions where
+	     the source and destination registers do not match.  Thus
+	     checking REG_P here is correct.  */
+	  if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
+	      && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
+		  != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
+	    {
+              reinsn_copy_list.safe_push (curr_cand->insn);
+              reinsn_copy_list.safe_push (state.defs_list[0]);
+	    }
+	  reinsn_del_list.safe_push (curr_cand->insn);
 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
         }
     }
 
+  /* The copy list contains pairs of insns which describe copies we
+     need to insert into the INSN stream.
+
+     The first insn in each pair is the extension insn, from which
+     we derive the source and destination of the copy.
+
+     The second insn in each pair is the memory reference where the
+     extension will ultimately happen.  We emit the new copy
+     immediately after this insn.
+
+     It may first appear that the arguments for the copy are reversed.
+     Remember that the memory reference will be changed to refer to the
+     destination of the extention.  So we're actually emitting a copy
+     from the new destination to the old destination.  */
+  for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
+    {
+      rtx curr_insn = reinsn_copy_list[i];
+      rtx def_insn = reinsn_copy_list[i + 1];
+
+      /* Use the mode of the destination of the defining insn
+	 for the mode of the copy.  This is necessary if the
+	 defining insn was used to eliminate a second extension
+	 that was wider than the first.  */
+      rtx sub_rtx = *get_sub_rtx (def_insn);
+      rtx pat = PATTERN (curr_insn);
+      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+				 REGNO (XEXP (SET_SRC (pat), 0)));
+      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+				 REGNO (SET_DEST (pat)));
+      rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
+      emit_insn_after (set, def_insn);
+    }
+
   /* Delete all useless extensions here in one sweep.  */
   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
     delete_insn (curr_insn);
 
   reinsn_list.release ();
   reinsn_del_list.release ();
+  reinsn_copy_list.release ();
   state.defs_list.release ();
   state.copies_list.release ();
   state.modified_list.release ();
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ff0934b..1403a91 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,40 @@ 
+2015-02-16  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backported from 4.9 branch
+	PR rtl-optimization/64286
+	2015-01-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* gcc.target/i386/avx2-pr64286.c: New test.
+
+2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2014-06-02  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	* g++.dg/pr61094: New test.
+
+	2014-02-14  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/60131
+	* g++.dg/torture/pr60131.C: New test.
+
+	2014-01-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/59747
+	* gcc.c-torture/execute/pr59747.c: New test.
+
+	2014-01-10  Jeff Law  <law@redhat.com>
+
+	PR middle-end/59743
+	* gcc.c-torture/compile/pr59743.c: New test.
+
+	Backport from mainline:
+	2014-01-07  Jeff Law  <law@redhat.com>
+
+	PR middle-end/53623
+	* gcc.target/i386/pr53623.c: New test.
+
 2015-02-13  Mikael Morin  <mikael@gcc.gnu.org>
 
 	PR fortran/63744
diff --git a/gcc/testsuite/g++.dg/pr61094.C b/gcc/testsuite/g++.dg/pr61094.C
new file mode 100644
index 0000000..35adc25
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr61094.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" }  */
+
+template <typename> struct A {
+  unsigned _width, _height, _depth, _spectrum;
+  template <typename t> A(t p1) {
+    int a = p1.size();
+    if (a) {
+      _width = p1._width;
+      _depth = _height = _spectrum = p1._spectrum;
+    }
+  }
+  long size() { return (long)_width * _height * _depth * _spectrum; }
+};
+
+int d;
+void fn1(void *);
+A<int> *fn2();
+void fn3() {
+  int b;
+  for (;;) {
+    A<char> c(*fn2());
+    fn1(&c);
+    if (d || !b)
+      throw;
+  }
+}
+
+
+
+
diff --git a/gcc/testsuite/g++.dg/torture/pr60131.C b/gcc/testsuite/g++.dg/torture/pr60131.C
new file mode 100644
index 0000000..23dde31
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr60131.C
@@ -0,0 +1,23 @@ 
+// { dg-do compile }
+struct A { short a; };
+int **b;
+unsigned long c;
+
+bool foo ();
+unsigned bar (unsigned i);
+extern void baz () __attribute__((noreturn));
+
+int *
+test (unsigned x, struct A *y)
+{
+  unsigned v;
+  if (foo () || y[x].a == -1)
+    {
+      c = bar (x);
+      return 0;
+    }
+  v = y[x].a;
+  if (v >= 23)
+    baz ();
+  return b[v];
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr59743.c b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
new file mode 100644
index 0000000..8dadba5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
@@ -0,0 +1,23 @@ 
+/* PR middle-end/59743 */
+
+typedef union {
+  long all;
+  struct {
+    int low;
+    int high;
+  } s;
+} udwords;
+int a, b, c, d;
+void __udivmoddi4() {
+  udwords r;
+  d = __builtin_clz(0);
+  r.s.low = 0;
+  for (; d; --d) {
+    r.s.high = r.s.high << 1 | r.s.low >> a;
+    r.s.low = r.s.low << b >> 1;
+    int s = -r.all;
+    c = s;
+    r.all--;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr59747.c b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
new file mode 100644
index 0000000..d45a908
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
@@ -0,0 +1,27 @@ 
+extern void abort (void);
+extern void exit (int);
+
+int a[6], b, c = 1, d;
+short e;
+
+int __attribute__ ((noinline))
+fn1 (int p)
+{
+  b = a[p];
+}
+
+int
+main ()
+{
+  if (sizeof (long long) != 8)
+    exit (0);
+
+  a[0] = 1;
+  if (c)
+    e--;
+  d = e;
+  long long f = e;
+  if (fn1 ((f >> 56) & 1) != 0)
+    abort ();
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr64286.c b/gcc/testsuite/gcc.target/i386/avx2-pr64286.c
new file mode 100644
index 0000000..2edb321
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr64286.c
@@ -0,0 +1,37 @@ 
+/* PR rtl-optimization/64286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <x86intrin.h>
+#include "avx2-check.h"
+
+__m128i v;
+__m256i w;
+
+__attribute__((noinline, noclone)) void
+foo (__m128i *p, __m128i *q)
+{
+  __m128i a = _mm_loadu_si128 (p);
+  __m128i b = _mm_xor_si128 (a, v);
+  w = _mm256_cvtepu8_epi16 (a);
+  *q = b;
+}
+
+static void
+avx2_test (void)
+{
+  v = _mm_set1_epi8 (0x40);
+  __m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+  __m128i d;
+  foo (&c, &d);
+  __m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49,
+			    0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41);
+  __m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9,
+				8, 7, 6, 5, 4, 3, 2, 1);
+  if (memcmp (&w, &f, sizeof (w)) != 0
+      || memcmp (&d, &e, sizeof (d)) != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr53623.c b/gcc/testsuite/gcc.target/i386/pr53623.c
new file mode 100644
index 0000000..35c578b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53623.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile { target {! ia32 } } } */
+/* { dg-options "-O2 -fdump-rtl-ree" } */
+
+
+#include <stdint.h>
+
+typedef (*inst_t)(int64_t rdi, int64_t rsi, int64_t rdx);
+
+int16_t code[256];
+inst_t dispatch[256];
+
+void an_inst(int64_t rdi, int64_t rsi, int64_t rdx) {
+  rdx = code[rdx];
+  uint8_t inst = (uint8_t) rdx;
+  rdx >>= 8;
+  dispatch[inst](rdi, rsi, rdx);
+}
+
+int main(void) {
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump "copy needed" "ree" } } */
+/* { dg-final { cleanup-rtl-dump "ree" } } */
+