diff mbox

[4.8,branch] PATCH: PR middle-end/53623: [4.7/4.8 Regression] sign extension is effectively split into two x86-64 instructions

Message ID 20150215205339.GA26523@gmail.com
State New
Headers show

Commit Message

H.J. Lu Feb. 15, 2015, 8:53 p.m. UTC
Hi,

This is a backport of the patch for PR middle-end/53623 plus all bug
fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
for 4.8 branch?

Thanks.


H.J.
---

Comments

Richard Biener Feb. 16, 2015, 9:29 a.m. UTC | #1
On Sun, Feb 15, 2015 at 9:53 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> Hi,
>
> This is a backport of the patch for PR middle-end/53623 plus all bug
> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
> for 4.8 branch?

Ok if nobody objects within 24h (you changed the bug to wrong-code
which is why I am considering it - a missed-optimization fix this big
shouldn't be backported).

Richard.

> Thanks.
>
>
> H.J.
> ---
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 469ee31..44bf322 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,82 @@
> +2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
> +
> +       Backport from mainline:
> +       2014-06-13  Jeff Law  <law@redhat.com>
> +
> +       PR rtl-optimization/61094
> +       PR rtl-optimization/61446
> +       * ree.c (combine_reaching_defs): Get the mode for the copy from
> +       the extension insn rather than the defining insn.
> +
> +       2014-06-02  Jeff Law  <law@redhat.com>
> +
> +       PR rtl-optimization/61094
> +       * ree.c (combine_reaching_defs): Do not reextend an insn if it
> +       was marked as do_no_reextend.  If a copy is needed to eliminate
> +       an extension, then mark it as do_not_reextend.
> +
> +       2014-02-14  Jeff Law  <law@redhat.com>
> +
> +       PR rtl-optimization/60131
> +       * ree.c (get_extended_src_reg): New function.
> +       (combine_reaching_defs): Use it rather than assuming location
> +       of REG.
> +       (find_and_remove_re): Verify first operand of extension is
> +       a REG before adding the insns to the copy list.
> +
> +       2014-01-17  Jeff Law  <law@redhat.com>
> +
> +       * ree.c (combine_set_extension): Temporarily disable test for
> +       changing number of hard registers.
> +
> +       2014-01-15  Jeff Law  <law@redhat.com>
> +
> +       PR tree-optimization/59747
> +       * ree.c (find_and_remove_re): Properly handle case where a second
> +       eliminated extension requires widening a copy created for elimination
> +       of a prior extension.
> +       (combine_set_extension): Ensure that the number of hard regs needed
> +       for a destination register does not change when we widen it.
> +
> +       2014-01-10  Jeff Law  <law@redhat.com>
> +
> +       PR middle-end/59743
> +       * ree.c (combine_reaching_defs): Ensure the defining statement
> +       occurs before the extension when optimizing extensions with
> +       different source and destination hard registers.
> +
> +       2014-01-10  Jakub Jelinek  <jakub@redhat.com>
> +
> +       PR rtl-optimization/59754
> +       * ree.c (combine_reaching_defs): Disallow !SCALAR_INT_MODE_P
> +       modes in the REGNO != REGNO case.
> +
> +       2014-01-08  Jeff Law  <law@redhat.com>
> +
> +       * ree.c (get_sub_rtx): New function, extracted from...
> +       (merge_def_and_ext): Here.
> +       (combine_reaching_defs): Use get_sub_rtx.
> +
> +       2014-01-07  Jeff Law  <law@redhat.com>
> +
> +       PR middle-end/53623
> +       * ree.c (combine_set_extension): Handle case where source
> +       and destination registers in an extension insn are different.
> +       (combine_reaching_defs): Allow source and destination
> +       registers in extension to be different under limited
> +       circumstances.
> +       (add_removable_extension): Remove restriction that the
> +       source and destination registers in the extension are the
> +       same.
> +       (find_and_remove_re): Emit a copy from the extension's
> +       destination to its source after the defining insn if
> +       the source and destination registers are different.
> +
> +       2013-12-12  Jeff Law  <law@redhat.com>
> +
> +       * i386.md (simple LEA peephole2): Add missing mode to zero_extend
> +       for zero-extended MULT simple LEA pattern.
> +
>  2015-02-12  Jakub Jelinek  <jakub@redhat.com>
>
>         Backported from mainline
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 372ae63..aabd6ec 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -17265,7 +17265,7 @@
>     && REGNO (operands[0]) == REGNO (operands[1])
>     && peep2_regno_dead_p (0, FLAGS_REG)"
>    [(parallel [(set (match_dup 0)
> -                  (zero_extend (ashift:SI (match_dup 1) (match_dup 2))))
> +                  (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
>               (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
>
> diff --git a/gcc/ree.c b/gcc/ree.c
> index c7e106f..bc566ad 100644
> --- a/gcc/ree.c
> +++ b/gcc/ree.c
> @@ -327,8 +327,30 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
>  {
>    rtx orig_src = SET_SRC (*orig_set);
>    enum machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
> -  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
>    rtx new_set;
> +  rtx cand_pat = PATTERN (cand->insn);
> +
> +  /* If the extension's source/destination registers are not the same
> +     then we need to change the original load to reference the destination
> +     of the extension.  Then we need to emit a copy from that destination
> +     to the original destination of the load.  */
> +  rtx new_reg;
> +  bool copy_needed
> +    = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
> +  if (copy_needed)
> +    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
> +  else
> +    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
> +
> +#if 0
> +  /* Rethinking test.  Temporarily disabled.  */
> +  /* We're going to be widening the result of DEF_INSN, ensure that doing so
> +     doesn't change the number of hard registers needed for the result.  */
> +  if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
> +      != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
> +                          GET_MODE (SET_DEST (*orig_set))))
> +       return false;
> +#endif
>
>    /* Merge constants by directly moving the constant into the register under
>       some conditions.  Recall that RTL constants are sign-extended.  */
> @@ -387,7 +409,8 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
>        if (dump_file)
>          {
>            fprintf (dump_file,
> -                  "Tentatively merged extension with definition:\n");
> +                  "Tentatively merged extension with definition %s:\n",
> +                  (copy_needed) ? "(copy needed)" : "");
>            print_rtl_single (dump_file, curr_insn);
>          }
>        return true;
> @@ -531,6 +554,8 @@ struct ATTRIBUTE_PACKED ext_modified
>    /* Kind of modification of the insn.  */
>    ENUM_BITFIELD(ext_modified_kind) kind : 2;
>
> +  unsigned int do_not_reextend : 1;
> +
>    /* True if the insn is scheduled to be deleted.  */
>    unsigned int deleted : 1;
>  };
> @@ -614,27 +639,21 @@ make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
>    return ret;
>  }
>
> -/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
> -   on the SET pattern.  */
> -
> -static bool
> -merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
> +/* If DEF_INSN has single SET expression, possibly buried inside
> +   a PARALLEL, return the address of the SET expression, else
> +   return NULL.  This is similar to single_set, except that
> +   single_set allows multiple SETs when all but one is dead.  */
> +static rtx *
> +get_sub_rtx (rtx def_insn)
>  {
> -  enum machine_mode ext_src_mode;
> -  enum rtx_code code;
> -  rtx *sub_rtx;
> -  rtx s_expr;
> -  int i;
> -
> -  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
> -  code = GET_CODE (PATTERN (def_insn));
> -  sub_rtx = NULL;
> +  enum rtx_code code = GET_CODE (PATTERN (def_insn));
> +  rtx *sub_rtx = NULL;
>
>    if (code == PARALLEL)
>      {
> -      for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
> +      for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
>          {
> -          s_expr = XVECEXP (PATTERN (def_insn), 0, i);
> +          rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
>            if (GET_CODE (s_expr) != SET)
>              continue;
>
> @@ -643,7 +662,7 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
>            else
>              {
>                /* PARALLEL with multiple SETs.  */
> -              return false;
> +              return NULL;
>              }
>          }
>      }
> @@ -652,10 +671,27 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
>    else
>      {
>        /* It is not a PARALLEL or a SET, what could it be ? */
> -      return false;
> +      return NULL;
>      }
>
>    gcc_assert (sub_rtx != NULL);
> +  return sub_rtx;
> +}
> +
> +/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
> +   on the SET pattern.  */
> +
> +static bool
> +merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
> +{
> +  enum machine_mode ext_src_mode;
> +  rtx *sub_rtx;
> +
> +  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
> +  sub_rtx = get_sub_rtx (def_insn);
> +
> +  if (sub_rtx == NULL)
> +    return false;
>
>    if (REG_P (SET_DEST (*sub_rtx))
>        && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
> @@ -683,6 +719,18 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
>    return false;
>  }
>
> +/* Given SRC, which should be one or more extensions of a REG, strip
> +   away the extensions and return the REG.  */
> +
> +static inline rtx
> +get_extended_src_reg (rtx src)
> +{
> +  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
> +    src = XEXP (src, 0);
> +  gcc_assert (REG_P (src));
> +  return src;
> +}
> +
>  /* This function goes through all reaching defs of the source
>     of the candidate for elimination (CAND) and tries to combine
>     the extension with the definition instruction.  The changes
> @@ -709,6 +757,108 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>    if (!outcome)
>      return false;
>
> +  /* If the destination operand of the extension is a different
> +     register than the source operand, then additional restrictions
> +     are needed.  Note we have to handle cases where we have nested
> +     extensions in the source operand.  */
> +  bool copy_needed
> +    = (REGNO (SET_DEST (PATTERN (cand->insn)))
> +       != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
> +  if (copy_needed)
> +    {
> +      /* In theory we could handle more than one reaching def, it
> +        just makes the code to update the insn stream more complex.  */
> +      if (state->defs_list.length () != 1)
> +       return false;
> +
> +      /* We require the candidate not already be modified.  It may,
> +        for example have been changed from a (sign_extend (reg))
> +        into (zero_extend (sign_extend (reg))).
> +
> +        Handling that case shouldn't be terribly difficult, but the code
> +        here and the code to emit copies would need auditing.  Until
> +        we see a need, this is the safe thing to do.  */
> +      if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
> +       return false;
> +
> +      /* Transformation of
> +        (set (reg1) (expression))
> +        (set (reg2) (any_extend (reg1)))
> +        into
> +        (set (reg2) (any_extend (expression)))
> +        (set (reg1) (reg2))
> +        is only valid for scalar integral modes, as it relies on the low
> +        subreg of reg1 to have the value of (expression), which is not true
> +        e.g. for vector modes.  */
> +      if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
> +       return false;
> +
> +      /* There's only one reaching def.  */
> +      rtx def_insn = state->defs_list[0];
> +
> +      /* The defining statement must not have been modified either.  */
> +      if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
> +       return false;
> +
> +      /* The defining statement and candidate insn must be in the same block.
> +        This is merely to keep the test for safety and updating the insn
> +        stream simple.  Also ensure that within the block the candidate
> +        follows the defining insn.  */
> +      if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn)
> +         || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
> +       return false;
> +
> +      /* If there is an overlap between the destination of DEF_INSN and
> +        CAND->insn, then this transformation is not safe.  Note we have
> +        to test in the widened mode.  */
> +      rtx *dest_sub_rtx = get_sub_rtx (def_insn);
> +      if (dest_sub_rtx == NULL
> +         || !REG_P (SET_DEST (*dest_sub_rtx)))
> +       return false;
> +
> +      rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
> +                                REGNO (SET_DEST (*dest_sub_rtx)));
> +      if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
> +       return false;
> +
> +      /* The destination register of the extension insn must not be
> +        used or set between the def_insn and cand->insn exclusive.  */
> +      if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
> +                             def_insn, cand->insn)
> +         || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
> +                               def_insn, cand->insn))
> +       return false;
> +
> +      /* We must be able to copy between the two registers.   Generate,
> +        recognize and verify constraints of the copy.  Also fail if this
> +        generated more than one insn.
> +
> +         This generates garbage since we throw away the insn when we're
> +        done, only to recreate it later if this test was successful.
> +
> +        Make sure to get the mode from the extension (cand->insn).  This
> +        is different than in the code to emit the copy as we have not
> +        modified the defining insn yet.  */
> +      start_sequence ();
> +      rtx pat = PATTERN (cand->insn);
> +      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
> +                                 REGNO (XEXP (SET_SRC (pat), 0)));
> +      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
> +                                 REGNO (SET_DEST (pat)));
> +      emit_move_insn (new_dst, new_src);
> +
> +      rtx insn = get_insns();
> +      end_sequence ();
> +      if (NEXT_INSN (insn))
> +       return false;
> +      if (recog_memoized (insn) == -1)
> +       return false;
> +      extract_insn (insn);
> +      if (!constrain_operands (1))
> +       return false;
> +    }
> +
> +
>    /* If cand->insn has been already modified, update cand->mode to a wider
>       mode if possible, or punt.  */
>    if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
> @@ -772,11 +922,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>              fprintf (dump_file, "All merges were successful.\n");
>
>           FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
> -           if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
> -             state->modified[INSN_UID (def_insn)].kind
> -               = (cand->code == ZERO_EXTEND
> -                  ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
> +           {
> +             ext_modified *modified = &state->modified[INSN_UID (def_insn)];
> +             if (modified->kind == EXT_MODIFIED_NONE)
> +               modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
> +                                                           : EXT_MODIFIED_SEXT);
>
> +             if (copy_needed)
> +               modified->do_not_reextend = 1;
> +           }
>            return true;
>          }
>        else
> @@ -825,8 +979,7 @@ add_removable_extension (const_rtx expr, rtx insn,
>
>    if (REG_P (dest)
>        && (code == SIGN_EXTEND || code == ZERO_EXTEND)
> -      && REG_P (XEXP (src, 0))
> -      && REGNO (dest) == REGNO (XEXP (src, 0)))
> +      && REG_P (XEXP (src, 0)))
>      {
>        struct df_link *defs, *def;
>        ext_cand *cand;
> @@ -910,6 +1063,7 @@ find_and_remove_re (void)
>    int num_re_opportunities = 0, num_realized = 0, i;
>    vec<ext_cand> reinsn_list;
>    vec<rtx> reinsn_del_list;
> +  vec<rtx> reinsn_copy_list;
>    ext_state state;
>
>    /* Construct DU chain to get all reaching definitions of each
> @@ -921,6 +1075,7 @@ find_and_remove_re (void)
>
>    max_insn_uid = get_max_uid ();
>    reinsn_del_list.create (0);
> +  reinsn_copy_list.create (0);
>    reinsn_list = find_removable_extensions ();
>    state.defs_list.create (0);
>    state.copies_list.create (0);
> @@ -947,17 +1102,62 @@ find_and_remove_re (void)
>            if (dump_file)
>              fprintf (dump_file, "Eliminated the extension.\n");
>            num_realized++;
> -          reinsn_del_list.safe_push (curr_cand->insn);
> +         /* If the RHS of the current candidate is not (extend (reg)), then
> +            we do not allow the optimization of extensions where
> +            the source and destination registers do not match.  Thus
> +            checking REG_P here is correct.  */
> +         if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
> +             && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
> +                 != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
> +           {
> +              reinsn_copy_list.safe_push (curr_cand->insn);
> +              reinsn_copy_list.safe_push (state.defs_list[0]);
> +           }
> +         reinsn_del_list.safe_push (curr_cand->insn);
>           state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
>          }
>      }
>
> +  /* The copy list contains pairs of insns which describe copies we
> +     need to insert into the INSN stream.
> +
> +     The first insn in each pair is the extension insn, from which
> +     we derive the source and destination of the copy.
> +
> +     The second insn in each pair is the memory reference where the
> +     extension will ultimately happen.  We emit the new copy
> +     immediately after this insn.
> +
> +     It may first appear that the arguments for the copy are reversed.
> +     Remember that the memory reference will be changed to refer to the
> +     destination of the extention.  So we're actually emitting a copy
> +     from the new destination to the old destination.  */
> +  for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
> +    {
> +      rtx curr_insn = reinsn_copy_list[i];
> +      rtx def_insn = reinsn_copy_list[i + 1];
> +
> +      /* Use the mode of the destination of the defining insn
> +        for the mode of the copy.  This is necessary if the
> +        defining insn was used to eliminate a second extension
> +        that was wider than the first.  */
> +      rtx sub_rtx = *get_sub_rtx (def_insn);
> +      rtx pat = PATTERN (curr_insn);
> +      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
> +                                REGNO (XEXP (SET_SRC (pat), 0)));
> +      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
> +                                REGNO (SET_DEST (pat)));
> +      rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
> +      emit_insn_after (set, def_insn);
> +    }
> +
>    /* Delete all useless extensions here in one sweep.  */
>    FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
>      delete_insn (curr_insn);
>
>    reinsn_list.release ();
>    reinsn_del_list.release ();
> +  reinsn_copy_list.release ();
>    state.defs_list.release ();
>    state.copies_list.release ();
>    state.modified_list.release ();
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index ff0934b..a408fed 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,32 @@
> +2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
> +
> +       Backport from mainline:
> +       2014-06-02  Jeff Law  <law@redhat.com>
> +
> +       PR rtl-optimization/61094
> +       * g++.dg/pr61094: New test.
> +
> +       2014-02-14  Jeff Law  <law@redhat.com>
> +
> +       PR rtl-optimization/60131
> +       * g++.dg/torture/pr60131.C: New test.
> +
> +       2014-01-15  Jeff Law  <law@redhat.com>
> +
> +       PR tree-optimization/59747
> +       * gcc.c-torture/execute/pr59747.c: New test.
> +
> +       2014-01-10  Jeff Law  <law@redhat.com>
> +
> +       PR middle-end/59743
> +       * gcc.c-torture/compile/pr59743.c: New test.
> +
> +       Backport from mainline:
> +       2014-01-07  Jeff Law  <law@redhat.com>
> +
> +       PR middle-end/53623
> +       * gcc.target/i386/pr53623.c: New test.
> +
>  2015-02-13  Mikael Morin  <mikael@gcc.gnu.org>
>
>         PR fortran/63744
> diff --git a/gcc/testsuite/g++.dg/pr61094.C b/gcc/testsuite/g++.dg/pr61094.C
> new file mode 100644
> index 0000000..35adc25
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr61094.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" }  */
> +
> +template <typename> struct A {
> +  unsigned _width, _height, _depth, _spectrum;
> +  template <typename t> A(t p1) {
> +    int a = p1.size();
> +    if (a) {
> +      _width = p1._width;
> +      _depth = _height = _spectrum = p1._spectrum;
> +    }
> +  }
> +  long size() { return (long)_width * _height * _depth * _spectrum; }
> +};
> +
> +int d;
> +void fn1(void *);
> +A<int> *fn2();
> +void fn3() {
> +  int b;
> +  for (;;) {
> +    A<char> c(*fn2());
> +    fn1(&c);
> +    if (d || !b)
> +      throw;
> +  }
> +}
> +
> +
> +
> +
> diff --git a/gcc/testsuite/g++.dg/torture/pr60131.C b/gcc/testsuite/g++.dg/torture/pr60131.C
> new file mode 100644
> index 0000000..23dde31
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/torture/pr60131.C
> @@ -0,0 +1,23 @@
> +// { dg-do compile }
> +struct A { short a; };
> +int **b;
> +unsigned long c;
> +
> +bool foo ();
> +unsigned bar (unsigned i);
> +extern void baz () __attribute__((noreturn));
> +
> +int *
> +test (unsigned x, struct A *y)
> +{
> +  unsigned v;
> +  if (foo () || y[x].a == -1)
> +    {
> +      c = bar (x);
> +      return 0;
> +    }
> +  v = y[x].a;
> +  if (v >= 23)
> +    baz ();
> +  return b[v];
> +}
> diff --git a/gcc/testsuite/gcc.c-torture/compile/pr59743.c b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
> new file mode 100644
> index 0000000..8dadba5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
> @@ -0,0 +1,23 @@
> +/* PR middle-end/59743 */
> +
> +typedef union {
> +  long all;
> +  struct {
> +    int low;
> +    int high;
> +  } s;
> +} udwords;
> +int a, b, c, d;
> +void __udivmoddi4() {
> +  udwords r;
> +  d = __builtin_clz(0);
> +  r.s.low = 0;
> +  for (; d; --d) {
> +    r.s.high = r.s.high << 1 | r.s.low >> a;
> +    r.s.low = r.s.low << b >> 1;
> +    int s = -r.all;
> +    c = s;
> +    r.all--;
> +  }
> +}
> +
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr59747.c b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
> new file mode 100644
> index 0000000..d45a908
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
> @@ -0,0 +1,27 @@
> +extern void abort (void);
> +extern void exit (int);
> +
> +int a[6], b, c = 1, d;
> +short e;
> +
> +int __attribute__ ((noinline))
> +fn1 (int p)
> +{
> +  b = a[p];
> +}
> +
> +int
> +main ()
> +{
> +  if (sizeof (long long) != 8)
> +    exit (0);
> +
> +  a[0] = 1;
> +  if (c)
> +    e--;
> +  d = e;
> +  long long f = e;
> +  if (fn1 ((f >> 56) & 1) != 0)
> +    abort ();
> +  exit (0);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr53623.c b/gcc/testsuite/gcc.target/i386/pr53623.c
> new file mode 100644
> index 0000000..35c578b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr53623.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target {! ia32 } } } */
> +/* { dg-options "-O2 -fdump-rtl-ree" } */
> +
> +
> +#include <stdint.h>
> +
> +typedef (*inst_t)(int64_t rdi, int64_t rsi, int64_t rdx);
> +
> +int16_t code[256];
> +inst_t dispatch[256];
> +
> +void an_inst(int64_t rdi, int64_t rsi, int64_t rdx) {
> +  rdx = code[rdx];
> +  uint8_t inst = (uint8_t) rdx;
> +  rdx >>= 8;
> +  dispatch[inst](rdi, rsi, rdx);
> +}
> +
> +int main(void) {
> +  return 0;
> +}
> +
> +/* { dg-final { scan-rtl-dump "copy needed" "ree" } } */
> +/* { dg-final { cleanup-rtl-dump "ree" } } */
> +
Jakub Jelinek Feb. 16, 2015, 9:35 a.m. UTC | #2
On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
> This is a backport of the patch for PR middle-end/53623 plus all bug
> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
> for 4.8 branch?

What about PR64286 and PR63659, are you sure those aren't related?
I mean, they are on the 4.9 branch and I don't see why they couldn't affect
the 4.8 backport.

	Jakub
H.J. Lu Feb. 16, 2015, 12:30 p.m. UTC | #3
On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>> This is a backport of the patch for PR middle-end/53623 plus all bug
>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>> for 4.8 branch?
>
> What about PR64286 and PR63659, are you sure those aren't related?
> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
> the 4.8 backport.
>
>         Jakub

Fix for PR 63659 has been backported to 4.8 branch.  I will check if
fix for PR 64286 is needed.
H.J. Lu Feb. 16, 2015, 1:15 p.m. UTC | #4
On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>>> This is a backport of the patch for PR middle-end/53623 plus all bug
>>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>>> for 4.8 branch?
>>
>> What about PR64286 and PR63659, are you sure those aren't related?
>> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
>> the 4.8 backport.
>>
>>         Jakub
>
> Fix for PR 63659 has been backported to 4.8 branch.  I will check if
> fix for PR 64286 is needed.
>
> --
> H.J.

The fix for PR 64286 is an updated fix for PR 59754 which is caused by
the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
I suggest

1. We go without my current backport and backport the fix for PR 64286
in a separate patch.  Or
2. We go without my backport minus the backport of the PR 59754
fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch
Jakub Jelinek Feb. 16, 2015, 1:18 p.m. UTC | #5
On Mon, Feb 16, 2015 at 05:15:02AM -0800, H.J. Lu wrote:
> On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> >> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
> >>> This is a backport of the patch for PR middle-end/53623 plus all bug
> >>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
> >>> for 4.8 branch?
> >>
> >> What about PR64286 and PR63659, are you sure those aren't related?
> >> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
> >> the 4.8 backport.
> >>
> >>         Jakub
> >
> > Fix for PR 63659 has been backported to 4.8 branch.  I will check if
> > fix for PR 64286 is needed.
> >
> > --
> > H.J.
> 
> The fix for PR 64286 is an updated fix for PR 59754 which is caused by
> the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
> fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
> I suggest
> 
> 1. We go without my current backport and backport the fix for PR 64286
> in a separate patch.  Or
> 2. We go without my backport minus the backport of the PR 59754
> fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch

I think keeping the branch broken is bad, even if we don't have a testcase
that really fails, pressumably the issue is just latent.
So I'd strongly prefer
3. Add the PR64286 fix to the patch being tested and commit only when it as
whole is tested, as one commit.

	Jakub
H.J. Lu Feb. 16, 2015, 1:24 p.m. UTC | #6
On Mon, Feb 16, 2015 at 5:18 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Feb 16, 2015 at 05:15:02AM -0800, H.J. Lu wrote:
>> On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> >> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>> >>> This is a backport of the patch for PR middle-end/53623 plus all bug
>> >>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>> >>> for 4.8 branch?
>> >>
>> >> What about PR64286 and PR63659, are you sure those aren't related?
>> >> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
>> >> the 4.8 backport.
>> >>
>> >>         Jakub
>> >
>> > Fix for PR 63659 has been backported to 4.8 branch.  I will check if
>> > fix for PR 64286 is needed.
>> >
>> > --
>> > H.J.
>>
>> The fix for PR 64286 is an updated fix for PR 59754 which is caused by
>> the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
>> fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
>> I suggest
>>
>> 1. We go without my current backport and backport the fix for PR 64286
>> in a separate patch.  Or
>> 2. We go without my backport minus the backport of the PR 59754
>> fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch
>
> I think keeping the branch broken is bad, even if we don't have a testcase
> that really fails, pressumably the issue is just latent.
> So I'd strongly prefer
> 3. Add the PR64286 fix to the patch being tested and commit only when it as
> whole is tested, as one commit.
>

I will do that and restart the testing.

BTW, PR 53623 was a missed optimization bug originally.  Now
it turns out that it fixed a wrong code bug.  We are trying to extract
a run-time testcase from PR 64941 for trunk and branches.

Thanks.
H.J. Lu Feb. 16, 2015, 1:44 p.m. UTC | #7
On Mon, Feb 16, 2015 at 5:24 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Feb 16, 2015 at 5:18 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Mon, Feb 16, 2015 at 05:15:02AM -0800, H.J. Lu wrote:
>>> On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> > On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> >> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>>> >>> This is a backport of the patch for PR middle-end/53623 plus all bug
>>> >>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>>> >>> for 4.8 branch?
>>> >>
>>> >> What about PR64286 and PR63659, are you sure those aren't related?
>>> >> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
>>> >> the 4.8 backport.
>>> >>
>>> >>         Jakub
>>> >
>>> > Fix for PR 63659 has been backported to 4.8 branch.  I will check if
>>> > fix for PR 64286 is needed.
>>> >
>>> > --
>>> > H.J.
>>>
>>> The fix for PR 64286 is an updated fix for PR 59754 which is caused by
>>> the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
>>> fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
>>> I suggest
>>>
>>> 1. We go without my current backport and backport the fix for PR 64286
>>> in a separate patch.  Or
>>> 2. We go without my backport minus the backport of the PR 59754
>>> fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch
>>
>> I think keeping the branch broken is bad, even if we don't have a testcase
>> that really fails, pressumably the issue is just latent.
>> So I'd strongly prefer
>> 3. Add the PR64286 fix to the patch being tested and commit only when it as
>> whole is tested, as one commit.
>>

4.9 branch backport of  the PR64286 fix caused a regression on ARM64:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64286#c11

Should it be a concern for 4.8 backport?  Should we also backport r215205:

commit b71346c449d2b4a63985a39c4c092ecdfb37b5a0
Author: jiwang <jiwang@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Sep 12 09:29:16 2014 +0000

    [Ree] Ensure inserted copy don't change the number of hard registers

    2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>

      gcc/
        * ree.c (combine_reaching_defs): Ensure inserted copy don't change the
        number of hard registers.


    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@215205
138bc75d-0d04-0410-961f-82ee72b054a4

to 4.8 and 4.9 branches?

> I will do that and restart the testing.
>
> BTW, PR 53623 was a missed optimization bug originally.  Now
> it turns out that it fixed a wrong code bug.  We are trying to extract
> a run-time testcase from PR 64941 for trunk and branches.
>
> Thanks.
>
> --
> H.J.
Jakub Jelinek Feb. 16, 2015, 1:47 p.m. UTC | #8
On Mon, Feb 16, 2015 at 05:44:45AM -0800, H.J. Lu wrote:
> Should it be a concern for 4.8 backport?  Should we also backport r215205:

Probably.  But, please wait for Jeff Law's approval of all of this before
committing.

> commit b71346c449d2b4a63985a39c4c092ecdfb37b5a0
> Author: jiwang <jiwang@138bc75d-0d04-0410-961f-82ee72b054a4>
> Date:   Fri Sep 12 09:29:16 2014 +0000
> 
>     [Ree] Ensure inserted copy don't change the number of hard registers
> 
>     2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
> 
>       gcc/
>         * ree.c (combine_reaching_defs): Ensure inserted copy don't change the
>         number of hard registers.
> 
> 
>     git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@215205
> 138bc75d-0d04-0410-961f-82ee72b054a4
> 
> to 4.8 and 4.9 branches?

	Jakub
H.J. Lu Feb. 17, 2015, 12:47 p.m. UTC | #9
On Mon, Feb 16, 2015 at 5:24 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Feb 16, 2015 at 5:18 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Mon, Feb 16, 2015 at 05:15:02AM -0800, H.J. Lu wrote:
>>> On Mon, Feb 16, 2015 at 4:30 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> > On Mon, Feb 16, 2015 at 1:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> >> On Sun, Feb 15, 2015 at 12:53:39PM -0800, H.J. Lu wrote:
>>> >>> This is a backport of the patch for PR middle-end/53623 plus all bug
>>> >>> fixes caused by it.  Tested on Linux/x86-32, Linux/x86-64 and x32.  OK
>>> >>> for 4.8 branch?
>>> >>
>>> >> What about PR64286 and PR63659, are you sure those aren't related?
>>> >> I mean, they are on the 4.9 branch and I don't see why they couldn't affect
>>> >> the 4.8 backport.
>>> >>
>>> >>         Jakub
>>> >
>>> > Fix for PR 63659 has been backported to 4.8 branch.  I will check if
>>> > fix for PR 64286 is needed.
>>> >
>>> > --
>>> > H.J.
>>>
>>> The fix for PR 64286 is an updated fix for PR 59754 which is caused by
>>> the fix for PR 53623.  But the testcase in the fix for PR 64286 doesn't
>>> fail on 4.8 branch + my backport of the fix for PR 53623 on Haswell.
>>> I suggest
>>>
>>> 1. We go without my current backport and backport the fix for PR 64286
>>> in a separate patch.  Or
>>> 2. We go without my backport minus the backport of the PR 59754
>>> fix and backport the fixes for PR 59754 plus PR 64286 in a separate patch
>>
>> I think keeping the branch broken is bad, even if we don't have a testcase
>> that really fails, pressumably the issue is just latent.
>> So I'd strongly prefer
>> 3. Add the PR64286 fix to the patch being tested and commit only when it as
>> whole is tested, as one commit.
>>
>
> I will do that and restart the testing.
>

I tested it on x86-32, x86-64 and x32.  There are no regressions.
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 469ee31..44bf322 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,82 @@ 
+2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2014-06-13  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	PR rtl-optimization/61446
+	* ree.c (combine_reaching_defs): Get the mode for the copy from
+	the extension insn rather than the defining insn.
+
+	2014-06-02  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	* ree.c (combine_reaching_defs): Do not reextend an insn if it
+	was marked as do_no_reextend.  If a copy is needed to eliminate
+	an extension, then mark it as do_not_reextend.
+
+	2014-02-14  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/60131
+	* ree.c (get_extended_src_reg): New function.
+	(combine_reaching_defs): Use it rather than assuming location
+	of REG.
+	(find_and_remove_re): Verify first operand of extension is
+	a REG before adding the insns to the copy list.
+
+	2014-01-17  Jeff Law  <law@redhat.com>
+
+	* ree.c (combine_set_extension): Temporarily disable test for
+	changing number of hard registers.
+
+	2014-01-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/59747
+	* ree.c (find_and_remove_re): Properly handle case where a second
+	eliminated extension requires widening a copy created for elimination
+	of a prior extension.
+	(combine_set_extension): Ensure that the number of hard regs needed
+	for a destination register does not change when we widen it.
+
+	2014-01-10  Jeff Law  <law@redhat.com>
+
+	PR middle-end/59743
+	* ree.c (combine_reaching_defs): Ensure the defining statement
+	occurs before the extension when optimizing extensions with
+	different source and destination hard registers.
+
+	2014-01-10  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/59754
+	* ree.c (combine_reaching_defs): Disallow !SCALAR_INT_MODE_P
+	modes in the REGNO != REGNO case.
+
+	2014-01-08  Jeff Law  <law@redhat.com>
+
+	* ree.c (get_sub_rtx): New function, extracted from...
+	(merge_def_and_ext): Here.
+	(combine_reaching_defs): Use get_sub_rtx.
+
+	2014-01-07  Jeff Law  <law@redhat.com>
+
+	PR middle-end/53623
+	* ree.c (combine_set_extension): Handle case where source
+	and destination registers in an extension insn are different.
+	(combine_reaching_defs): Allow source and destination
+	registers in extension to be different under limited
+	circumstances.
+	(add_removable_extension): Remove restriction that the
+	source and destination registers in the extension are the
+	same.
+	(find_and_remove_re): Emit a copy from the extension's
+	destination to its source after the defining insn if
+	the source and destination registers are different.
+
+	2013-12-12  Jeff Law  <law@redhat.com>
+
+	* i386.md (simple LEA peephole2): Add missing mode to zero_extend
+	for zero-extended MULT simple LEA pattern.
+
 2015-02-12  Jakub Jelinek  <jakub@redhat.com>
 
 	Backported from mainline
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 372ae63..aabd6ec 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17265,7 +17265,7 @@ 
    && REGNO (operands[0]) == REGNO (operands[1])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
-		   (zero_extend (ashift:SI (match_dup 1) (match_dup 2))))
+		   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
 
diff --git a/gcc/ree.c b/gcc/ree.c
index c7e106f..bc566ad 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -327,8 +327,30 @@  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   enum machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
   rtx new_set;
+  rtx cand_pat = PATTERN (cand->insn);
+
+  /* If the extension's source/destination registers are not the same
+     then we need to change the original load to reference the destination
+     of the extension.  Then we need to emit a copy from that destination
+     to the original destination of the load.  */
+  rtx new_reg;
+  bool copy_needed
+    = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
+  if (copy_needed)
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
+  else
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
+
+#if 0
+  /* Rethinking test.  Temporarily disabled.  */
+  /* We're going to be widening the result of DEF_INSN, ensure that doing so
+     doesn't change the number of hard registers needed for the result.  */
+  if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
+      != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
+			   GET_MODE (SET_DEST (*orig_set))))
+	return false;
+#endif
 
   /* Merge constants by directly moving the constant into the register under
      some conditions.  Recall that RTL constants are sign-extended.  */
@@ -387,7 +409,8 @@  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
       if (dump_file)
         {
           fprintf (dump_file,
-		   "Tentatively merged extension with definition:\n");
+		   "Tentatively merged extension with definition %s:\n",
+		   (copy_needed) ? "(copy needed)" : "");
           print_rtl_single (dump_file, curr_insn);
         }
       return true;
@@ -531,6 +554,8 @@  struct ATTRIBUTE_PACKED ext_modified
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
 
+  unsigned int do_not_reextend : 1;
+
   /* True if the insn is scheduled to be deleted.  */
   unsigned int deleted : 1;
 };
@@ -614,27 +639,21 @@  make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
   return ret;
 }
 
-/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
-   on the SET pattern.  */
-
-static bool
-merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+/* If DEF_INSN has single SET expression, possibly buried inside
+   a PARALLEL, return the address of the SET expression, else
+   return NULL.  This is similar to single_set, except that
+   single_set allows multiple SETs when all but one is dead.  */
+static rtx *
+get_sub_rtx (rtx def_insn)
 {
-  enum machine_mode ext_src_mode;
-  enum rtx_code code;
-  rtx *sub_rtx;
-  rtx s_expr;
-  int i;
-
-  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
-  code = GET_CODE (PATTERN (def_insn));
-  sub_rtx = NULL;
+  enum rtx_code code = GET_CODE (PATTERN (def_insn));
+  rtx *sub_rtx = NULL;
 
   if (code == PARALLEL)
     {
-      for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
+      for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
         {
-          s_expr = XVECEXP (PATTERN (def_insn), 0, i);
+          rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
           if (GET_CODE (s_expr) != SET)
             continue;
 
@@ -643,7 +662,7 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
           else
             {
               /* PARALLEL with multiple SETs.  */
-              return false;
+              return NULL;
             }
         }
     }
@@ -652,10 +671,27 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
   else
     {
       /* It is not a PARALLEL or a SET, what could it be ? */
-      return false;
+      return NULL;
     }
 
   gcc_assert (sub_rtx != NULL);
+  return sub_rtx;
+}
+
+/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
+   on the SET pattern.  */
+
+static bool
+merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+{
+  enum machine_mode ext_src_mode;
+  rtx *sub_rtx;
+
+  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
+  sub_rtx = get_sub_rtx (def_insn);
+
+  if (sub_rtx == NULL)
+    return false;
 
   if (REG_P (SET_DEST (*sub_rtx))
       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
@@ -683,6 +719,18 @@  merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
   return false;
 }
 
+/* Given SRC, which should be one or more extensions of a REG, strip
+   away the extensions and return the REG.  */
+
+static inline rtx
+get_extended_src_reg (rtx src)
+{
+  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+    src = XEXP (src, 0);
+  gcc_assert (REG_P (src));
+  return src;
+}
+
 /* This function goes through all reaching defs of the source
    of the candidate for elimination (CAND) and tries to combine
    the extension with the definition instruction.  The changes
@@ -709,6 +757,108 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
   if (!outcome)
     return false;
 
+  /* If the destination operand of the extension is a different
+     register than the source operand, then additional restrictions
+     are needed.  Note we have to handle cases where we have nested
+     extensions in the source operand.  */
+  bool copy_needed
+    = (REGNO (SET_DEST (PATTERN (cand->insn)))
+       != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
+  if (copy_needed)
+    {
+      /* In theory we could handle more than one reaching def, it
+	 just makes the code to update the insn stream more complex.  */
+      if (state->defs_list.length () != 1)
+	return false;
+
+      /* We require the candidate not already be modified.  It may,
+	 for example have been changed from a (sign_extend (reg))
+	 into (zero_extend (sign_extend (reg))).
+
+	 Handling that case shouldn't be terribly difficult, but the code
+	 here and the code to emit copies would need auditing.  Until
+	 we see a need, this is the safe thing to do.  */
+      if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* Transformation of
+	 (set (reg1) (expression))
+	 (set (reg2) (any_extend (reg1)))
+	 into
+	 (set (reg2) (any_extend (expression)))
+	 (set (reg1) (reg2))
+	 is only valid for scalar integral modes, as it relies on the low
+	 subreg of reg1 to have the value of (expression), which is not true
+	 e.g. for vector modes.  */
+      if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
+	return false;
+
+      /* There's only one reaching def.  */
+      rtx def_insn = state->defs_list[0];
+
+      /* The defining statement must not have been modified either.  */
+      if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* The defining statement and candidate insn must be in the same block.
+	 This is merely to keep the test for safety and updating the insn
+	 stream simple.  Also ensure that within the block the candidate
+	 follows the defining insn.  */
+      if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn)
+	  || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
+	return false;
+
+      /* If there is an overlap between the destination of DEF_INSN and
+	 CAND->insn, then this transformation is not safe.  Note we have
+	 to test in the widened mode.  */
+      rtx *dest_sub_rtx = get_sub_rtx (def_insn);
+      if (dest_sub_rtx == NULL
+	  || !REG_P (SET_DEST (*dest_sub_rtx)))
+	return false;
+
+      rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
+				 REGNO (SET_DEST (*dest_sub_rtx)));
+      if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
+	return false;
+
+      /* The destination register of the extension insn must not be
+	 used or set between the def_insn and cand->insn exclusive.  */
+      if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
+			      def_insn, cand->insn)
+	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
+				def_insn, cand->insn))
+	return false;
+
+      /* We must be able to copy between the two registers.   Generate,
+	 recognize and verify constraints of the copy.  Also fail if this
+	 generated more than one insn.
+
+         This generates garbage since we throw away the insn when we're
+	 done, only to recreate it later if this test was successful. 
+
+	 Make sure to get the mode from the extension (cand->insn).  This
+	 is different than in the code to emit the copy as we have not
+	 modified the defining insn yet.  */
+      start_sequence ();
+      rtx pat = PATTERN (cand->insn);
+      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+                                 REGNO (XEXP (SET_SRC (pat), 0)));
+      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+                                 REGNO (SET_DEST (pat)));
+      emit_move_insn (new_dst, new_src);
+
+      rtx insn = get_insns();
+      end_sequence ();
+      if (NEXT_INSN (insn))
+	return false;
+      if (recog_memoized (insn) == -1)
+	return false;
+      extract_insn (insn);
+      if (!constrain_operands (1))
+	return false;
+    }
+
+
   /* If cand->insn has been already modified, update cand->mode to a wider
      mode if possible, or punt.  */
   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
@@ -772,11 +922,15 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
             fprintf (dump_file, "All merges were successful.\n");
 
 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
-	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
-	      state->modified[INSN_UID (def_insn)].kind
-		= (cand->code == ZERO_EXTEND
-		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
+	    {
+	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
+	      if (modified->kind == EXT_MODIFIED_NONE)
+		modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
+						            : EXT_MODIFIED_SEXT);
 
+	      if (copy_needed)
+		modified->do_not_reextend = 1;
+	    }
           return true;
         }
       else
@@ -825,8 +979,7 @@  add_removable_extension (const_rtx expr, rtx insn,
 
   if (REG_P (dest)
       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
-      && REG_P (XEXP (src, 0))
-      && REGNO (dest) == REGNO (XEXP (src, 0)))
+      && REG_P (XEXP (src, 0)))
     {
       struct df_link *defs, *def;
       ext_cand *cand;
@@ -910,6 +1063,7 @@  find_and_remove_re (void)
   int num_re_opportunities = 0, num_realized = 0, i;
   vec<ext_cand> reinsn_list;
   vec<rtx> reinsn_del_list;
+  vec<rtx> reinsn_copy_list;
   ext_state state;
 
   /* Construct DU chain to get all reaching definitions of each
@@ -921,6 +1075,7 @@  find_and_remove_re (void)
 
   max_insn_uid = get_max_uid ();
   reinsn_del_list.create (0);
+  reinsn_copy_list.create (0);
   reinsn_list = find_removable_extensions ();
   state.defs_list.create (0);
   state.copies_list.create (0);
@@ -947,17 +1102,62 @@  find_and_remove_re (void)
           if (dump_file)
             fprintf (dump_file, "Eliminated the extension.\n");
           num_realized++;
-          reinsn_del_list.safe_push (curr_cand->insn);
+	  /* If the RHS of the current candidate is not (extend (reg)), then
+	     we do not allow the optimization of extensions where
+	     the source and destination registers do not match.  Thus
+	     checking REG_P here is correct.  */
+	  if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
+	      && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
+		  != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
+	    {
+              reinsn_copy_list.safe_push (curr_cand->insn);
+              reinsn_copy_list.safe_push (state.defs_list[0]);
+	    }
+	  reinsn_del_list.safe_push (curr_cand->insn);
 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
         }
     }
 
+  /* The copy list contains pairs of insns which describe copies we
+     need to insert into the INSN stream.
+
+     The first insn in each pair is the extension insn, from which
+     we derive the source and destination of the copy.
+
+     The second insn in each pair is the memory reference where the
+     extension will ultimately happen.  We emit the new copy
+     immediately after this insn.
+
+     It may first appear that the arguments for the copy are reversed.
+     Remember that the memory reference will be changed to refer to the
+     destination of the extention.  So we're actually emitting a copy
+     from the new destination to the old destination.  */
+  for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
+    {
+      rtx curr_insn = reinsn_copy_list[i];
+      rtx def_insn = reinsn_copy_list[i + 1];
+
+      /* Use the mode of the destination of the defining insn
+	 for the mode of the copy.  This is necessary if the
+	 defining insn was used to eliminate a second extension
+	 that was wider than the first.  */
+      rtx sub_rtx = *get_sub_rtx (def_insn);
+      rtx pat = PATTERN (curr_insn);
+      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+				 REGNO (XEXP (SET_SRC (pat), 0)));
+      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
+				 REGNO (SET_DEST (pat)));
+      rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
+      emit_insn_after (set, def_insn);
+    }
+
   /* Delete all useless extensions here in one sweep.  */
   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
     delete_insn (curr_insn);
 
   reinsn_list.release ();
   reinsn_del_list.release ();
+  reinsn_copy_list.release ();
   state.defs_list.release ();
   state.copies_list.release ();
   state.modified_list.release ();
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ff0934b..a408fed 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,32 @@ 
+2015-02-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	Backport from mainline:
+	2014-06-02  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/61094
+	* g++.dg/pr61094: New test.
+
+	2014-02-14  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/60131
+	* g++.dg/torture/pr60131.C: New test.
+
+	2014-01-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/59747
+	* gcc.c-torture/execute/pr59747.c: New test.
+
+	2014-01-10  Jeff Law  <law@redhat.com>
+
+	PR middle-end/59743
+	* gcc.c-torture/compile/pr59743.c: New test.
+
+	Backport from mainline:
+	2014-01-07  Jeff Law  <law@redhat.com>
+
+	PR middle-end/53623
+	* gcc.target/i386/pr53623.c: New test.
+
 2015-02-13  Mikael Morin  <mikael@gcc.gnu.org>
 
 	PR fortran/63744
diff --git a/gcc/testsuite/g++.dg/pr61094.C b/gcc/testsuite/g++.dg/pr61094.C
new file mode 100644
index 0000000..35adc25
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr61094.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" }  */
+
+template <typename> struct A {
+  unsigned _width, _height, _depth, _spectrum;
+  template <typename t> A(t p1) {
+    int a = p1.size();
+    if (a) {
+      _width = p1._width;
+      _depth = _height = _spectrum = p1._spectrum;
+    }
+  }
+  long size() { return (long)_width * _height * _depth * _spectrum; }
+};
+
+int d;
+void fn1(void *);
+A<int> *fn2();
+void fn3() {
+  int b;
+  for (;;) {
+    A<char> c(*fn2());
+    fn1(&c);
+    if (d || !b)
+      throw;
+  }
+}
+
+
+
+
diff --git a/gcc/testsuite/g++.dg/torture/pr60131.C b/gcc/testsuite/g++.dg/torture/pr60131.C
new file mode 100644
index 0000000..23dde31
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr60131.C
@@ -0,0 +1,23 @@ 
+// { dg-do compile }
+struct A { short a; };
+int **b;
+unsigned long c;
+
+bool foo ();
+unsigned bar (unsigned i);
+extern void baz () __attribute__((noreturn));
+
+int *
+test (unsigned x, struct A *y)
+{
+  unsigned v;
+  if (foo () || y[x].a == -1)
+    {
+      c = bar (x);
+      return 0;
+    }
+  v = y[x].a;
+  if (v >= 23)
+    baz ();
+  return b[v];
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr59743.c b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
new file mode 100644
index 0000000..8dadba5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr59743.c
@@ -0,0 +1,23 @@ 
+/* PR middle-end/59743 */
+
+typedef union {
+  long all;
+  struct {
+    int low;
+    int high;
+  } s;
+} udwords;
+int a, b, c, d;
+void __udivmoddi4() {
+  udwords r;
+  d = __builtin_clz(0);
+  r.s.low = 0;
+  for (; d; --d) {
+    r.s.high = r.s.high << 1 | r.s.low >> a;
+    r.s.low = r.s.low << b >> 1;
+    int s = -r.all;
+    c = s;
+    r.all--;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr59747.c b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
new file mode 100644
index 0000000..d45a908
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr59747.c
@@ -0,0 +1,27 @@ 
+extern void abort (void);
+extern void exit (int);
+
+int a[6], b, c = 1, d;
+short e;
+
+int __attribute__ ((noinline))
+fn1 (int p)
+{
+  b = a[p];
+}
+
+int
+main ()
+{
+  if (sizeof (long long) != 8)
+    exit (0);
+
+  a[0] = 1;
+  if (c)
+    e--;
+  d = e;
+  long long f = e;
+  if (fn1 ((f >> 56) & 1) != 0)
+    abort ();
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr53623.c b/gcc/testsuite/gcc.target/i386/pr53623.c
new file mode 100644
index 0000000..35c578b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53623.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile { target {! ia32 } } } */
+/* { dg-options "-O2 -fdump-rtl-ree" } */
+
+
+#include <stdint.h>
+
+typedef (*inst_t)(int64_t rdi, int64_t rsi, int64_t rdx);
+
+int16_t code[256];
+inst_t dispatch[256];
+
+void an_inst(int64_t rdi, int64_t rsi, int64_t rdx) {
+  rdx = code[rdx];
+  uint8_t inst = (uint8_t) rdx;
+  rdx >>= 8;
+  dispatch[inst](rdi, rsi, rdx);
+}
+
+int main(void) {
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump "copy needed" "ree" } } */
+/* { dg-final { cleanup-rtl-dump "ree" } } */
+