diff mbox

Backport of patch for PR64688 to gcc-4.9 branch

Message ID 5515AE5E.90409@redhat.com
State New
Headers show

Commit Message

Vladimir Makarov - March 27, 2015, 7:24 p.m. UTC
The following patch was backported from the mainline to the gcc-4.9 branch.

   The patch was bootstrapped on x86/x86-64.

   Committed as rev. 221743.

2015-03-27  Vladimir Makarov  <vmakarov@redhat.com>

         Backport from mainline
         2015-01-30  Vladimir Makarov <vmakarov@redhat.com>

         PR target/64688
         * lra-constraints.c (original_subreg_reg_mode): New.
         (simplify_operand_subreg): Try to simplify subreg of const.  Use
         original_subreg_reg_mode for it.
         (swap_operands): Update original_subreg_reg_mode.
         (curr_insn_transform): Set up original_subreg_reg_mode.

2015-03-27  Vladimir Makarov  <vmakarov@redhat.com>

         Backport from mainline
         2015-01-30  Vladimir Makarov <vmakarov@redhat.com>

         PR target/64688
         * g++.dg/pr64688-2.C: New.
diff mbox

Patch

Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 220294)
+++ lra-constraints.c	(working copy)
@@ -170,6 +170,10 @@  static basic_block curr_bb;
 static lra_insn_recog_data_t curr_id;
 static struct lra_static_insn_data *curr_static_id;
 static machine_mode curr_operand_mode[MAX_RECOG_OPERANDS];
+/* Mode of the register substituted by its equivalence with VOIDmode
+   (e.g. constant) and whose subreg is given operand of the current
+   insn.  VOIDmode in all other cases.  */
+static machine_mode original_subreg_reg_mode[MAX_RECOG_OPERANDS];
 
 
 
@@ -1382,13 +1386,13 @@  static int valid_address_p (machine_mode
 
 /* Make reloads for subreg in operand NOP with internal subreg mode
    REG_MODE, add new reloads for further processing.  Return true if
-   any reload was generated.  */
+   any change was done.  */
 static bool
 simplify_operand_subreg (int nop, machine_mode reg_mode)
 {
   int hard_regno;
   rtx_insn *before, *after;
-  machine_mode mode;
+  machine_mode mode, innermode;
   rtx reg, new_reg;
   rtx operand = *curr_id->operand_loc[nop];
   enum reg_class regclass;
@@ -1401,6 +1405,7 @@  simplify_operand_subreg (int nop, machin
 
   mode = GET_MODE (operand);
   reg = SUBREG_REG (operand);
+  innermode = GET_MODE (reg);
   type = curr_static_id->operand[nop].type;
   /* If we change address for paradoxical subreg of memory, the
      address might violate the necessary alignment or the access might
@@ -1419,7 +1424,7 @@  simplify_operand_subreg (int nop, machin
       alter_subreg (curr_id->operand_loc[nop], false);
       subst = *curr_id->operand_loc[nop];
       lra_assert (MEM_P (subst));
-      if (! valid_address_p (GET_MODE (reg), XEXP (reg, 0),
+      if (! valid_address_p (innermode, XEXP (reg, 0),
 			     MEM_ADDR_SPACE (reg))
 	  || valid_address_p (GET_MODE (subst), XEXP (subst, 0),
 			      MEM_ADDR_SPACE (subst)))
@@ -1434,6 +1439,20 @@  simplify_operand_subreg (int nop, machin
       alter_subreg (curr_id->operand_loc[nop], false);
       return true;
     }
+  else if (CONSTANT_P (reg))
+    {
+      /* Try to simplify subreg of constant.  It is usually result of
+	 equivalence substitution.  */
+      if (innermode == VOIDmode
+	  && (innermode = original_subreg_reg_mode[nop]) == VOIDmode)
+	innermode = curr_static_id->operand[nop].mode;
+      if ((new_reg = simplify_subreg (mode, reg, innermode,
+				      SUBREG_BYTE (operand))) != NULL_RTX)
+	{
+	  *curr_id->operand_loc[nop] = new_reg;
+	  return true;
+	}
+    }
   /* Put constant into memory when we have mixed modes.  It generates
      a better code in most cases as it does not need a secondary
      reload memory.  It also prevents LRA looping when LRA is using
@@ -1453,9 +1472,9 @@  simplify_operand_subreg (int nop, machin
        && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
        /* Don't reload paradoxical subregs because we could be looping
 	  having repeatedly final regno out of hard regs range.  */
-       && (hard_regno_nregs[hard_regno][GET_MODE (reg)]
+       && (hard_regno_nregs[hard_regno][innermode]
 	   >= hard_regno_nregs[hard_regno][mode])
-       && simplify_subreg_regno (hard_regno, GET_MODE (reg),
+       && simplify_subreg_regno (hard_regno, innermode,
 				 SUBREG_BYTE (operand), mode) < 0
        /* Don't reload subreg for matching reload.  It is actually
 	  valid subreg in LRA.  */
@@ -1481,7 +1500,7 @@  simplify_operand_subreg (int nop, machin
 	  bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg));
 
 	  insert_before = (type != OP_OUT
-			   || GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode));
+			   || GET_MODE_SIZE (innermode) > GET_MODE_SIZE (mode));
 	  insert_after = (type != OP_IN);
 	  insert_move_for_subreg (insert_before ? &before : NULL,
 				  insert_after ? &after : NULL,
@@ -1524,7 +1543,7 @@  simplify_operand_subreg (int nop, machin
   else if (REG_P (reg)
 	   && REGNO (reg) >= FIRST_PSEUDO_REGISTER
 	   && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
-	   && (hard_regno_nregs[hard_regno][GET_MODE (reg)]
+	   && (hard_regno_nregs[hard_regno][innermode]
 	       < hard_regno_nregs[hard_regno][mode])
 	   && (regclass = lra_get_allocno_class (REGNO (reg)))
 	   && (type != OP_IN
@@ -1542,7 +1561,7 @@  simplify_operand_subreg (int nop, machin
 	  bool insert_before, insert_after;
 
 	  PUT_MODE (new_reg, mode);
-          subreg = simplify_gen_subreg (GET_MODE (reg), new_reg, mode, 0);
+          subreg = simplify_gen_subreg (innermode, new_reg, mode, 0);
 	  bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg));
 
 	  insert_before = (type != OP_OUT);
@@ -3286,6 +3305,9 @@  swap_operands (int nop)
   machine_mode mode = curr_operand_mode[nop];
   curr_operand_mode[nop] = curr_operand_mode[nop + 1];
   curr_operand_mode[nop + 1] = mode;
+  mode = original_subreg_reg_mode[nop];
+  original_subreg_reg_mode[nop] = original_subreg_reg_mode[nop + 1];
+  original_subreg_reg_mode[nop + 1] = mode;
   rtx x = *curr_id->operand_loc[nop];
   *curr_id->operand_loc[nop] = *curr_id->operand_loc[nop + 1];
   *curr_id->operand_loc[nop + 1] = x;
@@ -3389,21 +3411,26 @@  curr_insn_transform (bool check_only_p)
 	if (GET_CODE (old) == SUBREG)
 	  old = SUBREG_REG (old);
 	subst = get_equiv_with_elimination (old, curr_insn);
+	original_subreg_reg_mode[i] = VOIDmode;
 	if (subst != old)
 	  {
 	    subst = copy_rtx (subst);
 	    lra_assert (REG_P (old));
-	    if (GET_CODE (op) == SUBREG)
-	      SUBREG_REG (op) = subst;
-	    else
+	    if (GET_CODE (op) != SUBREG)
 	      *curr_id->operand_loc[i] = subst;
+	    else
+	      {
+		SUBREG_REG (op) = subst;
+		if (GET_MODE (subst) == VOIDmode)
+		  original_subreg_reg_mode[i] = GET_MODE (old);
+	      }
 	    if (lra_dump_file != NULL)
 	      {
 		fprintf (lra_dump_file,
 			 "Changing pseudo %d in operand %i of insn %u on equiv ",
 			 REGNO (old), i, INSN_UID (curr_insn));
 		dump_value_slim (lra_dump_file, subst, 1);
-	      fprintf (lra_dump_file, "\n");
+		fprintf (lra_dump_file, "\n");
 	      }
 	    op_change_p = change_p = true;
 	  }
Index: testsuite/g++.dg/pr64688-2.C
===================================================================
--- testsuite/g++.dg/pr64688-2.C	(revision 0)
+++ testsuite/g++.dg/pr64688-2.C	(working copy)
@@ -0,0 +1,136 @@ 
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-std=c++11 -O3 -march=westmere" }
+
+template <int> struct int_ {};
+template <typename> struct add_const { typedef int type; };
+template <typename> struct add_reference { typedef int type; };
+template <typename T> struct next { typedef typename T::next type; };
+template <typename> struct size_impl;
+template <typename T> struct msvc_eti_base : T {};
+template <int N> struct long_ {
+  static const int value = N;
+  typedef long_<N + 1> next;
+};
+template <typename Sequence>
+struct size : msvc_eti_base<typename size_impl<
+typename Sequence::tag>::template apply<Sequence>> {};
+template <typename Base> struct v_item : Base {
+  typedef typename next<typename Base::size>::type size;
+};
+template <typename = int> struct vector0 {
+  typedef int tag;
+  typedef long_<0> size;
+};
+template <> struct size_impl<int> {
+  template <typename Vector> struct apply : Vector::size {};
+};
+template <typename> struct vector3 : v_item<v_item<v_item<vector0<>>>> {};
+template <typename> struct layout { typedef vector3<int> color_space_t; };
+template <typename> struct kth_element_const_reference_type;
+template <typename> struct iterator_adaptor_get_base;
+template <typename, typename, int> struct homogeneous_color_base;
+template <typename> struct element_const_reference_type;
+template <typename Element, typename Layout>
+  struct homogeneous_color_base<Element, Layout, 3> {
+  Element _v0, _v1, _v2;
+  typename element_const_reference_type<homogeneous_color_base>::type
+    at(int_<0>) {
+    return _v0;
+  }
+  typename element_const_reference_type<homogeneous_color_base>::type
+    at(int_<1>) {
+    return _v1;
+  }
+  typename element_const_reference_type<homogeneous_color_base>::type
+    at(int_<2>) {
+    return _v2;
+  }
+};
+template <typename Element, typename Layout, int K1>
+  struct kth_element_const_reference_type<
+  homogeneous_color_base<Element, Layout, K1>>
+  : add_reference<typename add_const<Element>::type> {};
+template <int K, typename E, typename L, int N>
+  typename add_reference<typename add_const<E>::type>::type
+  at_c(homogeneous_color_base<E, L, N> p1) {
+  return p1.at(int_<K>());
+}
+template <typename> class memory_based_step_iterator;
+template <typename> class memory_based_2d_locator;
+template <typename> class image_view;
+template <typename, typename> struct pixel;
+struct iterator_type_from_pixel {
+  typedef pixel<unsigned char, layout<vector3<int>>> *type;
+};
+template <typename XIterator> struct type_from_x_iterator {
+    typedef image_view<
+    memory_based_2d_locator<memory_based_step_iterator<XIterator>>> view_t;
+};
+template <typename>
+struct element_const_reference_type
+: kth_element_const_reference_type<
+homogeneous_color_base<unsigned, layout<int>, 3>> {};
+template <typename, typename>
+  struct pixel : homogeneous_color_base<unsigned char, layout<int>,
+  size<layout<int>::color_space_t>::value> {
+};
+template <typename Iterator>
+struct iterator_adaptor_get_base<memory_based_step_iterator<Iterator>> {
+  typedef Iterator type;
+};
+template <typename> class memory_based_2d_locator {
+ public:
+    typedef iterator_adaptor_get_base<memory_based_step_iterator<
+      pixel<unsigned, layout<vector3<int>>> *>>::type x_iterator;
+};
+template <typename> class image_view {
+ public:
+  typedef memory_based_2d_locator<int>::x_iterator x_iterator;
+  x_iterator row_begin___trans_tmp_2;
+  x_iterator row_begin(int) { return row_begin___trans_tmp_2; }
+};
+template <typename, bool, typename = int> class image {
+ public:
+ typedef type_from_x_iterator<iterator_type_from_pixel::type>::view_t view_t;
+ image(int);
+};
+template <typename Pixel, bool IsPlanar, typename Alloc>
+  typename image<Pixel, 0>::view_t view(image<Pixel, IsPlanar, Alloc>);
+template <typename Op> void measure_time(Op p1) {
+  for (;;)
+    p1();
+}
+template <typename, typename> struct fill_nongil_t;
+template <typename T, typename P>
+  struct fill_nongil_t<
+      image_view<memory_based_2d_locator<
+  memory_based_step_iterator<pixel<T, layout<vector3<int>>> *>>>,
+  P> {
+    typedef image_view<memory_based_2d_locator<
+      memory_based_step_iterator<pixel<T, layout<vector3<int>>> *>>> View;
+    View _v;
+    P _p;
+ fill_nongil_t(View p1, P) : _v(p1) {}
+    void operator()() {
+      T *first = (T *)_v.row_begin(0);
+      T last;
+      while (first != &last) {
+	first[0] = at_c<0>(_p);
+	first[1] = at_c<1>(_p);
+	first[2] = at_c<2>(_p);
+	first += 3;
+      }
+    }
+};
+template <typename, typename> void test_fill(int) {
+  image<int, 0>::view_t __trans_tmp_1;
+  image<int, 0> im(0);
+  __trans_tmp_1 = view(im);
+  measure_time(fill_nongil_t<
+	             image_view<memory_based_2d_locator<memory_based_step_iterator<
+	       pixel<unsigned char, layout<vector3<int>>> *>>>,
+	       pixel<unsigned, int>>(__trans_tmp_1, pixel<unsigned, int>()));
+}
+void performance_testtest_method() {
+  test_fill<image_view<int>, pixel<unsigned, int>>(0);
+}