diff --git a/gcc/ddg.c b/gcc/ddg.c
index 5d0a401..b7c338d 100644
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -49,6 +49,10 @@ along with GCC; see the file COPYING3.  If not see
 /* A flag indicating that a ddg edge belongs to an SCC or not.  */
 enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
 
+/* A vector of dependencies needed while processing clobbers.  */
+DEF_VEC_P(df_ref);
+DEF_VEC_ALLOC_P(df_ref,heap);
+
 /* Forward declarations.  */
 static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
 static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
@@ -178,23 +182,45 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
      whose register has multiple defs in the loop.  */
   if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
     {
-      rtx set;
+      bool can_delete_dep = true;
+      unsigned regno;
+      reg_set_iterator rsi;
+      regset src_uses, dest_sets, regs;
+
+      /* Register sets from modulo scheduler structures.  */
+      src_uses = extract_from_insn_map (src_node->insn, 0);
+      dest_sets = extract_from_insn_map (dest_node->insn, 1);
+
+      if (!src_uses || !dest_sets)
+	return;
 
-      set = single_set (dest_node->insn);
-      /* TODO: Handle registers that REG_P is not true for them, i.e.
-         subregs and special registers.  */
-      if (set && REG_P (SET_DEST (set)))
+      /* Build regset intersection.  */
+      regs = ALLOC_REG_SET (&reg_obstack);
+      COPY_REG_SET (regs, src_uses);
+      AND_REG_SET (regs, dest_sets);
+
+      EXECUTE_IF_SET_IN_REG_SET (regs, 0, regno, rsi)
         {
-          int regno = REGNO (SET_DEST (set));
           df_ref first_def;
           struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
 
           first_def = df_bb_regno_first_def_find (g->bb, regno);
           gcc_assert (first_def);
 
-          if (bitmap_bit_p (&bb_info->gen, DF_REF_ID (first_def)))
-            return;
+	  /* CC-flags and other hard registers can't be renamed.
+	     Check whether loop kernel has only one def.  */
+          if (HARD_REGISTER_NUM_P (regno)
+	      || !bitmap_bit_p (&bb_info->gen, DF_REF_ID (first_def)))
+	    {
+	      can_delete_dep = false;
+	      break;
+	    }
         }
+
+      FREE_REG_SET (regs);
+
+      if (can_delete_dep)
+	return;
     }
 
    latency = dep_cost (link);
@@ -247,16 +273,20 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
 static void
 add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 {
-  int regno = DF_REF_REGNO (last_def);
+  unsigned int regno = DF_REF_REGNO (last_def);
   struct df_link *r_use;
   int has_use_in_bb_p = false;
-  rtx def_insn = DF_REF_INSN (last_def);
+  rtx insn, def_insn = DF_REF_INSN (last_def);
   ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
   ddg_node_ptr use_node;
+  df_ref *def_rec;
+  unsigned int uid;
+  static VEC(df_ref,heap) *all_defs;
 #ifdef ENABLE_CHECKING
   struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
 #endif
   df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
+  bool first_write = true;
 
   gcc_assert (last_def_node);
   gcc_assert (first_def);
@@ -267,6 +297,31 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 			       DF_REF_ID (first_def)));
 #endif
 
+  all_defs = VEC_alloc (df_ref, heap, 0);
+
+  /* Find all defs which are clobbers and the first normal write def.  */
+  FOR_BB_INSNS (g->bb, insn)
+    {
+      if (!INSN_P (insn))
+        continue;
+      uid = INSN_UID (insn);
+      for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+        {
+          df_ref def = *def_rec;
+          if (DF_REF_REGNO (def) == regno)
+            {
+	      bool is_clobber = DF_REF_FLAGS (def) & (DF_REF_MUST_CLOBBER
+						      | DF_REF_MAY_CLOBBER);
+	      if (is_clobber || first_write)
+	        {
+		  VEC_safe_push (df_ref, heap, all_defs, def);
+		  if (!is_clobber)
+		    first_write = false;
+		}
+	    }
+        }
+    }
+
   /* Create inter-loop true dependences and anti dependences.  */
   for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
     {
@@ -290,25 +345,32 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 	}
       else if (!DEBUG_INSN_P (use_insn))
 	{
+	  unsigned int i;
+	  df_ref curr_def;
 	  /* Add anti deps from last_def's uses in the current iteration
-	     to the first def in the next iteration.  We do not add ANTI
-	     dep when there is an intra-loop TRUE dep in the opposite
-	     direction, but use regmoves to fix such disregarded ANTI
-	     deps when broken.	If the first_def reaches the USE then
-	     there is such a dep.  */
-	  ddg_node_ptr first_def_node = get_node_of_insn (g,
-							  DF_REF_INSN (first_def));
-
-	  gcc_assert (first_def_node);
-
-         /* Always create the edge if the use node is a branch in
-            order to prevent the creation of reg-moves.  */
-          if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
-              || !flag_modulo_sched_allow_regmoves
-              || (flag_modulo_sched_allow_regmoves && JUMP_P (use_node->insn)))
-            create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
-                                    REG_DEP, 1);
-
+	     to the first def and all clobbers in the next iteration.
+	     We do not add ANTI dep when there is an intra-loop TRUE dep
+	     in the opposite direction, but use regmoves to fix such
+	     disregarded ANTI deps when broken.	If the curr_def reaches
+	     the USE then there is such a dep.  */
+	  FOR_EACH_VEC_ELT (df_ref, all_defs, i, curr_def)
+	    {
+	      if (DF_REF_ID (last_def) != DF_REF_ID (curr_def)
+		  /* Some hard regs (for ex. CC-flags) can't be renamed.  */
+                  || HARD_REGISTER_P (DF_REF_REG (last_def))
+		  || !flag_modulo_sched_allow_regmoves
+		  /* Always create the edge if the use node is a branch in
+		     order to prevent the creation of reg-moves.  */
+		  || (flag_modulo_sched_allow_regmoves
+		      && JUMP_P (use_node->insn)))
+		{
+	          ddg_node_ptr curr_def_node = get_node_of_insn (g,
+						DF_REF_INSN (curr_def));
+		  gcc_assert (curr_def_node);
+		  create_ddg_dep_no_link (g, use_node, curr_def_node,
+					  ANTI_DEP, REG_DEP, 1);
+	        }
+	    }
 	}
     }
   /* Create an inter-loop output dependence between LAST_DEF (which is the
@@ -318,18 +380,15 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
      defs starting with a true dependence to a use which can be in the
      next iteration; followed by an anti dependence of that use to the
      first def (i.e. if there is a use between the two defs.)  */
-  if (!has_use_in_bb_p)
+  if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
     {
-      ddg_node_ptr dest_node;
-
-      if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
-	return;
-
-      dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
+      ddg_node_ptr dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
       gcc_assert (dest_node);
       create_ddg_dep_no_link (g, last_def_node, dest_node,
 			      OUTPUT_DEP, REG_DEP, 1);
     }
+
+  VEC_free (df_ref, heap, all_defs);
 }
 /* Build inter-loop dependencies, by looking at DF analysis backwards.  */
 static void
@@ -466,6 +525,8 @@ build_intra_loop_deps (ddg_ptr g)
 
   /* Build the dependence information, using the sched_analyze function.  */
   init_deps_global ();
+  /* Set sms hooks and initialize additional structures.  */
+  sms_sched_analyze_init ();
   init_deps (&tmp_deps, false);
 
   /* Do the intra-block data dependence analysis for the given block.  */
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 948209e..35d2ee4 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "dbgcnt.h"
 #include "df.h"
+#include "pointer-set.h"
 
 #ifdef INSN_SCHEDULING
 
@@ -200,9 +201,10 @@ static void set_node_sched_params (ddg_ptr);
 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 static void permute_partial_schedule (partial_schedule_ptr, rtx);
 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
-                                    rtx, rtx);
+                                    rtx, bool, bool, rtx, HOST_WIDEST_INT,
+                                    bool, HOST_WIDEST_INT, rtx *);
 static void duplicate_insns_of_cycles (partial_schedule_ptr,
-				       int, int, int, rtx);
+				       int, int, int, rtx, bool);
 static int calculate_stage_count (partial_schedule_ptr, int);
 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
 					   int, int, sbitmap, sbitmap, sbitmap);
@@ -264,7 +266,162 @@ typedef struct node_sched_params
 } *node_sched_params_ptr;
 
 
-/* The following three functions are copied from the current scheduler
+/* Next data structures and callbacks help to store information about
+   using registers in insn.  */
+static struct sched_deps_info_def old_sched_deps_info;
+
+/* Two regsets to store which registers the insn reads and writes.  */
+typedef struct regset_pair_def
+{
+  regset uses;
+  regset sets;
+} regset_pair;
+
+/* Allocate memory for regset_pair structure.  */
+static regset_pair*
+regset_pair_init (void)
+{
+  regset_pair *trs = (regset_pair *)xcalloc (1, sizeof (regset_pair));
+  trs->uses = ALLOC_REG_SET (&reg_obstack);
+  trs->sets = ALLOC_REG_SET (&reg_obstack);
+  return trs;
+}
+
+/* Pointer map is used to find a reg sets for insn.  */
+static struct pointer_map_t *insn_map;
+
+/* Find (or create) regset_pair for INSN in pointer_map.  */
+static regset_pair*
+regset_pair_get (rtx insn)
+{
+  void **slot = pointer_map_contains (insn_map, insn);
+  if (!slot)
+    {
+      slot = pointer_map_insert (insn_map, insn);
+      *slot = regset_pair_init ();
+    }
+  return (regset_pair*)*slot;
+}
+
+/* Callback for pointer_map_traverse to free memory used by regset_pair.  */
+static bool
+destroy_regset_pair (const void *key ATTRIBUTE_UNUSED, void **slot,
+		   void *data ATTRIBUTE_UNUSED)
+{
+  regset_pair *trs = (regset_pair*)*slot;
+  FREE_REG_SET (trs->uses);
+  FREE_REG_SET (trs->sets);
+  free (trs);
+  return true;
+}
+
+/* SMS sched_analyze hooks.  Every of them calls original hook first.  */
+static rtx curr_insn = NULL_RTX;
+
+static void
+sms_start_insn (rtx insn)
+{
+  old_sched_deps_info.start_insn (insn);
+
+  gcc_assert (insn && !curr_insn);
+  curr_insn = insn;
+  if (dump_file)
+    {
+      fprintf (dump_file, "sms analyze: start insn:\n");
+      print_rtl_single (dump_file, curr_insn);
+    }
+}
+
+static void
+sms_finish_insn (void)
+{
+  old_sched_deps_info.finish_insn ();
+
+  gcc_assert (curr_insn);
+  curr_insn = NULL_RTX;
+  if (dump_file)
+    fprintf (dump_file, "sms analyze: finished insn\n");
+}
+
+static void
+sms_note_reg_set (int regno)
+{
+  regset_pair *trs;
+  old_sched_deps_info.note_reg_set (regno);
+
+  gcc_assert (curr_insn);
+  trs = regset_pair_get (curr_insn);
+  SET_REGNO_REG_SET (trs->sets, regno);
+
+  if (dump_file)
+    fprintf (dump_file, "sms analyze: reg set %d\n", regno);
+}
+
+static void
+sms_note_reg_clobber (int regno)
+{
+  regset_pair *trs;
+  old_sched_deps_info.note_reg_clobber (regno);
+
+  gcc_assert (curr_insn);
+  trs = regset_pair_get (curr_insn);
+  SET_REGNO_REG_SET (trs->sets, regno);
+
+  if (dump_file)
+    fprintf (dump_file, "sms analyze: reg clobber %d\n", regno);
+}
+
+static void
+sms_note_reg_use (int regno)
+{
+  regset_pair *trs;
+  old_sched_deps_info.note_reg_use (regno);
+
+  gcc_assert (curr_insn);
+  trs = regset_pair_get (curr_insn);
+  SET_REGNO_REG_SET (trs->uses, regno);
+
+  if (dump_file)
+    fprintf (dump_file, "sms analyze: reg use %d\n", regno);
+}
+
+/* Extract the saved data about register usage.  Bool SETS is true,
+   when we need the set of written regs.  */
+regset
+extract_from_insn_map (rtx insn, bool sets)
+{
+  void **slot = pointer_map_contains (insn_map, insn);
+  regset_pair *trs;
+  if (!slot)
+    return NULL;
+  trs = (regset_pair*)*slot;
+  return sets ? trs->sets : trs->uses;
+}
+
+/* Setup SMS hooks.  Initialize pointer_map.  */
+void
+sms_sched_analyze_init (void)
+{
+  memcpy (&old_sched_deps_info, sched_deps_info,
+	  sizeof (struct sched_deps_info_def));
+  sched_deps_info->start_insn = sms_start_insn;
+  sched_deps_info->finish_insn = sms_finish_insn;
+  sched_deps_info->note_reg_set = sms_note_reg_set;
+  sched_deps_info->note_reg_clobber = sms_note_reg_clobber;
+  sched_deps_info->note_reg_use = sms_note_reg_use;
+  insn_map = pointer_map_create ();
+}
+
+/* Free pointer_map with freeing internal structures first.  */
+static void
+sms_create_ddg_finish (void)
+{
+  pointer_map_traverse (insn_map, destroy_regset_pair, NULL);
+  pointer_map_destroy (insn_map);
+}
+
+
+/* The following two functions are copied from the current scheduler
    code in order to use sched_analyze() for computing the dependencies.
    They are used when initializing the sched_info structure.  */
 static const char *
@@ -287,9 +444,20 @@ static struct common_sched_info_def sms_common_sched_info;
 static struct sched_deps_info_def sms_sched_deps_info =
   {
     compute_jump_reg_dependencies,
-    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-    NULL,
-    0, 0, 0
+    sms_start_insn,
+    sms_finish_insn,
+    NULL, /* start_lhs */
+    NULL, /* finish_lhs */
+    NULL, /* start_rhs */
+    NULL, /* finish_rhs */
+    sms_note_reg_set,
+    sms_note_reg_clobber,
+    sms_note_reg_use,
+    NULL, /* note_mem_dep */
+    NULL, /* note_dep */
+    0, /* use_cselib */
+    0, /* use_deps_list */
+    0 /* generate_spec_deps */
   };
 
 static struct haifa_sched_info sms_sched_info =
@@ -311,6 +479,7 @@ static struct haifa_sched_info sms_sched_info =
   0
 };
 
+
 /* Given HEAD and TAIL which are the first and last insns in a loop;
    return the register which controls the loop.  Return zero if it has
    more than one occurrence in the loop besides the control part or the
@@ -364,37 +533,164 @@ doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
 #endif
 }
 
-/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
-   that the number of iterations is a compile-time constant.  If so,
-   return the rtx that sets COUNT_REG to a constant, and set COUNT to
-   this constant.  Otherwise return 0.  */
+/* Same as previous for loop with always-the-same-step counter.  */
+static rtx
+nondoloop_register_get (rtx head, rtx tail, int cmp_side,
+			rtx *addsub_output, rtx *cmp_output)
+{
+  rtx insn, reg, flagreg, addsub, cmp, end;
+
+  /* Check jump instruction form */
+  insn = single_set (tail);
+  if (insn == NULL_RTX
+      || SET_DEST (insn) != pc_rtx
+      || GET_CODE (SET_SRC (insn)) != IF_THEN_ELSE
+      || GET_CODE (XEXP (SET_SRC (insn), 1)) != LABEL_REF
+      || XEXP (SET_SRC (insn), 2) != pc_rtx)
+    return NULL_RTX;
+
+  /* Check loop exit condition */
+  insn = XEXP (SET_SRC (insn), 0);
+  if (GET_CODE (insn) != NE || XEXP (insn, 1) != const0_rtx)
+    return NULL_RTX;
+
+  /* Flags register */
+  flagreg = XEXP (insn, 0);
+
+  /* Searching comparison instruction */
+  cmp = PREV_INSN (tail);
+  while (cmp != PREV_INSN (head))
+    {
+      if (INSN_P (cmp) && reg_set_p (flagreg, cmp))
+        break;
+      cmp = PREV_INSN (cmp);
+    }
+  if (cmp == PREV_INSN (head))
+    return NULL_RTX;
+
+  /* Check comparison */
+  insn = single_set (cmp);
+  if (insn == NULL_RTX
+      || ! rtx_equal_p (flagreg, SET_DEST (insn))
+      || GET_CODE (SET_SRC (insn)) != COMPARE)
+    return NULL_RTX;
+
+  /* Loop register */
+  gcc_assert (0 <= cmp_side && cmp_side <= 1);
+  reg = XEXP (SET_SRC (insn), cmp_side);
+  if (! REG_P (reg))
+    return NULL_RTX;
+
+  /* End value */
+  end = XEXP (SET_SRC (insn), 1 - cmp_side);
+  if (! REG_P (end) && ! CONST_INT_P (end))
+    return NULL_RTX;
+
+  /* Searching register add\sub instruction */
+  addsub = PREV_INSN (cmp);
+  while (addsub != PREV_INSN (head))
+    {
+      if (INSN_P (addsub) && reg_set_p (reg, addsub))
+        break;
+      addsub = PREV_INSN (addsub);
+    }
+  if (addsub == PREV_INSN (head))
+    return NULL_RTX;
+
+  /* Checking register change instruction */
+  insn = single_set (addsub);
+  if (insn == NULL_RTX || ! rtx_equal_p (reg, SET_DEST (insn)))
+    return NULL_RTX;
+  insn = SET_SRC (insn);
+  if ((GET_CODE (insn) != PLUS && GET_CODE (insn) != MINUS)
+      || ! rtx_equal_p (reg, XEXP (insn, 0))
+      || ! (CONST_INT_P (XEXP (insn, 1))))
+    return NULL_RTX;
+
+  /* No other REG and END (if reg) modifications allowed */
+  for (insn = head; insn != tail; insn = NEXT_INSN (insn))
+    {
+      if (REG_P(end) && reg_set_p (end, insn))
+        {
+          if (dump_file)
+          {
+            fprintf (dump_file, "SMS end register found ");
+            print_rtl_single (dump_file, reg);
+            fprintf (dump_file, " outside write in insn:\n");
+            print_rtl_single (dump_file, insn);
+          }
+	  return NULL_RTX;
+	}
+      if (insn != addsub && reg_set_p (reg, insn))
+        {
+          if (dump_file)
+          {
+            fprintf (dump_file, "SMS count_reg found ");
+            print_rtl_single (dump_file, reg);
+            fprintf (dump_file, " outside write in insn:\n");
+            print_rtl_single (dump_file, insn);
+          }
+          return NULL_RTX;
+        }
+    }
+
+  *addsub_output = addsub;
+  *cmp_output = cmp;
+  return reg;
+}
+
+/* Check if REG is set to a constant in the PRE_HEADER block.
+   If possible to find, return the rtx that sets REG.
+   If REG is set to a constant (probably not directly),
+   set IS_CONST to true and VALUE to that constant value.  */
 static rtx
-const_iteration_count (rtx count_reg, basic_block pre_header,
-		       HOST_WIDEST_INT * count)
+search_const_init (basic_block pre_header, rtx reg, bool *is_const,
+		   HOST_WIDEST_INT *value)
 {
   rtx insn;
   rtx head, tail;
 
-  if (! pre_header)
-    return NULL_RTX;
+  if (!pre_header)
+    {
+      *is_const = false;
+      return NULL_RTX;
+    }
 
   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
 
   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
-	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+	rtx_equal_p (reg, SET_DEST (single_set (insn))))
       {
-	rtx pat = single_set (insn);
+	rtx src, pat = single_set (insn);
+	src = SET_SRC (pat);
 
-	if (CONST_INT_P (SET_SRC (pat)))
+	if (CONST_INT_P (src))
 	  {
-	    *count = INTVAL (SET_SRC (pat));
-	    return insn;
+	    *is_const = true;
+	    *value = INTVAL (src);
 	  }
+	else if (REG_P (src))
+	  { /* Check if previous insn sets SRC = constant.  */
+	    pat = single_set (PREV_INSN (insn));
+	    if (pat != NULL_RTX && rtx_equal_p (src, SET_DEST (pat))
+		&& CONST_INT_P (SET_SRC (pat)))
+	      {
+		*is_const = true;
+		*value = INTVAL (SET_SRC (pat));
+	      }
+	    else
+		*is_const = false;
+	  }
+	else
+	  *is_const = false;
 
-	return NULL_RTX;
+	return insn;
       }
+    else if (reg_set_p (reg, insn))
+      break;
 
+  *is_const = false;
   return NULL_RTX;
 }
 
@@ -1008,7 +1304,8 @@ clear:
 
 static void
 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
-			   int to_stage, int for_prolog, rtx count_reg)
+			   int to_stage, int for_prolog, rtx count_reg,
+			   bool doloop_p)
 {
   int row;
   ps_insn_ptr ps_ij;
@@ -1020,14 +1317,14 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 	int i_reg_moves;
 	rtx reg_move = NULL_RTX;
 
-        /* Do not duplicate any insn which refers to count_reg as it
-           belongs to the control part.
+        /* In doloop case do not duplicate any insn which refers
+	   to count_reg as it belongs to the control part.
            The closing branch is scheduled as well and thus should
            be ignored.
            TODO: This should be done by analyzing the control part of
            the loop.  */
-        if (reg_mentioned_p (count_reg, u_node->insn)
-            || JUMP_P (ps_ij->node->insn))
+        if ((doloop_p && reg_mentioned_p (count_reg, u_node->insn))
+            || JUMP_P (u_node->insn))
           continue;
 
 	if (for_prolog)
@@ -1104,7 +1401,10 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 static void
 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
-                        rtx count_reg, rtx count_init)
+                        rtx count_reg, bool doloop_p, bool count_init_isconst,
+			rtx fin_reg, HOST_WIDEST_INT fin_nonconst_adjust,
+			bool create_reg, HOST_WIDEST_INT reg_val,
+			rtx *created_reg)
 {
   int i;
   int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1113,12 +1413,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
   start_sequence ();
 
-  if (!count_init)
+  if (doloop_p && !count_init_isconst)
     {
-      /* Generate instructions at the beginning of the prolog to
-         adjust the loop count by STAGE_COUNT.  If loop count is constant
-         (count_init), this constant is adjusted by STAGE_COUNT in
-         generate_prolog_epilog function.  */
+      /* In doloop we generate instructions at the beginning of the prolog to
+         adjust the initial value of doloop counter by STAGE_COUNT.
+	 If loop count is constant, this adjustment is done outside this
+         function, simply correcting the source of initialization insn.  */
       rtx sub_reg = NULL_RTX;
 
       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
@@ -1129,8 +1429,40 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
         emit_move_insn (count_reg, sub_reg);
     }
 
+  if (!doloop_p)
+    {
+      /* In non-doloop we generate instructions at the beginning of
+         the prolog to adjust the final value (with this value loop count
+	 register is compared to check whether the loop should stop).  */
+      if (fin_nonconst_adjust != 0)
+	{
+	  /* If the final value is in a register - create another register
+	     to store a shifted value.  */
+	  rtx new_reg, reg = NULL_RTX;
+	  reg = gen_reg_rtx (GET_MODE (fin_reg));
+	  new_reg = expand_simple_binop (GET_MODE (fin_reg), MINUS, fin_reg,
+					 GEN_INT (fin_nonconst_adjust),
+					 reg, 0, OPTAB_DIRECT);
+	  gcc_assert (REG_P (new_reg));
+	  if (REGNO (new_reg) != REGNO (reg))
+	    emit_move_insn (reg, new_reg);
+	  *created_reg = new_reg;
+	}
+      else if (create_reg)
+	{
+	  /* If old final value is an immediate, and the new one can't be
+	     an immediate, we create a register to store it.  If both values
+	     are immediate the adjustment is done outside this fuction,
+	     just correcting the constant value in compare intruction.  */
+	  rtx reg = NULL_RTX;
+	  reg = gen_reg_rtx (GET_MODE (count_reg));
+	  emit_move_insn (reg, GEN_INT (reg_val));
+	  *created_reg = reg;
+	}
+    }
+
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
+    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg, doloop_p);
 
   /* Put the prolog on the entry edge.  */
   e = loop_preheader_edge (loop);
@@ -1142,7 +1474,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
   start_sequence ();
 
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
+    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg, doloop_p);
 
   /* Put the epilogue on the exit edge.  */
   gcc_assert (single_exit (loop));
@@ -1422,13 +1754,30 @@ sms_schedule (void)
           continue;
         }
 
-      /* Make sure this is a doloop.  */
-      if ( !(count_reg = doloop_register_get (head, tail)))
-      {
-        if (dump_file)
-          fprintf (dump_file, "SMS doloop_register_get failed\n");
-	continue;
-      }
+      /* Is this a doloop?  */
+      if ((count_reg = doloop_register_get (head, tail)))
+        {
+	  if (dump_file)
+	    fprintf (dump_file, "SMS doloop\n");
+        }
+      else if ((count_reg = nondoloop_register_get (head, tail, 0,
+						    &insn, &insn)))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "SMS non-doloop\n");
+	}
+      else if ((count_reg = nondoloop_register_get (head, tail, 1,
+						    &insn, &insn)))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "SMS non-doloop with transposed cmp\n");
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "SMS imcompatible loop\n");
+	  continue;
+	}
 
       /* Don't handle BBs with calls or barriers, or !single_set insns
 	  with the exception of instructions that include count_reg---these
@@ -1477,7 +1826,7 @@ sms_schedule (void)
 	    fprintf (dump_file, "SMS create_ddg failed\n");
 	  continue;
         }
-
+      sms_create_ddg_finish ();
       g_arr[loop->num] = g;
       if (dump_file)
         fprintf (dump_file, "...OK\n");
@@ -1489,15 +1838,27 @@ sms_schedule (void)
     fprintf (dump_file, "=========================\n\n");
   }
 
+  df_clear_flags (DF_LR_RUN_DCE);
+
   /* We don't want to perform SMS on new loops - created by versioning.  */
   FOR_EACH_LOOP (li, loop, 0)
     {
+      bool doloop_p, count_fin_isconst, count_init_isconst;
+      bool was_immediate = false;
+      bool prolog_create_reg = false;
+      int prolog_fin_nonconst_adjust = 0;
+      bool nonsimple_loop = false;
       rtx head, tail;
-      rtx count_reg, count_init;
-      int mii, rec_mii;
-      unsigned stage_count = 0;
-      HOST_WIDEST_INT loop_count = 0;
       bool opt_sc_p = false;
+      rtx count_reg, count_fin_reg, new_comp_reg = NULL_RTX;
+      rtx count_init_insn, count_fin_init_insn;
+      rtx add, cmp;
+      int mii, rec_mii, cmp_side = -1;
+      int stage_count = 0;
+      HOST_WIDEST_INT count_init_val = 0, count_fin_val = 0;
+      HOST_WIDEST_INT count_step = 0, loop_count = -1;
+      HOST_WIDEST_INT count_fin_newval = 0;
+      struct niter_desc *desc = NULL;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1535,32 +1896,159 @@ sms_schedule (void)
 	               (HOST_WIDEST_INT) profile_info->sum_max);
 	      fprintf (dump_file, "\n");
 	    }
-	  fprintf (dump_file, "SMS doloop\n");
 	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
 	}
 
 
-      /* In case of th loop have doloop register it gets special
-	 handling.  */
-      count_init = NULL_RTX;
-      if ((count_reg = doloop_register_get (head, tail)))
+      /* Extract count register and determine loop type.  */
+      add = NULL_RTX;
+      cmp = NULL_RTX;
+      if ((count_reg = doloop_register_get (head, tail))
+	  || (count_reg = nondoloop_register_get (head, tail, 0, &add, &cmp))
+	  || (count_reg = nondoloop_register_get (head, tail, 1, &add, &cmp)))
 	{
-	  basic_block pre_header;
+	  basic_block pre_header = loop_preheader_edge (loop)->src;
+
+	  doloop_p = (cmp == NULL_RTX);
+	  if (doloop_p)
+	    {
+	      /* Doloop finish parameters are always the same.  */
+	      count_step = -1;
+	      count_fin_isconst = true;
+	      count_fin_val = 0;
+	      count_fin_reg = NULL_RTX;
+	      count_fin_init_insn = NULL_RTX;
+	    }
+	  else
+	    {
+	      /* In other loop we need to determine counter step
+	         and finish parameters.  */
+	      rtx step, end;
+
+	      gcc_assert (single_set (add) && single_set (cmp));
+
+	      /* Extract the step.  */
+	      step = XEXP (SET_SRC (single_set (add)), 1);
+	      gcc_assert (CONST_INT_P (step));
+
+	      if (GET_CODE (SET_SRC (single_set (add))) == MINUS)
+	        count_step = - INTVAL (step);
+	      else if (GET_CODE (SET_SRC (single_set (add))) == PLUS)
+	        count_step = INTVAL (step);
+	      else
+		gcc_unreachable ();
 
-	  pre_header = loop_preheader_edge (loop)->src;
-	  count_init = const_iteration_count (count_reg, pre_header,
-					      &loop_count);
+	      gcc_assert(count_step != 0);
+
+	      /* Check what operand of compare insn is a counter register.  */
+	      if (count_reg == XEXP (SET_SRC (single_set (cmp)), 0))
+		cmp_side = 0;
+	      else if (count_reg == XEXP (SET_SRC (single_set (cmp)), 1))
+		cmp_side = 1;
+	      else
+		gcc_unreachable ();
+
+	      /* Extract finish border for counter reg.  */
+	      end = XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side);
+
+	      if (CONST_INT_P (end))
+		{
+		  /* Constant finish border.  loop until (reg != const).  */
+		  count_fin_isconst = true;
+		  count_fin_val = INTVAL (end);
+		  count_fin_reg = NULL_RTX;
+		  count_fin_init_insn = NULL_RTX;
+		}
+	      else if (REG_P (end))
+		{
+		  /* Register is a border.  Loop until (reg != fin_reg).  */
+		  count_fin_reg = end;
+		  count_fin_isconst = false;
+		  /* Try to find constant initinalization of fin_reg
+		   * in preheader.  */
+		  count_fin_init_insn = search_const_init (pre_header,
+							   count_fin_reg,
+							   &count_fin_isconst,
+							   &count_fin_val);
+		}
+	      else
+		gcc_unreachable ();
+	    }
+	  /* Try to find a constant initalization of count_reg in preheader.  */
+	  count_init_insn = search_const_init (pre_header,
+					       count_reg,
+					       &count_init_isconst,
+					       &count_init_val);
+	}
+      else /* Loop is incompatible now, but it was OK on while analyzing!  */
+	gcc_assert (count_reg);
+
+
+      desc = get_simple_loop_desc (loop);
+      gcc_assert (desc);
+      /* nonsimple_loop means it's impossible to analyze the loop
+         or there are some assumptions to make the analyzis results right
+         or there is a condition of non-infinite number of iterations.
+        We want doloops to be scheduled even if analyzis shows they are
+	 nonsimple (backward compatibility).  */
+      nonsimple_loop = !desc->simple_p;
+      /* We allow scheduling loop with some assumptions or infinite condition
+	 only when unsafe_loop_optimizations flag is enabled.  */
+      if (flag_unsafe_loop_optimizations)
+	 {
+	   desc->infinite = NULL_RTX;
+	   desc->assumptions = NULL_RTX;
+	   desc->noloop_assumptions = NULL_RTX;
+	 }
+      nonsimple_loop = nonsimple_loop || (desc->assumptions != NULL_RTX)
+			|| (desc->noloop_assumptions != NULL_RTX)
+			|| (desc->infinite != NULL_RTX);
+      /* Only doloops can be nonsimple_loops for SMS.  */
+      if (nonsimple_loop && !doloop_p)
+	{
+	  free_ddg (g);
+	  continue;
+	}
+      /* Manually set some description fields in non-simple doloop.  */
+      if (nonsimple_loop)
+	{
+	  gcc_assert(doloop_p);
+	  desc->const_iter = false;
+	  desc->infinite = NULL_RTX;
 	}
-      gcc_assert (count_reg);
 
-      if (dump_file && count_init)
+      if (desc->const_iter)
+	{
+	  gcc_assert (!desc->infinite);
+	  loop_count = desc->niter;
+	  if (dump_file)
+	    fprintf (dump_file, "SMS const loop iterations = "
+		     HOST_WIDEST_INT_PRINT_DEC "\n", loop_count);
+	}
+      if (count_init_isconst && count_fin_isconst)
         {
-          fprintf (dump_file, "SMS const-doloop ");
-          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
-		     loop_count);
-          fprintf (dump_file, "\n");
+	  gcc_assert (doloop_p || desc->const_iter);
+	  if (doloop_p)
+	    {
+	      if (nonsimple_loop)
+		{
+	          loop_count = count_init_val;
+		  desc->const_iter = true;
+		}
+              gcc_assert (desc->const_iter && loop_count == count_init_val);
+	    }
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, "SMS const-%s ",
+		       doloop_p ? "doloop" : "loop");
+	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC " to "
+		       HOST_WIDEST_INT_PRINT_DEC " step "
+		       HOST_WIDEST_INT_PRINT_DEC,
+		       count_init_val, count_fin_val, count_step);
+	      fprintf (dump_file, "\n");
+	    }
         }
 
       node_order = XNEWVEC (int, g->num_nodes);
@@ -1608,8 +2096,8 @@ sms_schedule (void)
       /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
 	 1 means that there is no interleaving between iterations thus
 	 we let the scheduling passes do the job in this case.  */
-      if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
-	  || (count_init && (loop_count <= stage_count))
+      if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
+	  || (desc->const_iter && (loop_count <= stage_count))
 	  || (flag_branch_probabilities && (trip_count <= stage_count)))
 	{
 	  if (dump_file)
@@ -1625,8 +2113,10 @@ sms_schedule (void)
       else
 	{
 	  struct undo_replace_buff_elem *reg_move_replaces;
+	  int row, cmp_stage = -1;
+	  ps_insn_ptr crr_insn;
 
-          if (!opt_sc_p)
+	  if (!opt_sc_p)
             {
 	      /* Rotate the partial schedule to have the branch in row ii-1.  */
               int amount = SCHED_TIME (g->closing_branch) + 1;
@@ -1647,23 +2137,24 @@ sms_schedule (void)
 	      print_partial_schedule (ps, dump_file);
 	    }
  
-          /* case the BCT count is not known , Do loop-versioning */
-	  if (count_reg && ! count_init)
-            {
-	      rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
-	  				     GEN_INT(stage_count));
-	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
-			       * REG_BR_PROB_BASE) / 100;
-
-	      loop_version (loop, comp_rtx, &condition_bb,
-	  		    prob, prob, REG_BR_PROB_BASE - prob,
-			    true);
-	     }
+	  if (!desc->const_iter)
+	    {
+	      /* Loop versioning if the number of iterations is unknown.  */
+	      unsigned prob;
+	      rtx vers_cond;
+	      vers_cond = gen_rtx_fmt_ee (GT, VOIDmode, nonsimple_loop ?
+					  count_reg : desc->niter_expr,
+					  GEN_INT (stage_count));
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "\nLoop versioning condition:\n");
+		  print_rtl_single (dump_file, vers_cond);
+		}
 
-	  /* Set new iteration count of loop kernel.  */
-          if (count_reg && count_init)
-	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-						     - stage_count + 1);
+	      prob = (PROB_SMS_ENOUGH_ITERATIONS * REG_BR_PROB_BASE) / 100;
+	      loop_version (loop, vers_cond, &condition_bb, prob,
+			    prob, REG_BR_PROB_BASE - prob, true);
+	    }
 
 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
 	  permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1678,8 +2169,116 @@ sms_schedule (void)
 	  reg_move_replaces = generate_reg_moves (ps, true);
 	  if (dump_file)
 	    print_node_sched_params (dump_file, g->num_nodes, g);
-	  /* Generate prolog and epilog.  */
-          generate_prolog_epilog (ps, loop, count_reg, count_init);
+
+	  if (doloop_p && count_init_isconst)
+	    {
+	      /* Change counter reg initialization constant. In more complex
+	         cases this adjustment is done with adding some insns
+		 to loop prologue in generate_prolog_epilog function.  */
+	      gcc_assert (single_set (count_init_insn) != NULL_RTX);
+	      SET_SRC (single_set (count_init_insn))
+		    = GEN_INT (count_init_val - stage_count + 1);
+	      df_insn_rescan (count_init_insn);
+	    }
+
+	  if (!doloop_p)
+	    {
+	      /* Calculation of the compare insn stage in schedule.  */
+	      for (row = 0; row < ps->ii; row++)
+		for (crr_insn = ps->rows[row];
+		     crr_insn;
+		     crr_insn = crr_insn->next_in_row)
+		  {
+		    gcc_assert (0 <= SCHED_STAGE (crr_insn->node));
+		    gcc_assert (SCHED_STAGE (crr_insn->node) < stage_count);
+		    if (rtx_equal_p (crr_insn->node->insn, cmp))
+		      {
+			gcc_assert (cmp_stage == -1);
+		        cmp_stage = SCHED_STAGE (crr_insn->node);
+		      }
+		  }
+              if (dump_file)
+		fprintf (dump_file, "cmp_stage=%d\n", cmp_stage);
+	      gcc_assert (cmp_stage >= 0);
+	    }
+
+	  /* When compare insn stage is non-zero we are to shift the final
+	     counter reg value (which counter is compared to exit loop).
+	     Final value can be an immediate or can be a register, which
+	     constant initialization we find in preheader.  */
+	  was_immediate = false;
+	  if (!doloop_p && count_fin_isconst && cmp_stage > 0)
+	    {
+              gcc_assert (0 <= cmp_side && cmp_side <= 1);
+	      /* New finish value.  */
+	      count_fin_newval = count_fin_val - count_step * cmp_stage;
+	      was_immediate = CONST_INT_P (XEXP (SET_SRC (single_set (cmp)),
+							  1 - cmp_side));
+	      if (was_immediate)
+		{
+		  /* Check whether new value also can be an immediate.
+		     For exapmle, on ARM not all values can be encoded as
+		     an immediate, so we have to load it to a register once
+		     before the loop starts.  */
+		  rtx to = GEN_INT (count_fin_newval);
+		  prolog_create_reg = rtx_cost (to, GET_CODE (to), false)
+				    > rtx_cost (GEN_INT(1), CONST_INT, false);
+	        }
+	      else
+		{
+		  /* A value is already in a register and we easily change
+		     initialization instruction in preheader.  */
+		  gcc_assert (count_fin_init_insn);
+		  SET_SRC (single_set (count_fin_init_insn))
+			= GEN_INT (count_fin_newval);
+		  df_insn_rescan (count_fin_init_insn);
+		}
+	    }
+
+	  /* The adjustment of finish register value.
+	     Zero means no adjustment needed or adjusment is done
+	     without additional insn in prologue.  */
+	  if (!doloop_p && !count_fin_isconst)
+	    prolog_fin_nonconst_adjust = count_step * cmp_stage;
+
+	  /* Ready to generate prolog and epilog.  */
+	  generate_prolog_epilog (ps, loop, count_reg, doloop_p,
+			          count_init_isconst, count_fin_reg,
+				  prolog_fin_nonconst_adjust,
+				  prolog_create_reg, count_fin_newval,
+				  &new_comp_reg);
+
+	  /* And only after generating prolog and epilog it is possible
+	     to modify the compare instruction (to prevent copying wrong insn
+	     form to first and last stages).  */
+	  if (!doloop_p && cmp_stage > 0)
+	    {
+              gcc_assert (0 <= cmp_side && cmp_side <= 1);
+	      if (was_immediate && !prolog_create_reg)
+		{
+		/* Easy case - just modify a constant.  */
+		  gcc_assert (new_comp_reg == NULL_RTX);
+		  XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side)
+			= GEN_INT (count_fin_newval);
+		}
+	      else
+		{
+		  if (count_fin_isconst && !was_immediate)
+		    /* Value is in a register and we already changed
+		       initialization instruction in preheader.  */
+		    gcc_assert (new_comp_reg == NULL_RTX);
+		  else
+		    {
+		      /* Another case - use created by generate_prolog_epilog
+		         register, which value is initialized in prologue.  */
+		      gcc_assert (new_comp_reg != NULL_RTX);
+		      XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side)
+			      = new_comp_reg;
+		    }
+		}
+	      df_insn_rescan (cmp);
+	    }
+	  else gcc_assert (new_comp_reg == NULL_RTX);
 
 	  free_undo_replace_buff (reg_move_replaces);
 	}
@@ -1690,7 +2289,9 @@ sms_schedule (void)
       free_ddg (g);
     }
 
+  df_set_flags (DF_LR_RUN_DCE);
   free (g_arr);
+  iv_analysis_done ();
 
   /* Release scheduler data, needed until now because of DFA.  */
   haifa_sched_finish ();
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 2eee49d..dce9363 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -1229,6 +1229,8 @@ extern void sched_deps_finish (void);
 extern void haifa_note_reg_set (int);
 extern void haifa_note_reg_clobber (int);
 extern void haifa_note_reg_use (int);
+void sms_sched_analyze_init (void);
+regset extract_from_insn_map (rtx, bool);
 
 extern void maybe_extend_reg_info_p (void);
 
