diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 19d6387..154df63 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_PAD_RETURNS */
   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
 
+  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion.  */
+  m_ATOM,
+
   /* X86_TUNE_EXT_80387_CONSTANTS */
   m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
   | m_CORE2 | m_GENERIC,
@@ -8017,6 +8020,11 @@ ix86_code_end (void)
 
       xops[0] = gen_rtx_REG (Pmode, regno);
       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+      /* Pad stack IP move with 4 instructions.  2 NOPs count as 1
+         instruction.  */
+      if (TARGET_PAD_SHORT_FUNCTION)
+	output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
+			 xops);
       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
       output_asm_insn ("ret", xops);
       final_end_function ();
@@ -27776,6 +27784,135 @@ ix86_pad_returns (void)
     }
 }
 
+/* Count the minimum number of instructions in BB.  Return 4 if the
+   number of instructions >= 4.  */
+
+static int 
+ix86_count_insn_bb (basic_block bb)
+{
+  rtx insn;
+  int insn_count = 0;
+
+  /* Count number of instructions in this block.  Return 4 if the number
+     of instructions >= 4.  */
+  FOR_BB_INSNS (bb, insn)
+    {
+      /* Only happen in exit blocks.  */
+      if (JUMP_P (insn)
+	  && GET_CODE (PATTERN (insn)) == RETURN)
+	break;
+
+      if (NONDEBUG_INSN_P (insn)
+	  && GET_CODE (PATTERN (insn)) != USE
+	  && GET_CODE (PATTERN (insn)) != CLOBBER)
+	{
+	  insn_count++;
+	  if (insn_count >= 4)
+	    return insn_count;
+	}
+    }
+
+  return insn_count;
+}
+
+
+/* Count the minimum number of instructions in code path in BB.  
+   Return 4 if the number of instructions >= 4.  */
+
+static int 
+ix86_count_insn (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  int insn_count = ix86_count_insn_bb (bb);
+  int min_insn_count;
+
+  if (insn_count >= 4)
+    return insn_count;
+
+  /* This block has less than 4 instructions.  Count predecessor
+     edges of this block.  */
+  min_insn_count = insn_count;
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      int count = insn_count + ix86_count_insn_bb (e->src);
+
+      if (count < 4)
+	{
+	  /* This block plus its predecessor have less than 4
+	     instructions.  Check predecessor edges.  */
+	  edge prev_e;
+	  edge_iterator prev_ei;
+	  int old_count = count;
+	  bool has_prev_bb = false;
+
+	  count = 4;
+	  FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
+	    {
+	      /* Check if the predecessor is entry point.  Not a short
+		 function if it has more than 2 basic blocks.  */
+	      if (prev_e->src == ENTRY_BLOCK_PTR)
+		{
+		  has_prev_bb = false;
+		  break;
+		}
+	      has_prev_bb = true;
+	    }
+
+	  if (!has_prev_bb)
+	    count = old_count;
+	}
+
+      if (min_insn_count == insn_count)
+	min_insn_count = count;
+      else if (count < min_insn_count)
+	min_insn_count = count;
+    }
+
+  return min_insn_count;
+}
+
+/* Pad short funtion to 4 instructions.   */
+
+static void
+ix86_pad_short_function (void)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+    {
+      rtx ret = BB_END (e->src);
+      if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+	{
+	  int insn_count = ix86_count_insn (e->src);
+
+	  /* Pad short function.  */
+	  if (insn_count < 4)
+	    {
+	      rtx nop = gen_nop ();
+	      rtx insn = ret;
+
+	      /* Find epilogue.  */
+	      while (insn
+		     && (!NOTE_P (insn)
+			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
+		insn = PREV_INSN (insn);
+
+	      if (!insn)
+		insn = ret;
+
+	      /* Two NOPs are counted as one instruction.  */
+	      for (; insn_count < 4; insn_count++)
+		{
+		  emit_insn_before (nop, insn);
+		  emit_insn_before (nop, insn);
+		}
+	    }
+	}
+    }
+}
+
 /* Implement machine specific optimizations.  We implement padding of returns
    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
 static void
@@ -27783,7 +27920,9 @@ ix86_reorg (void)
 {
   if (optimize && optimize_function_for_speed_p (cfun))
     {
-      if (TARGET_PAD_RETURNS)
+      if (TARGET_PAD_SHORT_FUNCTION)
+	ix86_pad_short_function ();
+      else if (TARGET_PAD_RETURNS)
 	ix86_pad_returns ();
 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
       if (TARGET_FOUR_JUMP_LIMIT)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fe48efd..c28768f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -299,6 +299,7 @@ enum ix86_tune_indices {
   X86_TUNE_USE_BT,
   X86_TUNE_USE_INCDEC,
   X86_TUNE_PAD_RETURNS,
+  X86_TUNE_PAD_SHORT_FUNCTION,
   X86_TUNE_EXT_80387_CONSTANTS,
   X86_TUNE_SHORTEN_X87_SSE,
   X86_TUNE_AVOID_VECTOR_DECODE,
@@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_BT		ix86_tune_features[X86_TUNE_USE_BT]
 #define TARGET_USE_INCDEC	ix86_tune_features[X86_TUNE_USE_INCDEC]
 #define TARGET_PAD_RETURNS	ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_PAD_SHORT_FUNCTION \
+	ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
 #define TARGET_EXT_80387_CONSTANTS \
 	ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
 #define TARGET_SHORTEN_X87_SSE	ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
diff --git a/gcc/testsuite/gcc.target/i386/pad-1.c b/gcc/testsuite/gcc.target/i386/pad-1.c
new file mode 100644
index 0000000..87a9d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */
+/* { dg-final { scan-assembler "rep" } } */
+/* { dg-final { scan-assembler-not "nop" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-10.c b/gcc/testsuite/gcc.target/i386/pad-10.c
new file mode 100644
index 0000000..6ba3b78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-10.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar ();
+
+int
+foo2 (int z, int x)
+{
+  if (x == 1)
+    {
+      bar ();
+      return z;
+    }
+  else
+    return x + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-2.c b/gcc/testsuite/gcc.target/i386/pad-2.c
new file mode 100644
index 0000000..e7659a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 8 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-3.c b/gcc/testsuite/gcc.target/i386/pad-3.c
new file mode 100644
index 0000000..52442b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int s[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+int d[8] = {11, 22, 33, 44, 55, 66, 77, 88};
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+    d[i] = s[i] + 0x1000;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-4.c b/gcc/testsuite/gcc.target/i386/pad-4.c
new file mode 100644
index 0000000..a7033fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern int bar;
+
+int
+foo ()
+{
+  return bar;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5a.c b/gcc/testsuite/gcc.target/i386/pad-5a.c
new file mode 100644
index 0000000..4af419e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5a.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 2 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5b.c b/gcc/testsuite/gcc.target/i386/pad-5b.c
new file mode 100644
index 0000000..48ab446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5b.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 4 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6a.c b/gcc/testsuite/gcc.target/i386/pad-6a.c
new file mode 100644
index 0000000..379ba65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6a.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 4 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6b.c b/gcc/testsuite/gcc.target/i386/pad-6b.c
new file mode 100644
index 0000000..fdedca1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6b.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 6 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-7.c b/gcc/testsuite/gcc.target/i386/pad-7.c
new file mode 100644
index 0000000..7a7493d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-7.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-8.c b/gcc/testsuite/gcc.target/i386/pad-8.c
new file mode 100644
index 0000000..d3163fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-8.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 6 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-9.c b/gcc/testsuite/gcc.target/i386/pad-9.c
new file mode 100644
index 0000000..572a1af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop" 4 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar (void);
+
+void
+foo (int x)
+{
+  if (x)
+    bar ();
+}
