Patchwork PATCH: Pad short functions for Atom

login
register
mail settings
Submitter H.J. Lu
Date Sept. 17, 2010, 8:53 p.m.
Message ID <AANLkTins5WUew_mVzE=ytzQZorQxVCgnkWk1L0C22R11@mail.gmail.com>
Download mbox | patch
Permalink /patch/65105/
State New
Headers show

Comments

H.J. Lu - Sept. 17, 2010, 8:53 p.m.
On Fri, Sep 17, 2010 at 12:27 PM, Richard Henderson <rth@redhat.com> wrote:
>> +  int insn_count = ix86_count_insn_bb (bb);
>> +  int min_insn_count;
>> +
>> +  if (insn_count >= 4)
>> +    return insn_count;
>> +
>> +  /* This block has less than 4 instructions.  Count predecessor
>> +     edges of this block.  */
>> +  min_insn_count = insn_count;
>> +  FOR_EACH_EDGE (e, ei, bb->preds)
>
> So this is the exit block we're looking at now...
>
>> +      int count = insn_count + ix86_count_insn_bb (e->src);
>
> (You may be counting insns in ENTRY_BLOCK here.)
>
>> +
>> +      if (count < 4)
>> +     {
>> +       /* This block plus its predecessor have less than 4
>> +          instructions.  Check predecessor edges.  */
>> +       edge prev_e;
>> +       edge_iterator prev_ei;
>> +       int old_count = count;
>> +       bool has_prev_bb = false;
>> +
>> +       count = 4;
>> +       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
>> +         {
>> +           /* Check if the predecessor is entry point.  Not a short
>> +              function if it has more than 2 basic blocks.  */
>> +           if (prev_e->src == ENTRY_BLOCK_PTR)
>
> ... and this is the predecessor.
>
> I suspect that it would be quicker to verify the CFG form
> before counting instructions.  At least it'll be clearer.
> E.g.
>
>  /* Only bother counting instructions along paths with no
>     more than 2 basic blocks between entry and exit.  Given
>     that BB has an edge to exit, determine if a predecessor
>     of BB has an edge from entry.  If so, compute the number
>     of instructions in the predecessor block.  If there
>     happen to be multiple such blocks, compute the minimum.  */
>  min_prev_count = 4;
>  FOR_EACH_EDGE (e, ei, bb->preds)
>    {
>      if (e->src == ENTRY_BLOCK_PTR)
>        {
>          min_prev_count = 0;
>          break;
>        }
>      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
>        {
>          if (prev_e->src == ENTRY_BLOCK_PTR)
>            {
>              c = ix86_count_insn_bb (e->src);
>              if (c < min_prev_count)
>                min_prev_count = c;
>              break;
>            }
>        }
>
>  if (min_prev_count < 4)
>    min_prev_count += ix86_count_insn_bb (bb);
>  return min_prev_count;
>
>

Here is the updated patch.  OK for trunk?

Thanks.
Richard Henderson - Sept. 17, 2010, 9:04 p.m.
Ok.


r~

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index abec057..d9f9237 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1576,6 +1576,9 @@  static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_PAD_RETURNS */
   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
 
+  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion.  */
+  m_ATOM,
+
   /* X86_TUNE_EXT_80387_CONSTANTS */
   m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
   | m_CORE2 | m_GENERIC,
@@ -8021,6 +8024,11 @@  ix86_code_end (void)
 
       xops[0] = gen_rtx_REG (Pmode, regno);
       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+      /* Pad stack IP move with 4 instructions.  2 NOPs count as 1
+         instruction.  */
+      if (TARGET_PAD_SHORT_FUNCTION)
+	output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
+			 xops);
       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
       output_asm_insn ("ret", xops);
       final_end_function ();
@@ -27882,6 +27890,120 @@  ix86_pad_returns (void)
     }
 }
 
+/* Count the minimum number of instructions in BB.  Return 4 if the
+   number of instructions >= 4.  */
+
+static int 
+ix86_count_insn_bb (basic_block bb)
+{
+  rtx insn;
+  int insn_count = 0;
+
+  /* Count number of instructions in this block.  Return 4 if the number
+     of instructions >= 4.  */
+  FOR_BB_INSNS (bb, insn)
+    {
+      /* Only happen in exit blocks.  */
+      if (JUMP_P (insn)
+	  && GET_CODE (PATTERN (insn)) == RETURN)
+	break;
+
+      if (NONDEBUG_INSN_P (insn)
+	  && GET_CODE (PATTERN (insn)) != USE
+	  && GET_CODE (PATTERN (insn)) != CLOBBER)
+	{
+	  insn_count++;
+	  if (insn_count >= 4)
+	    return insn_count;
+	}
+    }
+
+  return insn_count;
+}
+
+
+/* Count the minimum number of instructions in code path in BB.  
+   Return 4 if the number of instructions >= 4.  */
+
+static int 
+ix86_count_insn (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  int min_prev_count;
+
+  /* Only bother counting instructions along paths with no
+     more than 2 basic blocks between entry and exit.  Given
+     that BB has an edge to exit, determine if a predecessor
+     of BB has an edge from entry.  If so, compute the number
+     of instructions in the predecessor block.  If there
+     happen to be multiple such blocks, compute the minimum.  */
+  min_prev_count = 4;
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      edge prev_e;
+      edge_iterator prev_ei;
+
+      if (e->src == ENTRY_BLOCK_PTR)
+	{
+	  min_prev_count = 0;
+	  break;
+	}
+      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
+	{
+	  if (prev_e->src == ENTRY_BLOCK_PTR)
+	    {
+	      int count = ix86_count_insn_bb (e->src);
+	      if (count < min_prev_count)
+		min_prev_count = count;
+	      break;
+	    }
+	}
+    }
+
+  if (min_prev_count < 4)
+    min_prev_count += ix86_count_insn_bb (bb);
+
+  return min_prev_count;
+}
+
+/* Pad short funtion to 4 instructions.   */
+
+static void
+ix86_pad_short_function (void)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+    {
+      rtx ret = BB_END (e->src);
+      if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+	{
+	  int insn_count = ix86_count_insn (e->src);
+
+	  /* Pad short function.  */
+	  if (insn_count < 4)
+	    {
+	      rtx insn = ret;
+
+	      /* Find epilogue.  */
+	      while (insn
+		     && (!NOTE_P (insn)
+			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
+		insn = PREV_INSN (insn);
+
+	      if (!insn)
+		insn = ret;
+
+	      /* Two NOPs are counted as one instruction.  */
+	      insn_count = 2 * (4  - insn_count);
+	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
+	    }
+	}
+    }
+}
+
 /* Implement machine specific optimizations.  We implement padding of returns
    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
 static void
@@ -27889,7 +28011,9 @@  ix86_reorg (void)
 {
   if (optimize && optimize_function_for_speed_p (cfun))
     {
-      if (TARGET_PAD_RETURNS)
+      if (TARGET_PAD_SHORT_FUNCTION)
+	ix86_pad_short_function ();
+      else if (TARGET_PAD_RETURNS)
 	ix86_pad_returns ();
 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
       if (TARGET_FOUR_JUMP_LIMIT)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 22dd02b..aa246c6 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -299,6 +299,7 @@  enum ix86_tune_indices {
   X86_TUNE_USE_BT,
   X86_TUNE_USE_INCDEC,
   X86_TUNE_PAD_RETURNS,
+  X86_TUNE_PAD_SHORT_FUNCTION,
   X86_TUNE_EXT_80387_CONSTANTS,
   X86_TUNE_SHORTEN_X87_SSE,
   X86_TUNE_AVOID_VECTOR_DECODE,
@@ -385,6 +386,8 @@  extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_BT		ix86_tune_features[X86_TUNE_USE_BT]
 #define TARGET_USE_INCDEC	ix86_tune_features[X86_TUNE_USE_INCDEC]
 #define TARGET_PAD_RETURNS	ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_PAD_SHORT_FUNCTION \
+	ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
 #define TARGET_EXT_80387_CONSTANTS \
 	ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
 #define TARGET_SHORTEN_X87_SSE	ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ec43793..4ccd932 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@ 
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
   UNSPEC_DIV_ALREADY_SPLIT
+  UNSPEC_NOPS
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -11465,6 +11466,39 @@ 
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
+;; Generate nops.  Operand 0 is the number of nops, up to 8.
+(define_insn "nops"
+  [(unspec [(match_operand 0 "const_int_operand" "")]
+	   UNSPEC_NOPS)]
+  "reload_completed"
+{
+  switch (INTVAL (operands[0]))
+    {
+    case 1:
+      return "nop";
+    case 2:
+      return "nop; nop";
+    case 3:
+      return "nop; nop; nop";
+    case 4:
+      return "nop; nop; nop; nop";
+    case 5:
+      return "nop; nop; nop; nop; nop";
+    case 6:
+      return "nop; nop; nop; nop; nop; nop";
+    case 7:
+      return "nop; nop; nop; nop; nop; nop; nop";
+    case 8:
+      return "nop; nop; nop; nop; nop; nop; nop; nop";
+    default:
+      gcc_unreachable ();
+      break;
+  }
+}
+  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
 ;; Pad to 16-byte boundary, max skip in op0.  Used to avoid
 ;; branch prediction penalty for the third jump in a 16-byte
 ;; block on K8.
diff --git a/gcc/testsuite/gcc.target/i386/pad-1.c b/gcc/testsuite/gcc.target/i386/pad-1.c
new file mode 100644
index 0000000..87a9d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-1.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */
+/* { dg-final { scan-assembler "rep" } } */
+/* { dg-final { scan-assembler-not "nop" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-10.c b/gcc/testsuite/gcc.target/i386/pad-10.c
new file mode 100644
index 0000000..6ba3b78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-10.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar ();
+
+int
+foo2 (int z, int x)
+{
+  if (x == 1)
+    {
+      bar ();
+      return z;
+    }
+  else
+    return x + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-2.c b/gcc/testsuite/gcc.target/i386/pad-2.c
new file mode 100644
index 0000000..964547c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-2.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-3.c b/gcc/testsuite/gcc.target/i386/pad-3.c
new file mode 100644
index 0000000..52442b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int s[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+int d[8] = {11, 22, 33, 44, 55, 66, 77, 88};
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+    d[i] = s[i] + 0x1000;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-4.c b/gcc/testsuite/gcc.target/i386/pad-4.c
new file mode 100644
index 0000000..a7033fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-4.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern int bar;
+
+int
+foo ()
+{
+  return bar;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5a.c b/gcc/testsuite/gcc.target/i386/pad-5a.c
new file mode 100644
index 0000000..9d0aa2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5a.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5b.c b/gcc/testsuite/gcc.target/i386/pad-5b.c
new file mode 100644
index 0000000..2e1cf12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5b.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6a.c b/gcc/testsuite/gcc.target/i386/pad-6a.c
new file mode 100644
index 0000000..e865967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6a.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6b.c b/gcc/testsuite/gcc.target/i386/pad-6b.c
new file mode 100644
index 0000000..41aeaee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6b.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-7.c b/gcc/testsuite/gcc.target/i386/pad-7.c
new file mode 100644
index 0000000..7a7493d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-7.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-8.c b/gcc/testsuite/gcc.target/i386/pad-8.c
new file mode 100644
index 0000000..873a0a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-8.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-9.c b/gcc/testsuite/gcc.target/i386/pad-9.c
new file mode 100644
index 0000000..3d68805
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-9.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar (void);
+
+void
+foo (int x)
+{
+  if (x)
+    bar ();
+}