diff mbox

[SH] PR 31640 - cache block alignment is too aggressive on sh-elf

Message ID 1325469242.18753.785.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo Jan. 2, 2012, 1:54 a.m. UTC
The attached patch addresses PR 31640.
It reduces the default function alignment when not optimizing for
size from cache line size (32 bytes) to 4 bytes and sets the loop
alignment to 4 bytes when not optimizing for size.  Moreover, it brings
back the -falign-loops option which was always overridden and disabled
for -m4.

Tested against rev 182734 with 
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,
-m2/-mb,
-m2a-single/-mb,
-m4-single/-ml,
-m4-single/-mb,
-m4a-single/-ml,
-m4a-single/-mb}"

and no new failures. 
OK for trunk?

2012-01-02  Oleg Endo  <olegendo@gcc.gnu.org>

	PR target/31640
	* config/sh/sh.h (LOOP_ALIGN): Move logic to ...
	* config/sh/sh.c (sh_loop_align): ... here.  Don't disable loop
	alignment for TARGET_HARD_SH4.
	(sh_option_override): Reduce default function alignment.  Set 
	loop alignment to 4 bytes when not optimizing for size.

Comments

Kaz Kojima Jan. 4, 2012, 10:14 a.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch addresses PR 31640.
> It reduces the default function alignment when not optimizing for
> size from cache line size (32 bytes) to 4 bytes and sets the loop
> alignment to 4 bytes when not optimizing for size.  Moreover, it brings
> back the -falign-loops option which was always overridden and disabled
> for -m4.
> 
> Tested against rev 182734 with 
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,
> -m2/-mb,
> -m2a-single/-mb,
> -m4-single/-ml,
> -m4-single/-mb,
> -m4a-single/-ml,
> -m4a-single/-mb}"
> 
> and no new failures. 
> OK for trunk?
> 
> 2012-01-02  Oleg Endo  <olegendo@gcc.gnu.org>
> 
> 	PR target/31640
> 	* config/sh/sh.h (LOOP_ALIGN): Move logic to ...
> 	* config/sh/sh.c (sh_loop_align): ... here.  Don't disable loop
> 	alignment for TARGET_HARD_SH4.
> 	(sh_option_override): Reduce default function alignment.  Set 
> 	loop alignment to 4 bytes when not optimizing for size.

OK for trunk when it returns to stage 1 or 2.

Regards,
	kaz
diff mbox

Patch

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 182734)
+++ gcc/config/sh/sh.c	(working copy)
@@ -816,20 +816,42 @@ 
 	}
     }
 
+  /*  Adjust loop, jump and function alignment values (in bytes), if those
+      were not specified by the user using -falign-loops, -falign-jumps
+      and -falign-functions options.
+      32 bit alignment is better for speed, because instructions can be
+      fetched as a pair from a longword boundary.  For size use 16 bit
+      alignment to get more compact code.
+      Aligning all jumps increases the code size, even if it might
+      result in slightly faster code.  Thus, it is set to the smallest 
+      alignment possible if not specified by the user.  */
   if (align_loops == 0)
-    align_loops =  1 << (TARGET_SH5 ? 3 : 2);
+    {
+      if (TARGET_SH5)
+	align_loops = 8;
+      else
+	align_loops = optimize_size ? 2 : 4;
+    }
+
   if (align_jumps == 0)
-    align_jumps = 1 << CACHE_LOG;
+    {
+      if (TARGET_SHMEDIA)
+	align_jumps = 1 << CACHE_LOG;
+      else
+	align_jumps = 2;
+    }
   else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
     align_jumps = TARGET_SHMEDIA ? 4 : 2;
 
-  /* Allocation boundary (in *bytes*) for the code of a function.
-     SH1: 32 bit alignment is faster, because instructions are always
-     fetched as a pair from a longword boundary.
-     SH2 .. SH5 : align to cache line start.  */
   if (align_functions == 0)
-    align_functions
-      = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
+    {
+      if (TARGET_SHMEDIA)
+	align_functions = optimize_size
+			  ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
+      else
+	align_functions = optimize_size ? 2 : 4;
+    }
+
   /* The linker relaxation code breaks when a function contains
      alignments that are larger than that at the start of a
      compilation unit.  */
@@ -5342,6 +5364,9 @@ 
 {
   rtx next = label;
 
+  if (! optimize || optimize_size)
+    return 0;
+
   do
     next = next_nonnote_insn (next);
   while (next && LABEL_P (next));
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h	(revision 182734)
+++ gcc/config/sh/sh.h	(working copy)
@@ -579,9 +579,7 @@ 
 #define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \
   barrier_align (LABEL_AFTER_BARRIER)
 
-#define LOOP_ALIGN(A_LABEL) \
-  ((! optimize || TARGET_HARD_SH4 || optimize_size) \
-   ? 0 : sh_loop_align (A_LABEL))
+#define LOOP_ALIGN(A_LABEL) sh_loop_align (A_LABEL)
 
 #define LABEL_ALIGN(A_LABEL) \
 (									\