===================================================================
@@ -339,9 +339,11 @@ Objective-C and Objective-C++ Dialects}.
@item Optimization Options
@xref{Optimize Options,,Options that Control Optimization}.
-@gccoptlist{-faggressive-loop-optimizations -falign-functions[=@var{n}] @gol
--falign-jumps[=@var{n}] @gol
--falign-labels[=@var{n}] -falign-loops[=@var{n}] @gol
+@gccoptlist{-faggressive-loop-optimizations @gol
+-falign-functions[=@var{n}[,@var{m}[,@var{n2}[,@var{m2}]]]] @gol
+-falign-jumps[=@var{n}[,@var{m}[,@var{n2}[,@var{m2}]]]] @gol
+-falign-labels[=@var{n}[,@var{m}[,@var{n2}[,@var{m2}]]]] @gol
+-falign-loops[=@var{n}[,@var{m}[,@var{n2}[,@var{m2}]]]] @gol
-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
-fauto-inc-dec -fbranch-probabilities @gol
-fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol
@@ -8231,19 +8233,36 @@ The @option{-fstrict-overflow} option is enabled a
@item -falign-functions
@itemx -falign-functions=@var{n}
+@itemx -falign-functions=@var{n},@var{m}
+@itemx -falign-functions=@var{n},@var{m},@var{n2}
+@itemx -falign-functions=@var{n},@var{m},@var{n2},@var{m2}
@opindex falign-functions
Align the start of functions to the next power-of-two greater than
-@var{n}, skipping up to @var{n} bytes. For instance,
-@option{-falign-functions=32} aligns functions to the next 32-byte
-boundary, but @option{-falign-functions=24} aligns to the next
-32-byte boundary only if this can be done by skipping 23 bytes or less.
+@var{n}, skipping up to @var{m}-1 bytes. Such alignment ensures that
+after branch, at least @var{m} bytes can be fetched by the CPU
+without crossing specified alignment boundary.
-@option{-fno-align-functions} and @option{-falign-functions=1} are
-equivalent and mean that functions are not aligned.
+If @var{m} is not specified, it defaults to @var{n}.
+Likewise, if @var{m2} is not specified, it defaults to @var{n2}.
+Examples: @option{-falign-functions=32} aligns functions to the next
+32-byte boundary, @option{-falign-functions=24} aligns to the next
+32-byte boundary only if this can be done by skipping 23 bytes or less,
+@option{-falign-functions=32,7} aligns to the next
+32-byte boundary only if this can be done by skipping 6 bytes or less.
+
+The second pair of @var{n2},@var{m2} values allows one to have a secondary
+alignment: @option{-falign-functions=64,7,32,3} aligns to the next
+64-byte boundary if this can be done by skipping 6 bytes or less,
+otherwise aligns to the next 32-byte boundary if this can be done
+by skipping 2 bytes or less.
+
Some assemblers only support this flag when @var{n} is a power of two;
in that case, it is rounded up.
+@option{-fno-align-functions} and @option{-falign-functions=1} are
+equivalent and mean that functions are not aligned.
+
If @var{n} is not specified or is zero, use a machine-dependent default.
Enabled at levels @option{-O2}, @option{-O3}.
@@ -8250,12 +8269,13 @@ Enabled at levels @option{-O2}, @option{-O3}.
@item -falign-labels
@itemx -falign-labels=@var{n}
+@itemx -falign-labels=@var{n},@var{m}
+@itemx -falign-labels=@var{n},@var{m},@var{n2}
+@itemx -falign-labels=@var{n},@var{m},@var{n2},@var{m2}
@opindex falign-labels
-Align all branch targets to a power-of-two boundary, skipping up to
-@var{n} bytes like @option{-falign-functions}. This option can easily
-make code slower, because it must insert dummy operations for when the
-branch target is reached in the usual flow of the code.
+Align all branch targets to a power-of-two boundary.
+Parameters of this option are analogous to @option{-falign-functions} option.
@option{-fno-align-labels} and @option{-falign-labels=1} are
equivalent and mean that labels are not aligned.
@@ -8269,12 +8289,15 @@ Enabled at levels @option{-O2}, @option{-O3}.
@item -falign-loops
@itemx -falign-loops=@var{n}
+@itemx -falign-loops=@var{n},@var{m}
+@itemx -falign-loops=@var{n},@var{m},@var{n2}
+@itemx -falign-loops=@var{n},@var{m},@var{n2},@var{m2}
@opindex falign-loops
-Align loops to a power-of-two boundary, skipping up to @var{n} bytes
-like @option{-falign-functions}. If the loops are
-executed many times, this makes up for any execution of the dummy
-operations.
+Align loops to a power-of-two boundary. If the loops are executed
+many times, this makes up for any execution of the dummy padding
+instructions.
+Parameters of this option are analogous to @option{-falign-functions} option.
@option{-fno-align-loops} and @option{-falign-loops=1} are
equivalent and mean that loops are not aligned.
@@ -8284,12 +8307,15 @@ Enabled at levels @option{-O2}, @option{-O3}.
@item -falign-jumps
@itemx -falign-jumps=@var{n}
+@itemx -falign-jumps=@var{n},@var{m}
+@itemx -falign-jumps=@var{n},@var{m},@var{n2}
+@itemx -falign-jumps=@var{n},@var{m},@var{n2},@var{m2}
@opindex falign-jumps
Align branch targets to a power-of-two boundary, for branch targets
-where the targets can only be reached by jumping, skipping up to @var{n}
-bytes like @option{-falign-functions}. In this case, no dummy operations
-need be executed.
+where the targets can only be reached by jumping. In this case,
+no dummy operations need be executed.
+Parameters of this option are analogous to @option{-falign-functions} option.
@option{-fno-align-jumps} and @option{-falign-jumps=1} are
equivalent and mean that loops are not aligned.
===================================================================
@@ -900,32 +900,32 @@ Common Report Var(flag_aggressive_loop_optimizatio
Aggressively optimize loops using language constraints.
falign-functions
-Common Report Var(align_functions,0) Optimization UInteger
+Common Report Var(flag_align_functions) Optimization
Align the start of functions.
falign-functions=
-Common RejectNegative Joined UInteger Var(align_functions)
+Common RejectNegative Joined Var(str_align_functions)
falign-jumps
-Common Report Var(align_jumps,0) Optimization UInteger
+Common Report Var(flag_align_jumps) Optimization
Align labels which are only reached by jumping.
falign-jumps=
-Common RejectNegative Joined UInteger Var(align_jumps)
+Common RejectNegative Joined Var(str_align_jumps)
falign-labels
-Common Report Var(align_labels,0) Optimization UInteger
+Common Report Var(flag_align_labels) Optimization
Align all labels.
falign-labels=
-Common RejectNegative Joined UInteger Var(align_labels)
+Common RejectNegative Joined Var(str_align_labels)
falign-loops
-Common Report Var(align_loops,0) Optimization UInteger
+Common Report Var(flag_align_loops) Optimization
Align the start of loops.
falign-loops=
-Common RejectNegative Joined UInteger Var(align_loops)
+Common RejectNegative Joined Var(str_align_loops)
fargument-alias
Common Ignore
===================================================================
@@ -208,9 +208,9 @@ struct tune_params
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;
- int function_align;
- int jump_align;
- int loop_align;
+ const char *function_align;
+ const char *jump_align;
+ const char *loop_align;
int int_reassoc_width;
int fp_reassoc_width;
int vec_reassoc_width;
===================================================================
@@ -522,9 +522,9 @@ static const struct tune_params generic_tunings =
4, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
- 8, /* function_align. */
- 8, /* jump_align. */
- 4, /* loop_align. */
+ "8", /* function_align. */
+ "8", /* jump_align. */
+ "4", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -548,9 +548,9 @@ static const struct tune_params cortexa35_tunings
1, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -574,9 +574,9 @@ static const struct tune_params cortexa53_tunings
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -600,9 +600,9 @@ static const struct tune_params cortexa57_tunings
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -626,9 +626,9 @@ static const struct tune_params cortexa72_tunings
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -652,9 +652,9 @@ static const struct tune_params cortexa73_tunings
2, /* issue_rate. */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -677,9 +677,9 @@ static const struct tune_params exynosm1_tunings =
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
- 4, /* function_align. */
- 4, /* jump_align. */
- 4, /* loop_align. */
+ "4", /* function_align. */
+ "4", /* jump_align. */
+ "4", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -702,9 +702,9 @@ static const struct tune_params thunderx_tunings =
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
- 8, /* function_align. */
- 8, /* jump_align. */
- 8, /* loop_align. */
+ "8", /* function_align. */
+ "8", /* jump_align. */
+ "8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -727,9 +727,9 @@ static const struct tune_params xgene1_tunings =
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 16, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -753,9 +753,9 @@ static const struct tune_params qdf24xx_tunings =
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
- 16, /* function_align. */
- 8, /* jump_align. */
- 16, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
@@ -778,9 +778,9 @@ static const struct tune_params vulcan_tunings =
4, /* memmov_cost. */
4, /* issue_rate. */
AARCH64_FUSE_NOTHING, /* fuseable_ops. */
- 16, /* function_align. */
- 8, /* jump_align. */
- 16, /* loop_align. */
+ "16", /* function_align. */
+ "8", /* jump_align. */
+ "16", /* loop_align. */
3, /* int_reassoc_width. */
2, /* fp_reassoc_width. */
2, /* vec_reassoc_width. */
===================================================================
@@ -624,13 +624,13 @@ alpha_override_options_after_change (void)
/* ??? Kludge these by not doing anything if we don't optimize. */
if (optimize > 0)
{
- if (align_loops <= 0)
- align_loops = 16;
- if (align_jumps <= 0)
- align_jumps = 16;
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = "16";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "16";
}
- if (align_functions <= 0)
- align_functions = 16;
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "16";
}
/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
===================================================================
@@ -2922,9 +2922,10 @@ static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
- if (opts->x_align_functions <= 0)
- opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
- && opts->x_optimize_size ? 2 : 4;
+ /* -falign-functions without argument: supply one */
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+ opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
+ && opts->x_optimize_size ? "2" : "4";
}
/* Implement targetm.override_options_after_change. */
===================================================================
@@ -2627,45 +2627,47 @@ struct ptt
{
const char *const name; /* processor name */
const struct processor_costs *cost; /* Processor costs */
- const int align_loop; /* Default alignments. */
- const int align_loop_max_skip;
- const int align_jump;
- const int align_jump_max_skip;
- const int align_func;
+ const char *const align_loop; /* Default alignments. */
+ const char *const align_jump;
+ const char *const align_label;
+ const char *const align_func;
};
-/* This table must be in sync with enum processor_type in i386.h. */
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
- {"generic", &generic_cost, 16, 10, 16, 10, 16},
- {"i386", &i386_cost, 4, 3, 4, 3, 4},
- {"i486", &i486_cost, 16, 15, 16, 15, 16},
- {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
- {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
- {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
- {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
- {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
- {"core2", &core_cost, 16, 10, 16, 10, 16},
- {"nehalem", &core_cost, 16, 10, 16, 10, 16},
- {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
- {"haswell", &core_cost, 16, 10, 16, 10, 16},
- {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
- {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
- {"knl", &slm_cost, 16, 15, 16, 7, 16},
- {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
- {"intel", &intel_cost, 16, 15, 16, 7, 16},
- {"geode", &geode_cost, 0, 0, 0, 0, 0},
- {"k6", &k6_cost, 32, 7, 32, 7, 32},
- {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
- {"k8", &k8_cost, 16, 7, 16, 7, 16},
- {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
- {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
- {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
- {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
- {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
- {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
- {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
- {"znver1", &znver1_cost, 16, 10, 16, 7, 11}
+/* The "0,0,8" label alignment specified for some processors generates
+ secondary 8-byte alignment only for those label/jump/loop targets
+ which have primary alignment. */
+ {"generic", &generic_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"i386", &i386_cost, "4", "4", NULL, "4" },
+ {"i486", &i486_cost, "16,16,8", "16,16,8", "0,0,8", "16"},
+ {"pentium", &pentium_cost, "16,8,8", "16,8,8", "0,0,8", "16"},
+ {"lakemont", &lakemont_cost, "16,8,8", "16,8,8", "0,0,8", "16"},
+ {"pentiumpro", &pentiumpro_cost,"16,16,8", "16,11,8", "0,0,8", "16"},
+ {"pentium4", &pentium4_cost, NULL, NULL, NULL, NULL},
+ {"nocona", &nocona_cost, NULL, NULL, NULL, NULL},
+ {"core2", &core_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"nehalem", &core_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"sandybridge",&core_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"haswell", &core_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"bonnell", &atom_cost, "16,16,8", "16,8,8", "0,0,8", "16"},
+ {"silvermont", &slm_cost, "16,16,8", "16,8,8", "0,0,8", "16"},
+ {"knl", &slm_cost, "16,16,8", "16,8,8", "0,0,8", "16"},
+ {"skylake-avx512", &core_cost, "16,11,8", "16,11,8", "0,0,8", "16"},
+ {"intel", &intel_cost, "16,16,8", "16,8,8", "0,0,8", "16"},
+ {"geode", &geode_cost, NULL, NULL, NULL, NULL},
+ {"k6", &k6_cost, "32,8,8", "32,8,8", "0,0,8", "32"},
+ {"athlon", &athlon_cost, "16,8,8", "16,8,8", "0,0,8", "16"},
+ {"k8", &k8_cost, "16,8,8", "16,8,8", "0,0,8", "16"},
+ {"amdfam10", &amdfam10_cost, "32,25,8", "32,8,8", "0,0,8", "32"},
+ {"bdver1", &bdver1_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"bdver2", &bdver2_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"bdver3", &bdver3_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"bdver4", &bdver4_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"btver1", &btver1_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"btver2", &btver2_cost, "16,11,8", "16,8,8", "0,0,8", "11"},
+ {"znver1", &znver1_cost, "16,11,8", "16,8,8", "0,0,8", "11"}
};
static unsigned int
@@ -4706,20 +4708,23 @@ set_ix86_tune_features (enum processor_type ix86_t
static void
ix86_default_align (struct gcc_options *opts)
{
- if (opts->x_align_loops == 0)
+ /* -falign-foo without argument: supply one */
+ if (opts->x_flag_align_loops && !opts->x_str_align_loops)
{
- opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
- align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ opts->x_str_align_loops = processor_target_table[ix86_tune].align_loop;
}
- if (opts->x_align_jumps == 0)
+ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
{
- opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
- align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ opts->x_str_align_jumps = processor_target_table[ix86_tune].align_jump;
}
- if (opts->x_align_functions == 0)
+ if (opts->x_flag_align_labels && !opts->x_str_align_labels)
{
- opts->x_align_functions = processor_target_table[ix86_tune].align_func;
+ opts->x_str_align_labels = processor_target_table[ix86_tune].align_label;
}
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+ {
+ opts->x_str_align_functions = processor_target_table[ix86_tune].align_func;
+ }
}
/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
===================================================================
@@ -488,9 +488,9 @@ unsigned int mips_base_compression_flags;
static int mips_base_schedule_insns; /* flag_schedule_insns */
static int mips_base_reorder_blocks_and_partition; /* flag_reorder... */
static int mips_base_move_loop_invariants; /* flag_move_loop_invariants */
-static int mips_base_align_loops; /* align_loops */
-static int mips_base_align_jumps; /* align_jumps */
-static int mips_base_align_functions; /* align_functions */
+static const char *mips_base_align_loops; /* align_loops */
+static const char *mips_base_align_jumps; /* align_jumps */
+static const char *mips_base_align_functions; /* align_functions */
/* Index [M][R] is true if register R is allowed to hold a value of mode M. */
bool mips_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER];
@@ -19303,12 +19303,12 @@ mips_set_compression_mode (unsigned int compressio
/* Provide default values for align_* for 64-bit targets. */
if (TARGET_64BIT)
{
- if (align_loops == 0)
- align_loops = 8;
- if (align_jumps == 0)
- align_jumps = 8;
- if (align_functions == 0)
- align_functions = 8;
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = "8";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "8";
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "8";
}
targetm.min_anchor_offset = -32768;
===================================================================
@@ -4750,29 +4750,25 @@ rs6000_option_override_internal (bool global_init_
if (rs6000_cpu == PROCESSOR_TITAN
|| rs6000_cpu == PROCESSOR_CELL)
{
- if (align_functions <= 0)
- align_functions = 8;
- if (align_jumps <= 0)
- align_jumps = 8;
- if (align_loops <= 0)
- align_loops = 8;
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "8";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "8";
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = "8";
}
if (rs6000_align_branch_targets)
{
- if (align_functions <= 0)
- align_functions = 16;
- if (align_jumps <= 0)
- align_jumps = 16;
- if (align_loops <= 0)
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "16";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "16";
+ if (flag_align_loops && !str_align_loops)
{
can_override_loop_align = 1;
- align_loops = 16;
+ str_align_loops = "16";
}
}
- if (align_jumps_max_skip <= 0)
- align_jumps_max_skip = 15;
- if (align_loops_max_skip <= 0)
- align_loops_max_skip = 15;
}
/* Arrange to save and restore machine status around nested functions. */
===================================================================
@@ -2819,12 +2819,15 @@ rx_option_override (void)
rx_override_options_after_change ();
/* These values are bytes, not log. */
- if (align_jumps == 0 && ! optimize_size)
- align_jumps = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? 4 : 8);
- if (align_loops == 0 && ! optimize_size)
- align_loops = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? 4 : 8);
- if (align_labels == 0 && ! optimize_size)
- align_labels = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? 4 : 8);
+ if (! optimize_size)
+ {
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? "4" : "8");
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? "4" : "8");
+ if (flag_align_labels && !str_align_labels)
+ str_align_labels = ((rx_cpu_type == RX100 || rx_cpu_type == RX200) ? "4" : "8");
+ }
}
===================================================================
@@ -432,9 +432,9 @@ typedef unsigned int CUMULATIVE_ARGS;
/* Compute the alignment needed for label X in various situations.
If the user has specified an alignment then honour that, otherwise
use rx_align_for_label. */
-#define JUMP_ALIGN(x) (align_jumps > 1 ? align_jumps_log : rx_align_for_label (x, 0))
-#define LABEL_ALIGN(x) (align_labels > 1 ? align_labels_log : rx_align_for_label (x, 3))
-#define LOOP_ALIGN(x) (align_loops > 1 ? align_loops_log : rx_align_for_label (x, 2))
+#define JUMP_ALIGN(x) (align_jumps_log > 0 ? align_jumps_log : rx_align_for_label (x, 0))
+#define LABEL_ALIGN(x) (align_labels_log > 0 ? align_labels_log : rx_align_for_label (x, 3))
+#define LOOP_ALIGN(x) (align_loops_log > 0 ? align_loops_log : rx_align_for_label (x, 2))
#define LABEL_ALIGN_AFTER_BARRIER(x) rx_align_for_label (x, 0)
#define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP) \
===================================================================
@@ -983,16 +983,16 @@ sh_override_options_after_change (void)
Aligning all jumps increases the code size, even if it might
result in slightly faster code. Thus, it is set to the smallest
alignment possible if not specified by the user. */
- if (align_loops == 0)
- align_loops = optimize_size ? 2 : 4;
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = optimize_size ? "2" : "4";
- if (align_jumps == 0)
- align_jumps = 2;
- else if (align_jumps < 2)
- align_jumps = 2;
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "2";
+ else
+ min_align_jumps_log = 1;
- if (align_functions == 0)
- align_functions = optimize_size ? 2 : 4;
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = optimize_size ? "2" : "4";
/* The linker relaxation code breaks when a function contains
alignments that are larger than that at the start of a
@@ -999,13 +999,13 @@ sh_override_options_after_change (void)
compilation unit. */
if (TARGET_RELAX)
{
- int min_align = align_loops > align_jumps ? align_loops : align_jumps;
+ parse_alignment_opts ();
+ min_align_functions_log = align_loops_log > align_jumps_log ?
+ align_loops_log : align_jumps_log;
/* Also take possible .long constants / mova tables into account. */
- if (min_align < 4)
- min_align = 4;
- if (align_functions < min_align)
- align_functions = min_align;
+ if (min_align_functions_log < 2)
+ min_align_functions_log = 2;
}
}
===================================================================
@@ -2767,7 +2767,8 @@ static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
int max_ready ATTRIBUTE_UNUSED)
{
- if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
+ parse_alignment_opts ();
+ if (align_labels_log > 2 || align_loops_log > 2 || align_jumps_log > 2)
{
/* When any block might be at least 8-byte aligned, assume they
will all be at least 8-byte aligned to make sure dual issue
===================================================================
@@ -412,12 +412,12 @@ visium_option_override (void)
/* Align functions on 256-byte (32-quadword) for GR5 and 64-byte (8-quadword)
boundaries for GR6 so they start a new burst mode window. */
- if (align_functions == 0)
+ if (flag_align_functions && !str_align_functions)
{
if (visium_cpu == PROCESSOR_GR6)
- align_functions = 64;
+ str_align_functions = "64";
else
- align_functions = 256;
+ str_align_functions = "256";
/* Allow the size of compilation units to double because of inlining.
In practice the global size of the object code is hardly affected
@@ -428,26 +428,25 @@ visium_option_override (void)
}
/* Likewise for loops. */
- if (align_loops == 0)
+ if (flag_align_loops && !str_align_loops)
{
if (visium_cpu == PROCESSOR_GR6)
- align_loops = 64;
+ str_align_loops = "64";
else
{
- align_loops = 256;
/* But not if they are too far away from a 256-byte boundary. */
- align_loops_max_skip = 31;
+ str_align_loops = "256,32";
}
}
/* Align all jumps on quadword boundaries for the burst mode, and even
on 8-quadword boundaries for GR6 so they start a new window. */
- if (align_jumps == 0)
+ if (flag_align_jumps && !str_align_jumps)
{
if (visium_cpu == PROCESSOR_GR6)
- align_jumps = 64;
+ str_align_jumps = "64";
else
- align_jumps = 8;
+ str_align_jumps = "8";
}
/* We register a machine-specific pass. This pass must be scheduled as
===================================================================
@@ -2417,6 +2417,12 @@ final_scan_insn (rtx_insn *insn, FILE *file, int o
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
ASM_OUTPUT_MAX_SKIP_ALIGN (file, align, max_skip);
+ /* Above, we don't know whether a label, jump or loop
+ alignment was used. Conservatively apply
+ label subalignment, not jump or loop
+ subalignment (they are almost always larger). */
+ ASM_OUTPUT_MAX_SKIP_ALIGN (file, align_labels[1].log,
+ align_labels[1].maxskip);
#else
#ifdef ASM_OUTPUT_ALIGN_WITH_NOP
ASM_OUTPUT_ALIGN_WITH_NOP (file, align);
===================================================================
@@ -43,19 +43,22 @@ extern bool final_insns_dump_p;
/* Other basic status info about current function. */
/* Target-dependent global state. */
-struct target_flag_state {
+struct align_flags {
/* Values of the -falign-* flags: how much to align labels in code.
- 0 means `use default', 1 means `don't align'.
- For each variable, there is an _log variant which is the power
- of two not less than the variable, for .align output. */
- int x_align_loops_log;
- int x_align_loops_max_skip;
- int x_align_jumps_log;
- int x_align_jumps_max_skip;
- int x_align_labels_log;
- int x_align_labels_max_skip;
- int x_align_functions_log;
+ log is "align to 2^log" (so 0 means no alignment).
+ maxskip is the maximum allowed amount of padding to insert. */
+ int log;
+ int maxskip;
+};
+struct target_flag_state {
+ /* Each falign-foo can generate up to two levels of alignment:
+ -falign-foo=N,M[,N2,M2] */
+ struct align_flags x_align_loops[2];
+ struct align_flags x_align_jumps[2];
+ struct align_flags x_align_labels[2];
+ struct align_flags x_align_functions[2];
+
/* The excess precision currently in effect. */
enum excess_precision x_flag_excess_precision;
};
@@ -67,20 +70,21 @@ extern struct target_flag_state *this_target_flag_
#define this_target_flag_state (&default_target_flag_state)
#endif
-#define align_loops_log \
- (this_target_flag_state->x_align_loops_log)
-#define align_loops_max_skip \
- (this_target_flag_state->x_align_loops_max_skip)
-#define align_jumps_log \
- (this_target_flag_state->x_align_jumps_log)
-#define align_jumps_max_skip \
- (this_target_flag_state->x_align_jumps_max_skip)
-#define align_labels_log \
- (this_target_flag_state->x_align_labels_log)
-#define align_labels_max_skip \
- (this_target_flag_state->x_align_labels_max_skip)
-#define align_functions_log \
- (this_target_flag_state->x_align_functions_log)
+#define align_loops (this_target_flag_state->x_align_loops)
+#define align_jumps (this_target_flag_state->x_align_jumps)
+#define align_labels (this_target_flag_state->x_align_labels)
+#define align_functions (this_target_flag_state->x_align_functions)
+#define align_loops_log (align_loops[0].log)
+#define align_jumps_log (align_jumps[0].log)
+#define align_labels_log (align_labels[0].log)
+#define align_functions_log (align_functions[0].log)
+#define align_loops_max_skip (align_loops[0].maxskip)
+#define align_jumps_max_skip (align_jumps[0].maxskip)
+#define align_labels_max_skip (align_labels[0].maxskip)
+#define align_functions_max_skip (align_functions[0].maxskip)
+/* String representations of the above options are available in
+ const char *str_align_foo. NULL if not set. */
+
#define flag_excess_precision \
(this_target_flag_state->x_flag_excess_precision)
===================================================================
@@ -1179,31 +1179,111 @@ target_supports_section_anchors_p (void)
return true;
}
-/* Default the align_* variables to 1 if they're still unset, and
- set up the align_*_log variables. */
+/* Read a decimal number from string FLAG, up to end of line or comma.
+ Emit error message if number ends with any other character.
+ Return pointer past comma, or NULL if end of line. */
+static const char *
+read_uint (const char *flag, const char *name, int *np)
+{
+ const char *flag_start = flag;
+ int n = 0;
+ char c;
+
+ while ((c = *flag++) >= '0' && c <= '9')
+ n = n*10 + (c-'0');
+ *np = n & 0x3fffffff; /* avoid accidentally negative numbers */
+ if (c == '\0')
+ return NULL;
+ if (c == ',')
+ return flag;
+
+ error_at (UNKNOWN_LOCATION, "-falign-%s parameter is bad at '%s'",
+ name, flag_start);
+ return NULL;
+}
+
+/* Parse "N[,M][,...]" string FLAG into struct align_flags A.
+ Return pointer past second comma, or NULL if end of line. */
+static const char *
+read_log_maxskip (const char *flag, const char *name, struct align_flags *a)
+{
+ int n, m;
+ flag = read_uint (flag, name, &a->log);
+ n = a->log;
+ if (n != 0)
+ a->log = floor_log2 (n * 2 - 1);
+ if (!flag)
+ {
+ a->maxskip = n ? n - 1 : 0;
+ return flag;
+ }
+ flag = read_uint (flag, name, &a->maxskip);
+ m = a->maxskip;
+ if (m > n) m = n;
+ if (m > 0) m--; /* -falign-foo=N,M means M-1 max bytes of padding, not M */
+ a->maxskip = m;
+ return flag;
+}
+
+/* Parse "N[,M[,N2[,M2]]]" string FLAG into a pair of struct align_flags. */
static void
-init_alignments (void)
+parse_N_M (const char *flag, const char *name, struct align_flags a[2],
+ unsigned int min_align_log)
{
- if (align_loops <= 0)
- align_loops = 1;
- if (align_loops_max_skip > align_loops)
- align_loops_max_skip = align_loops - 1;
- align_loops_log = floor_log2 (align_loops * 2 - 1);
- if (align_jumps <= 0)
- align_jumps = 1;
- if (align_jumps_max_skip > align_jumps)
- align_jumps_max_skip = align_jumps - 1;
- align_jumps_log = floor_log2 (align_jumps * 2 - 1);
- if (align_labels <= 0)
- align_labels = 1;
- align_labels_log = floor_log2 (align_labels * 2 - 1);
- if (align_labels_max_skip > align_labels)
- align_labels_max_skip = align_labels - 1;
- if (align_functions <= 0)
- align_functions = 1;
- align_functions_log = floor_log2 (align_functions * 2 - 1);
+ if (flag)
+ {
+ flag = read_log_maxskip (flag, name, &a[0]);
+ if (flag)
+ flag = read_log_maxskip (flag, name, &a[1]);
+#ifdef SUBALIGN_LOG
+ else
+ {
+ /* N2[,M2] is not specified. This arch has a default for N2.
+ Before -falign-foo=N,M,N2,M2 was introduced, x86 had a tweak.
+ -falign-functions=N with N > 8 was adding secondary alignment.
+ -falign-functions=10 was emitting this before every function:
+ .p2align 4,,9
+ .p2align 3
+ Now this behavior (and more) can be explicitly requested:
+ -falign-functions=16,10,8
+ Retain old behavior if N2 is missing: */
+
+ int align = 1 << a[0].log;
+ int subalign = 1 << SUBALIGN_LOG;
+
+ if (a[0].log > SUBALIGN_LOG && a[0].maxskip >= subalign - 1)
+ {
+ /* Set N2 unless subalign can never have any effect */
+ if (align > a[0].maxskip + 1)
+ a[1].log = SUBALIGN_LOG;
+ }
+ }
+#endif
+ }
+ if ((unsigned int)a[0].log < min_align_log)
+ {
+ a[0].log = min_align_log;
+ a[0].maxskip = (1 << min_align_log) - 1;
+ }
}
+/* Minimum alignment requirements, if arch has them. */
+unsigned int min_align_loops_log = 0;
+unsigned int min_align_jumps_log = 0;
+unsigned int min_align_labels_log = 0;
+unsigned int min_align_functions_log = 0;
+
+/* Process -falign-foo=N[,M[,N2[,M2]]] options. */
+void
+parse_alignment_opts (void)
+{
+ parse_N_M (str_align_loops, "loops", align_loops, min_align_loops_log);
+ parse_N_M (str_align_jumps, "jumps", align_jumps, min_align_jumps_log);
+ parse_N_M (str_align_labels, "labels", align_labels, min_align_labels_log);
+ parse_N_M (str_align_functions, "functions", align_functions,
+ min_align_functions_log);
+}
+
/* Process the options that have been parsed. */
static void
process_options (void)
@@ -1627,7 +1709,7 @@ static void
backend_init_target (void)
{
/* Initialize alignment variables. */
- init_alignments ();
+ parse_alignment_opts ();
/* This depends on stack_pointer_rtx. */
init_fake_stack_mems ();
===================================================================
@@ -98,6 +98,13 @@ extern bool set_src_pwd (const char *);
extern HOST_WIDE_INT get_random_seed (bool);
extern const char *set_random_seed (const char *);
+extern unsigned int min_align_loops_log;
+extern unsigned int min_align_jumps_log;
+extern unsigned int min_align_labels_log;
+extern unsigned int min_align_functions_log;
+
+extern void parse_alignment_opts (void);
+
extern void initialize_rtl (void);
#endif /* ! GCC_TOPLEV_H */
===================================================================
@@ -1792,8 +1792,10 @@ assemble_start_function (tree decl, const char *fn
&& optimize_function_for_speed_p (cfun))
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
- ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
- align_functions_log, align_functions - 1);
+ ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, align_functions[0].log,
+ align_functions[0].maxskip);
+ ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, align_functions[1].log,
+ align_functions[1].maxskip);
#else
ASM_OUTPUT_ALIGN (asm_out_file, align_functions_log);
#endif
===================================================================
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -falign-functions=64,8" } */
+/* { dg-final { scan-assembler ".p2align 6,,7" } } */
+
+void
+test_func (void)
+{
+}