@@ -53,6 +53,9 @@ enum aarch64_code_model {
/* Static code and data fit within a 4GB region.
The default non-PIC code model. */
AARCH64_CMODEL_SMALL,
+ /* -fpic for small memory model.
+ GOT size is limited to 28KiB (4K*8-4K) or 3580 entries. */
+ AARCH64_CMODEL_SMALL_SPIC,
/* Static code, data and GOT/PLT fit within a 4GB region.
The default PIC code model. */
AARCH64_CMODEL_SMALL_PIC,
@@ -840,10 +840,55 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
rtx tmp_reg = dest;
machine_mode mode = GET_MODE (dest);
- if (can_create_pseudo_p ())
- tmp_reg = gen_reg_rtx (mode);
+ if (aarch64_cmodel != AARCH64_CMODEL_SMALL_SPIC)
+ {
+ if (can_create_pseudo_p ())
+ tmp_reg = gen_reg_rtx (mode);
+
+ emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
+ }
+ /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
+ here before rtl expand. Tree IVOPT will generate rtl pattern to
+ decide rtx costs, in which case pic_offset_table_rtx is not
+ initialized. In that case there is no need to generate the first adrp
+ instruction as the final cost for global variable access is
+ one instruction. */
+ else if (pic_offset_table_rtx != NULL_RTX)
+ {
+ /* -fpic for -mcmodel=small allows a 32K GOT table size (but we are
+ using the page base as GOT base, the first page may be wasted,
+ in the worst scenario, there is only 28K space for GOT).
+
+ The generated instruction sequence for accessing a global variable
+ is:
+
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
+
+ Only one instruction is needed. But we must initialize
+ pic_offset_table_rtx properly. We generate an initialization insn for
+ every global access, and allow CSE to remove all redundant ones.
+
+ The final instruction sequences will look like the following
+ for multiple global variable accesses.
+
+ adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
+
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
+ ... */
+
+
+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ tmp_reg = pic_offset_table_rtx;
+ crtl->uses_pic_offset_table = 1;
+ emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, s));
+
+ if (mode != GET_MODE (tmp_reg))
+ tmp_reg = simplify_gen_subreg (mode, tmp_reg,
+ GET_MODE (tmp_reg), 0);
+ }
- emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
if (mode == ptr_mode)
{
if (mode == DImode)
@@ -4352,7 +4397,15 @@ aarch64_print_operand (FILE *f, rtx x, char code)
switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
{
case SYMBOL_SMALL_GOT:
- asm_fprintf (asm_out_file, ":got:");
+ /* For a SYMBOL_SMALL_GOT symbol, don't generate the GOT modifier for the
+ high part when it's -fpic, because the high part will be:
+
+ adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
+
+ We need normal pc relative relocation against symbol value instead of
+ against symbol's GOT entry. */
+ if (aarch64_cmodel != AARCH64_CMODEL_SMALL_SPIC)
+ asm_fprintf (asm_out_file, ":got:");
break;
case SYMBOL_SMALL_TLSGD:
@@ -6300,7 +6346,8 @@ cost_plus:
case SYMBOL_REF:
- if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE
+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
{
/* LDR. */
if (speed)
@@ -7108,7 +7155,9 @@ initialize_aarch64_code_model (void)
aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
break;
case AARCH64_CMODEL_SMALL:
- aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
+ aarch64_cmodel = (flag_pic == 2
+ ? AARCH64_CMODEL_SMALL_PIC
+ : AARCH64_CMODEL_SMALL_SPIC);
break;
case AARCH64_CMODEL_LARGE:
sorry ("code model %qs with -f%s", "large",
@@ -7189,6 +7238,7 @@ aarch64_classify_symbol (rtx x, rtx offset,
case AARCH64_CMODEL_TINY:
return SYMBOL_TINY_ABSOLUTE;
+ case AARCH64_CMODEL_SMALL_SPIC:
case AARCH64_CMODEL_SMALL_PIC:
case AARCH64_CMODEL_SMALL:
return SYMBOL_SMALL_ABSOLUTE;
@@ -7236,6 +7286,7 @@ aarch64_classify_symbol (rtx x, rtx offset,
return SYMBOL_TINY_GOT;
return SYMBOL_TINY_ABSOLUTE;
+ case AARCH64_CMODEL_SMALL_SPIC:
case AARCH64_CMODEL_SMALL_PIC:
if (!aarch64_symbol_binds_local_p (x))
return SYMBOL_SMALL_GOT;
@@ -9118,6 +9169,7 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
case AARCH64_CMODEL_TINY_PIC:
case AARCH64_CMODEL_SMALL:
case AARCH64_CMODEL_SMALL_PIC:
+ case AARCH64_CMODEL_SMALL_SPIC:
/* text+got+data < 4Gb. 4-byte signed relocs are sufficient
for everything. */
type = DW_EH_PE_sdata4;
@@ -11371,6 +11423,18 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
return true;
}
+/* Implement TARGET_USE_PSEUDO_PIC_REG.
+
+   Return true if a pseudo register should be created and used to hold
+   the GOT address for PIC code.  Only the -fpic small code model
+   (AARCH64_CMODEL_SMALL_SPIC) asks for one.  */
+
+bool
+aarch64_use_pseudo_pic_reg (void)
+{
+  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
+}
+
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -11649,6 +11712,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
+#undef TARGET_USE_PSEUDO_PIC_REG
+#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"
@@ -4226,7 +4226,12 @@
(match_operand:PTR 2 "aarch64_valid_symref" "S")))]
UNSPEC_GOTSMALLPIC))]
""
- "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
+ {
+ if (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
+ return "ldr\t%<w>0, [%1, #:<got_modifier>:%a2]";
+ else
+ return "ldr\t%<w>0, [%1, #:got_lo12:%a2]";
+ }
[(set_attr "type" "load1")]
)
@@ -4238,7 +4243,12 @@
(match_operand:DI 2 "aarch64_valid_symref" "S")))]
UNSPEC_GOTSMALLPIC)))]
"TARGET_ILP32"
- "ldr\\t%w0, [%1, #:got_lo12:%a2]"
+ {
+ if (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
+ return "ldr\t%w0, [%1, #:gotpage_lo14:%a2]";
+ else
+ return "ldr\t%w0, [%1, #:got_lo12:%a2]";
+ }
[(set_attr "type" "load1")]
)
@@ -657,6 +657,10 @@
(define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")])
+;; -fpic small model GOT reloc modifiers: gotpage_lo15 for DI (LP64) and
+;; gotpage_lo14 for SI (ILP32).  -fPIC needs no attribute: got_lo12 for both.
+(define_mode_attr got_modifier [(SI "gotpage_lo14") (DI "gotpage_lo15")])
+
;; -------------------------------------------------------------------
;; Code Iterators
;; -------------------------------------------------------------------
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fpic -fno-inline --save-temps" } */
+/* -fpic, small code model: the GOT base must be set up by a plain adrp.  */
+void abort ();
+int global_a;
+
+int
+initialize (void)
+{
+  global_a = 0x10;
+  return global_a - 1;
+}
+
+int
+main (int argc, char **argv)
+{
+  int a = initialize ();
+
+  if (a != global_a - 1)
+    abort ();
+
+  return 0;
+}
+
+/* Both functions access global_a, so two adrp insns are expected; for -fpic
+   the operand is printed without the :got: modifier.  */
+/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, _GLOBAL_OFFSET_TABLE" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */