@@ -51,6 +51,43 @@
lwz data_reg,0(addr_reg)
+ We only look for a single usage in the basic block where the external
+ address is loaded. Multiple uses or references in another basic block will
+ force us to not use the PCREL_OPT relocation.
+
+ We also optimize stores to the address of an external variable using the
+ PCREL_GOT relocation and a single store that uses that external address. If
+ that is found we create the PCREL_OPT relocation to possibly convert:
+
+ pld addr_reg,var@pcrel@got
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ stw data_reg,0(addr_reg)
+
+ into:
+
+ pstw data_reg,var@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ nop
+
+ If the variable is not defined in the main program or the code using it is
+ not in the main program, the linker puts the address in the .got section and
+ does:
+
+ .section .got
+ .Lvar_got:
+ .dword var
+
+ .section .text
+ pld addr_reg,.Lvar_got@pcrel
+
+ <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+ stw data_reg,0(addr_reg)
+
We only look for a single usage in the basic block where the external
address is loaded. Multiple uses or references in another basic block will
force us to not use the PCREL_OPT relocation. */
@@ -87,9 +124,12 @@ static struct {
unsigned long loads;
unsigned long adjacent_loads;
unsigned long failed_loads;
+ unsigned long stores;
+ unsigned long adjacent_stores;
+ unsigned long failed_stores;
} counters;
-/* Return a marker to identify the PCREL_OPT load address and load
+/* Return a marker to identify the PCREL_OPT load address and load/store
instruction. We use a constant integer which is added to ".Lpcrel" to make
the label. */
@@ -328,6 +368,160 @@ pcrel_opt_load (rtx_insn *addr_insn, /* insn loading address. */
return;
}
+/* Optimize a PC-relative load address to be used in a store.
+
+ If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no
+ additional references to the address register, the address register dies at
+ the store, and the stored value is not changed), convert insns of the form:
+
+ (set (reg:DI addr)
+ (symbol_ref:DI "ext_symbol"))
+
+ ...
+
+ (set (mem:<MODE> (reg:DI addr))
+ (reg:<MODE> value))
+
+ into:
+
+ (parallel [(set (reg:DI addr)
+ (unspec:DI [(symbol_ref:DI "ext_symbol")
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (reg:<MODE> value))])
+
+ ...
+
+ (parallel [(set (mem:<MODE> (reg:DI addr))
+ (unspec:<MODE> [(reg:<MODE>)
+ (const_int label_num)]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (reg:DI addr))])
+
+ The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a
+ definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn
+ will generate the .reloc to tell the linker to tie the load address and the
+ store using that address together.
+
+ pld b,ext_symbol@got@pcrel
+ .Lpcrel1:
+
+ ...
+
+ .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ stw r,0(b)
+
+ If ext_symbol is defined in another object file in the main program and we
+ are linking the main program, the linker will convert the above instructions
+ to:
+
+ pstw r,ext_symbol@pcrel
+
+ ...
+
+ nop */
+
+static void
+pcrel_opt_store (rtx_insn *addr_insn,     /* insn loading address.  */
+                 rtx_insn *store_insn)    /* insn using address.  */
+{
+  rtx addr_old_set = PATTERN (addr_insn);
+  gcc_assert (GET_CODE (addr_old_set) == SET);
+
+  rtx addr_reg = SET_DEST (addr_old_set);
+  gcc_assert (base_reg_operand (addr_reg, Pmode));
+
+  rtx addr_symbol = SET_SRC (addr_old_set);
+  gcc_assert (pcrel_external_address (addr_symbol, Pmode));
+
+  rtx store_set = PATTERN (store_insn);
+  gcc_assert (GET_CODE (store_set) == SET);
+
+  rtx mem = SET_DEST (store_set);
+  if (!MEM_P (mem))
+    return;
+
+  machine_mode mem_mode = GET_MODE (mem);
+  rtx reg = SET_SRC (store_set);
+
+  /* Only a register may be stored.  Don't allow storing the address of the
+     external variable, and make sure the value being stored wasn't updated.  */
+  if (!register_operand (reg, GET_MODE (reg))
+      || reg_or_subregno (reg) == reg_or_subregno (addr_reg)
+      || reg_set_between_p (reg, addr_insn, store_insn))
+    return;
+
+  /* If the address isn't a non-prefixed offsettable instruction, we can't do
+     the optimization.  */
+  if (!offsettable_non_prefixed_memory (reg, mem_mode, mem))
+    return;
+
+  /* Allocate a new PC-relative label, and update the load address insn.
+
+     (parallel [(set (reg addr)
+                     (unspec [(symbol_ref symbol)
+                              (const_int label_num)]
+                             UNSPEC_PCREL_OPT_ST_ADDR))
+                (use (reg store))])  */
+  rtx label_num = pcrel_opt_next_marker ();
+  rtvec v_addr = gen_rtvec (2, addr_symbol, label_num);
+  rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr,
+                                    UNSPEC_PCREL_OPT_ST_ADDR);
+  rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec);
+  rtx addr_use = gen_rtx_USE (VOIDmode, reg);
+  rtx addr_new_pattern
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use));
+
+  validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, true);
+
+  /* Update the store insn.  Add an explicit clobber of the external address
+     register just to be sure there are no additional uses of the address
+     register.
+
+     (parallel [(set (mem (addr_reg))
+                     (unspec:<MODE> [(reg)
+                                     (const_int label_num)]
+                                    UNSPEC_PCREL_OPT_ST_RELOC))
+                (clobber (reg:DI addr_reg))])  */
+  rtvec v_store = gen_rtvec (2, reg, label_num);
+  rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store,
+                                  UNSPEC_PCREL_OPT_ST_RELOC);
+
+  rtx new_store_set = gen_rtx_SET (mem, new_store);
+  rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg);
+  rtx new_store_pattern
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber));
+
+  validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, true);
+
+  /* Both changes were queued as a group; note whether they succeeded or not.  */
+  if (apply_change_group ())
+    {
+      /* PCREL_OPT store succeeded.  */
+      counters.stores++;
+      if (next_nonnote_insn (addr_insn) == store_insn)
+        counters.adjacent_stores++;
+
+      if (dump_file)
+        fprintf (dump_file,
+                 "PCREL_OPT store (addr insn = %d, use insn = %d).\n",
+                 INSN_UID (addr_insn),
+                 INSN_UID (store_insn));
+    }
+  else
+    {
+      /* PCREL_OPT store failed.  */
+      counters.failed_stores++;
+      if (dump_file)
+        fprintf (dump_file,
+                 "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n",
+                 INSN_UID (addr_insn),
+                 INSN_UID (store_insn));
+    }
+
+  return;
+}
+
/* Given an insn with that loads up a base register with the address of an
external symbol, see if we can optimize it with the PCREL_OPT
optimization. */
@@ -363,7 +557,7 @@ pcrel_opt_address (rtx_insn *addr_insn)
if (!chain || chain->next)
return;
- /* Get the insn of the possible load. */
+ /* Get the insn of the possible load or store. */
df_ref use = chain->ref;
if (!use)
return;
@@ -449,7 +643,7 @@ pcrel_opt_address (rtx_insn *addr_insn)
}
/* If this is the last insn in the basic block, and we haven't found the
- load, exit. */
+ load or store, exit. */
if (insn == last_insn_in_bb)
{
do_pcrel_opt = false;
@@ -461,7 +655,7 @@ pcrel_opt_address (rtx_insn *addr_insn)
if (!do_pcrel_opt)
return;
- /* Is this a load? */
+ /* Is this a load or a store? */
switch (get_attr_type (use_insn))
{
/* Don't do the PCREL_OPT load optimization if there was a store
@@ -476,7 +670,22 @@ pcrel_opt_address (rtx_insn *addr_insn)
pcrel_opt_load (addr_insn, use_insn);
break;
- /* If the use is not a load, just skip the optimization. */
+ /* Don't do the PCREL_OPT store optimization if there was a load or store
+ operation. For example, a load might be trying to load the value
+ being stored in between getting the address and doing the store. If
+ we do the PCREL_OPT store optimization, there is the potential for the
+ optimization to replace the load address with a store, which could
+ change the program. */
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ case TYPE_VECSTORE:
+ if (store_insns_found || load_insns_found)
+ break;
+
+ pcrel_opt_store (addr_insn, use_insn);
+ break;
+
+ /* If the use is not a load or store, just skip the optimization. */
default:
break;
}
@@ -505,7 +714,7 @@ pcrel_opt_pass (function *fun)
fprintf (dump_file, "\n");
/* Look at each basic block to see if there is a load of an external
- variable's external address, and a single load using that external
+ variable's external address, and a single load/store using that external
address. */
FOR_ALL_BB_FN (bb, fun)
{
@@ -531,6 +740,13 @@ pcrel_opt_pass (function *fun)
fprintf (dump_file, "# of failed PCREL_OPT load(s) = %lu\n",
counters.failed_loads);
+ fprintf (dump_file, "# of PCREL_OPT store(s) = %lu (adjacent %lu)\n",
+ counters.stores, counters.adjacent_stores);
+
+ if (counters.failed_stores)
+ fprintf (dump_file, "# of failed PCREL_OPT store(s) = %lu\n",
+ counters.failed_stores);
+
fprintf (dump_file, "\n");
}
@@ -84,7 +84,9 @@
(define_c_enum "unspec"
[UNSPEC_PCREL_OPT_LD_ADDR
UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG
- UNSPEC_PCREL_OPT_LD_RELOC])
+ UNSPEC_PCREL_OPT_LD_RELOC
+ UNSPEC_PCREL_OPT_ST_ADDR
+ UNSPEC_PCREL_OPT_ST_RELOC])
;; Modes that are supported for PCREL_OPT
(define_mode_iterator PO [QI HI SI DI TI SF DF KF
@@ -254,3 +256,131 @@ (define_insn "*pcrel_opt_ld<mode>"
return "lxv %x0,%1";
}
[(set_attr "type" "vecload")])
+
+
+;; PCREL_OPT optimization for stores. We need to put the label after the PLD
+;; instruction, because the assembler might insert a NOP before the PLD for
+;; alignment.
+;;
+;; If we are optimizing a single write, normally the code would look like:
+;;
+;; (set (reg:DI <ptr>)
+;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here
+;;
+;; ... # insns do not need to be adjacent
+;;
+;; (set (mem:SI (reg:DI <ptr>))
+;; (reg:SI <data>)) # <ptr> dies with this insn
+;;
+;; We optimize this to be:
+;;
+;; (parallel [(set (reg:DI <ptr>)
+;; (unspec:DI [(symbol_ref:DI "<extern_addr>")
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_ST_ADDR))
+;; (use (reg:<MODE> <data>))])
+;;
+;; ... # insns do not need to be adjacent
+;;
+;; (parallel [(set (mem:<MODE> (reg:DI <ptr>))
+;; (unspec:<MODE> [(reg:<MODE> <data>)
+;; (const_int <marker>)]
+;; UNSPEC_PCREL_OPT_ST_RELOC))
+;; (clobber (reg:DI <ptr>))])
+
+;; Address-load half of a PCREL_OPT store.  Operand 0 is the base register
+;; that receives the address of the external symbol (operand 1).  Operand 2
+;; is the marker number; it names the .Lpcrel<n> label emitted on the line
+;; after the load.  Operand 3 appears only in a USE: it is the register that
+;; the matching store writes to memory.
+;; NOTE(review): the template spells the mnemonic "ld" while the insn is
+;; marked prefixed; presumably the output code adds the 'p' -- confirm.
+(define_insn "*pcrel_opt_st_addr<mode>"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (match_operand:PO 3 "gpc_reg_operand" "rwa"))]
+ "TARGET_PCREL_OPT"
+ "ld %0,%a1\n.Lpcrel%2:"
+ [(set_attr "prefixed" "yes")
+ (set_attr "type" "load")
+ (set_attr "loads_extern_addr" "yes")])
+
+;; PCREL_OPT stores.
+;;
+;; Store of a QHSI-mode register (operand 1, constraint "r") to the
+;; offsettable memory operand 0.  Operand 2 is the marker number, passed to
+;; output_pcrel_opt_reloc before the store mnemonic is returned.  Operand 3
+;; is the base register, which this insn clobbers.
+(define_insn "*pcrel_opt_st<mode>"
+ [(set (match_operand:QHSI 0 "d_form_memory" "=o")
+ (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ return "st<wd> %1,%0";
+}
+ [(set_attr "type" "store")])
+
+;; DImode PCREL_OPT store; only enabled for TARGET_POWERPC64.  The three
+;; alternatives differ in the constraint on the stored register (operand 1):
+;; "r" uses std, "d" uses stfd, and "v" uses stxsd.  Operand 2 is the marker
+;; passed to output_pcrel_opt_reloc; operand 3 is the clobbered base register.
+(define_insn "*pcrel_opt_stdi"
+ [(set (match_operand:DI 0 "d_form_memory" "=o,o,o")
+ (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT && TARGET_POWERPC64"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0: return "std %1,%0";
+ case 1: return "stfd %1,%0";
+ case 2: return "stxsd %1,%0";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "store,fpstore,fpstore")])
+
+;; SFmode PCREL_OPT store.  The three alternatives differ in the constraint
+;; on the stored register (operand 1): "d" uses stfs, "v" uses stxssp, and
+;; "r" uses stw.  Operand 2 is the marker passed to output_pcrel_opt_reloc;
+;; operand 3 is the clobbered base register.
+(define_insn "*pcrel_opt_stsf"
+ [(set (match_operand:SF 0 "d_form_memory" "=o,o,o")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0: return "stfs %1,%0";
+ case 1: return "stxssp %1,%0";
+ case 2: return "stw %1,%0";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fpstore,fpstore,store")])
+
+;; DFmode PCREL_OPT store.  The three alternatives differ in the constraint
+;; on the stored register (operand 1): "d" uses stfd, "v" uses stxsd, and
+;; "r" uses std.  The insn condition requires either TARGET_POWERPC64 or
+;; that operand 1 satisfies vsx_register_operand.  Operand 2 is the marker
+;; passed to output_pcrel_opt_reloc; operand 3 is the clobbered base register.
+(define_insn "*pcrel_opt_stdf"
+ [(set (match_operand:DF 0 "d_form_memory" "=o,o,o")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r")
+ (match_operand 2 "const_int_operand" "n,n,n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+ "TARGET_PCREL_OPT
+ && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ switch (which_alternative)
+ {
+ case 0: return "stfd %1,%0";
+ case 1: return "stxsd %1,%0";
+ case 2: return "std %1,%0";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fpstore,fpstore,store")])
+
+;; PCREL_OPT store for the PO_VECT modes.  The stored register (operand 1,
+;; constraint "wa") is written with stxv.  Operand 2 is the marker passed to
+;; output_pcrel_opt_reloc; operand 3 is the clobbered base register.
+(define_insn "*pcrel_opt_st<mode>"
+ [(set (match_operand:PO_VECT 0 "d_form_memory" "=o")
+ (unspec:PO_VECT [(match_operand:PO_VECT 1 "gpc_reg_operand" "wa")
+ (match_operand 2 "const_int_operand" "n")]
+ UNSPEC_PCREL_OPT_ST_RELOC))
+ (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+ "TARGET_PCREL_OPT"
+{
+ output_pcrel_opt_reloc (operands[2]);
+ return "stxv %x1,%0";
+}
+ [(set_attr "type" "vecstore")])
@@ -8556,7 +8556,17 @@ rs6000_delegitimize_address (rtx orig_x)
(set (reg:DI <base-reg>)
(unspec:DI [(symbol_ref <symbol>)
(const_int <marker>)]
- UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) */
+ UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
+
+ UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
+ UNSPEC includes the external SYMBOL_REF and the marker; the value being
+ stored is named by the accompanying USE. We return the original SYMBOL_REF.
+
+ (parallel [(set (reg:DI <base-reg>)
+ (unspec:DI [(symbol_ref <symbol>)
+ (const_int <marker>)]
+ UNSPEC_PCREL_OPT_ST_ADDR))
+ (use (reg <store-reg>))]) */
if (GET_CODE (orig_x) == UNSPEC)
switch (XINT (orig_x, 1))
@@ -8564,6 +8574,7 @@ rs6000_delegitimize_address (rtx orig_x)
case UNSPEC_FUSION_GPR:
case UNSPEC_PCREL_OPT_LD_ADDR:
case UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG:
+ case UNSPEC_PCREL_OPT_ST_ADDR:
orig_x = XVECEXP (orig_x, 0, 0);
break;