===================================================================
@@ -919,6 +919,7 @@ See RS/6000 and PowerPC Options.
-mpower8-fusion -mno-mpower8-fusion -mpower8-vector -mno-power8-vector @gol
-mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol
-mquad-memory -mno-quad-memory @gol
+-mquad-memory-atomic -mno-quad-memory-atomic @gol
-mcompat-align-parm -mno-compat-align-parm}
@emph{RX Options}
@@ -18853,7 +18854,8 @@ following options:
-mpopcntb -mpopcntd -mpowerpc64 @gol
-mpowerpc-gpopt -mpowerpc-gfxopt -msingle-float -mdouble-float @gol
-msimple-fpu -mstring -mmulhw -mdlmzb -mmfpgpr -mvsx @gol
--mcrypto -mdirect-move -mpower8-fusion -mpower8-vector -mquad-memory}
+-mcrypto -mdirect-move -mpower8-fusion -mpower8-vector @gol
+-mquad-memory -mquad-memory-atomic}
The particular options set for any particular CPU varies between
compiler versions, depending on what setting seems to produce optimal
@@ -19040,10 +19042,18 @@ the vector instructions.
@itemx -mno-quad-memory
@opindex mquad-memory
@opindex mno-quad-memory
-Generate code that uses (does not use) the quad word memory
+Generate code that uses (does not use) the non-atomic quad word memory
instructions. The @option{-mquad-memory} option requires use of
64-bit mode.
+@item -mquad-memory-atomic
+@itemx -mno-quad-memory-atomic
+@opindex mquad-memory-atomic
+@opindex mno-quad-memory-atomic
+Generate code that uses (does not use) the atomic quad word memory
+instructions. The @option{-mquad-memory-atomic} option requires use of
+64-bit mode.
+
@item -mfloat-gprs=@var{yes/single/double/no}
@itemx -mfloat-gprs
@opindex mfloat-gprs
===================================================================
@@ -270,7 +270,7 @@ (define_predicate "quad_int_reg_operand"
{
HOST_WIDE_INT r;
- if (!TARGET_QUAD_MEMORY)
+ if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
return 0;
if (GET_CODE (op) == SUBREG)
@@ -624,6 +624,7 @@ (define_predicate "offsettable_mem_opera
(match_test "offsettable_nonstrict_memref_p (op)")))
;; Return 1 if the operand is suitable for load/store quad memory.
+;; This predicate only checks for non-atomic loads/stores.
(define_predicate "quad_memory_operand"
(match_code "mem")
{
===================================================================
@@ -53,7 +53,8 @@
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_HTM \
- | OPTION_MASK_QUAD_MEMORY)
+ | OPTION_MASK_QUAD_MEMORY \
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC)
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
===================================================================
@@ -571,7 +571,11 @@ Use ISA 2.07 transactional memory (HTM)
mquad-memory
Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
-Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
+Generate the quad word memory instructions (lq/stq).
+
+mquad-memory-atomic
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
+Generate the quad word memory atomic instructions (lqarx/stqcx).
mcompat-align-parm
Target Report Var(rs6000_compat_align_parm) Init(0) Save
===================================================================
@@ -3356,14 +3356,37 @@ rs6000_option_override_internal (bool gl
/* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
silently turn off quad memory mode. */
- if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
warning (0, N_("-mquad-memory requires 64-bit mode"));
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+ }
+
+ /* Non-atomic quad memory load/store are disabled for little endian, since
+ the words are reversed, but atomic operations can still be done by
+ swapping the words. */
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
+
rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
}
+ /* Assume if the user asked for normal quad memory instructions, they want
+   the atomic versions as well, unless they explicitly told us not to use quad
+ word atomic instructions. */
+ if (TARGET_QUAD_MEMORY
+ && !TARGET_QUAD_MEMORY_ATOMIC
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
/* Enable power8 fusion if we are tuning for power8, even if we aren't
generating power8 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
@@ -5939,7 +5962,8 @@ direct_move_p (rtx op0, rtx op1)
return false;
}
-/* Return true if this is a load or store quad operation. */
+/* Return true if this is a load or store quad operation. This function does
+ not handle the atomic quad memory instructions. */
bool
quad_load_store_p (rtx op0, rtx op1)
@@ -30753,6 +30777,7 @@ static struct rs6000_opt_mask const rs60
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
===================================================================
@@ -533,8 +533,11 @@ extern int rs6000_vector_align[];
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
memory support. */
-#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
-#define TARGET_SYNC_TI TARGET_QUAD_MEMORY
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \
+ || TARGET_QUAD_MEMORY_ATOMIC \
+ || TARGET_DIRECT_MOVE)
+
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC
/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
to allocate the SDmode stack slot to get the value into the proper location
===================================================================
@@ -204,25 +204,66 @@ (define_insn "load_locked<QHI:mode>_si"
"<QHI:larx> %0,%y1"
[(set_attr "type" "load_l")])
+;; Swap insns to swap words for little endian quad memory systems
+(define_mode_attr qswap_pred1 [(TI "int_reg_operand")
+ (PTI "quad_int_reg_operand")])
+
+(define_mode_attr qswap_pred2 [(TI "quad_int_reg_operand")
+ (PTI "int_reg_operand")])
+
+(define_mode_attr qswap_other [(TI "PTI")
+ (PTI "TI")])
+
+(define_insn_and_split "qswap_<mode>"
+ [(set (match_operand:TI2 0 "<qswap_pred1>" "=&r")
+ (unspec:TI2 [(match_operand:<qswap_other> 1 "<qswap_pred2>" "r")]
+ UNSPEC_TI_PTI_SWAP))]
+ "TARGET_SYNC_TI && !WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]));
+
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ operands[3] = gen_lowpart (DImode, operands[1]);
+
+ operands[4] = gen_highpart (DImode, operands[0]);
+ operands[5] = gen_highpart (DImode, operands[1]);
+})
+
;; Use PTImode to get even/odd register pairs
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
(define_expand "load_lockedti"
[(use (match_operand:TI 0 "quad_int_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))]
"TARGET_SYNC_TI"
{
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
rtx pti = gen_reg_rtx (PTImode);
+
+ if (!indexed_or_indirect_operand (operands[1], TImode))
+ {
+ rtx old_addr = XEXP (operands[1], 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = change_address (operands[1], TImode, new_addr);
+ }
+
emit_insn (gen_load_lockedpti (pti, operands[1]));
- emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ else
+ emit_insn (gen_qswap_ti (operands[0], pti));
DONE;
})
(define_insn "load_lockedpti"
[(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
(unspec_volatile:PTI
- [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
"TARGET_SYNC_TI
&& !reg_mentioned_p (operands[0], operands[1])
&& quad_int_reg_operand (operands[0], PTImode)"
@@ -238,6 +279,10 @@ (define_insn "store_conditional<mode>"
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
(define_expand "store_conditionalti"
[(use (match_operand:CC 0 "cc_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))
@@ -247,13 +292,25 @@ (define_expand "store_conditionalti"
rtx op0 = operands[0];
rtx op1 = operands[1];
rtx op2 = operands[2];
- rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
- rtx pti_op2 = gen_reg_rtx (PTImode);
+ rtx addr = XEXP (op1, 0);
+ rtx pti_op1;
+ rtx pti_op2;
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx new_addr = force_reg (Pmode, addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ addr = new_addr;
+ }
+
+ pti_op1 = change_address (op1, PTImode, addr);
+ pti_op2 = gen_reg_rtx (PTImode);
+
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+ else
+ emit_insn (gen_qswap_pti (pti_op2, op2));
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
- emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
DONE;
})
@@ -261,7 +318,7 @@ (define_expand "store_conditionalti"
(define_insn "store_conditionalpti"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
- (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
"TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
"stqcx. %2,%y1"
===================================================================
@@ -125,6 +125,7 @@ (define_c_enum "unspec"
UNSPEC_P8V_MTVSRD
UNSPEC_P8V_XXPERMDI
UNSPEC_P8V_RELOAD_FROM_VSX
+ UNSPEC_TI_PTI_SWAP
])
;;
===================================================================
@@ -339,6 +339,10 @@ rs6000_target_modify_macros (bool define
rs6000_define_or_undefine_macro (define_p, "__HTM__");
if ((flags & OPTION_MASK_P8_VECTOR) != 0)
rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
if ((flags & OPTION_MASK_CRYPTO) != 0)
rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");