@@ -18,7 +18,6 @@
;; <http://www.gnu.org/licenses/>.
;;; Unused letters:
-;;; B
;;; a jkl uv xyz
@@ -201,6 +200,17 @@
"A memory with only a base register"
(match_operand 0 "mem_noofs_operand"))
+;; We need a special memory constraint for misaligned memory accesses.
+;; It is only used on TARGET_MISALIGN targets.
+;; However, due to a bug in the current special_memory_constraint handling
+;; in lra-constraints.c, we have to define this special_memory_constraint
+;; as a regular constraint as a workaround.
+(define_constraint "B"
+ "Memory reference whose address is misaligned"
+ (and (match_code "mem")
+ (match_test "TARGET_MISALIGN")
+ (match_test "memory_is_misaligned (op, mode)")))
+
(define_constraint "Y"
"The vector zero constant"
(and (match_code "const_vector")
@@ -125,10 +125,10 @@
(eq_attr "subtype" "regular"))))
"m8_slot0, nothing*2")
-;; (define_insn_reservation "m8_load_misalign" 11
-;; (and (eq_attr "cpu" "m8")
-;; (eq_attr "type" "load_mis,fpload_mis"))
-;; "m8_slot0, nothing*10")
+(define_insn_reservation "m8_load_misalign" 11
+ (and (eq_attr "cpu" "m8")
+ (eq_attr "type" "load_mis,fpload_mis"))
+ "m8_slot0, nothing*10")
(define_insn_reservation "m8_prefetch" 1
(and (eq_attr "cpu" "m8")
@@ -147,10 +147,10 @@
(eq_attr "type" "store,fpstore"))
"m8_slot1")
-;; (define_insn_reservation "m8_store_misalign" 3
-;; (and (eq_attr "cpu" "m8")
-;; (eq_attr "type" "store_mis,fpstore_mis"))
-;; "m8_slot1, nothing*2")
+(define_insn_reservation "m8_store_misalign" 3
+ (and (eq_attr "cpu" "m8")
+ (eq_attr "type" "store_mis,fpstore_mis"))
+ "m8_slot1, nothing*2")
;; Control-transfer instructions execute in the Branch Unit in the
;; slot1.
@@ -100,6 +100,7 @@ extern int mem_min_alignment (rtx, int);
extern int pic_address_needs_scratch (rtx);
extern int register_ok_for_ldd (rtx);
extern int memory_ok_for_ldd (rtx);
+extern int memory_is_misaligned (rtx, machine_mode);
extern int v9_regcmp_p (enum rtx_code);
/* Function used for V8+ code generation. Returns 1 if the high
32 bits of REG are 0 before INSN. */
@@ -1342,6 +1342,8 @@ dump_target_flag_bits (const int flags)
fprintf (stderr, "V8 ");
if (flags & MASK_V9)
fprintf (stderr, "V9 ");
+ if (flags & MASK_MISALIGN)
+ fprintf (stderr, "MISALIGN ");
}
static void
@@ -1449,10 +1451,14 @@ sparc_option_override (void)
MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
/* UltraSPARC M8 */
{ "m8", MASK_ISA,
- MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
+ MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC
+ |MASK_VIS4B|MASK_MISALIGN }
};
const struct cpu_table *cpu;
unsigned int i;
+ unsigned int target_flags_explicit_init;
+ unsigned int target_flags_explicit_enable;
+ unsigned int target_flags_explicit_disable;
if (sparc_debug_string != NULL)
{
@@ -1493,10 +1499,16 @@ sparc_option_override (void)
if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
target_flags |= MASK_FSMULD;
+ target_flags_explicit_init = target_flags & target_flags_explicit;
+ target_flags_explicit_enable = target_flags_explicit_init & target_flags_explicit;
+ target_flags_explicit_disable = ~target_flags_explicit_init & target_flags_explicit;
if (TARGET_DEBUG_OPTIONS)
{
dump_target_flags("Initial target_flags", target_flags);
dump_target_flags("target_flags_explicit", target_flags_explicit);
+ dump_target_flags("target_flags_explicit_init", target_flags_explicit_init);
+ dump_target_flags("target_flags_explicit_enable", target_flags_explicit_enable);
+ dump_target_flags("target_flags_explicit_disable", target_flags_explicit_disable);
}
#ifdef SUBTARGET_OVERRIDE_OPTIONS
@@ -1572,8 +1584,8 @@ sparc_option_override (void)
dump_target_flags ("cpu->enable", cpu->enable);
}
- target_flags &= ~cpu->disable;
- target_flags |= (cpu->enable
+ target_flags &= ~(cpu->disable & ~target_flags_explicit_enable);
+ target_flags |= (cpu->enable & ~target_flags_explicit_disable
#ifndef HAVE_AS_FMAF_HPC_VIS3
& ~(MASK_FMAF | MASK_VIS3)
#endif
@@ -1584,7 +1596,7 @@ sparc_option_override (void)
& ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
- & ~(MASK_VIS4B)
+ & ~(MASK_VIS4B | MASK_MISALIGN)
#endif
#ifndef HAVE_AS_LEON
& ~(MASK_LEON | MASK_LEON3)
@@ -4099,10 +4111,30 @@ legitimate_pic_operand_p (rtx x)
return true;
}
-#define RTX_OK_FOR_OFFSET_P(X, MODE) \
- (CONST_INT_P (X) \
- && INTVAL (X) >= -0x1000 \
- && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
+/* For the misaligned ld/st instructions provided by M8, the IMM field
+   is 10 bits wide rather than the 13 bits of regular ld/st.
+   The best solution for this problem would be to distinguish, for each
+   ld/st, whether it is aligned or misaligned.  However, due to the
+   current design of the common routine TARGET_LEGITIMATE_ADDRESS_P,
+   only the ADDR of a ld/st is passed to the routine; the alignment
+   info carried by the corresponding MEM is NOT passed in.  Without
+   changing the prototype of TARGET_LEGITIMATE_ADDRESS_P, we cannot
+   use this best solution.
+   As a workaround, we have to conservatively treat the IMM field of
+   ALL ld/st insns on a MISALIGN target as 10 bits wide.
+   The side effect of this workaround is that an additional REG<-IMM
+   insn is generated for regular ld/st when -mmisalign is on.
+   However, such additional reload insns should be easy to remove
+   by the usual optimizations whenever -O is specified.
+*/
+#define RTX_OK_FOR_OFFSET_P(X, MODE) \
+ (CONST_INT_P (X) \
+ && ((!TARGET_MISALIGN \
+ && INTVAL (X) >= -0x1000 \
+ && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))\
+ || (TARGET_MISALIGN \
+ && INTVAL (X) >= -0x0400 \
+ && INTVAL (X) <= (0x0400 - GET_MODE_SIZE (MODE)))))
#define RTX_OK_FOR_OLO10_P(X, MODE) \
(CONST_INT_P (X) \
@@ -4179,10 +4211,12 @@ sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
&& (mode == DFmode || mode == DImode))
return 0;
}
+ /* We prohibit LO_SUM + IMM on TARGET_MISALIGN since it is not supported.  */
else if (USE_AS_OFFSETABLE_LO10
&& GET_CODE (rs1) == LO_SUM
&& TARGET_ARCH64
&& ! TARGET_CM_MEDMID
+ && ! TARGET_MISALIGN
&& RTX_OK_FOR_OLO10_P (rs2, mode))
{
rs2 = NULL;
@@ -8910,6 +8944,16 @@ memory_ok_for_ldd (rtx op)
return 1;
}
+
+/* Return 1 if OP, a MEM, has an address which is known to be
+   misaligned.  */
+
+int
+memory_is_misaligned (rtx op, machine_mode mode)
+{
+ return (MEM_ALIGN (op) < GET_MODE_BITSIZE (mode));
+}
+
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
@@ -595,8 +595,9 @@ extern enum cmodel sparc_cmodel;
#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN)
/* Set this nonzero if move instructions will actually fail to work
- when given unaligned data. */
-#define STRICT_ALIGNMENT 1
+ when given unaligned data.
+ When TARGET_MISALIGN is enabled, this should be zero.  */
+#define STRICT_ALIGNMENT !(TARGET_MISALIGN)
/* Things that must be doubleword aligned cannot go in the text section,
because the linker fails to align the text section enough!
@@ -258,7 +258,7 @@
(symbol_ref "TARGET_SPARCLET") (const_string "sparclet")]
(const_string "v7"))))
-(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4,vis4b"
+(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4,vis4b,misalign"
(const_string "none"))
(define_attr "lra" "disabled,enabled"
@@ -273,7 +273,8 @@
(eq_attr "cpu_feature" "vis") (symbol_ref "TARGET_VIS")
(eq_attr "cpu_feature" "vis3") (symbol_ref "TARGET_VIS3")
(eq_attr "cpu_feature" "vis4") (symbol_ref "TARGET_VIS4")
- (eq_attr "cpu_feature" "vis4b") (symbol_ref "TARGET_VIS4B")]
+ (eq_attr "cpu_feature" "vis4b") (symbol_ref "TARGET_VIS4B")
+ (eq_attr "cpu_feature" "misalign") (symbol_ref "TARGET_MISALIGN")]
(const_int 0)))
;; The SPARC instructions used by the backend are organized into a
@@ -302,8 +303,12 @@
;; load/prefetch: PREFETCH
;; fpload: LDF LDDF LDQF
;; sload: LD{SB,SH,SW}
+;; load_mis: LDM{SH,UH,SW,UW,X}[A]
+;; fpload_mis: LDMF{S,D}[A]
;; store: ST{B,H,W,X} STFSR
;; fpstore: STF STDF STQF
+;; store_mis: STM{H,W,X}[A]
+;; fpstore_mis: STMF{S,D}[A]
;; cbcond: CWB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
;; CXB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
;; uncond_branch: BA BPA JMPL
@@ -361,10 +366,12 @@
(define_attr "type"
"ialu,compare,shift,
load,sload,store,
+ load_mis,store_mis,
uncond_branch,branch,call,sibcall,call_no_delay_slot,return,
cbcond,uncond_cbcond,
imul,idiv,
fpload,fpstore,
+ fpload_mis,fpstore_mis,
fp,fpmove,
fpcmove,fpcrmove,
fpcmp,
@@ -1621,18 +1628,21 @@
})
(define_insn "*movhi_insn"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
- (match_operand:HI 1 "input_operand" "rI,K,m,rJ"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r, B,m")
+ (match_operand:HI 1 "input_operand" "rI,K,B,m,rJ,rJ"))]
"(register_operand (operands[0], HImode)
|| register_or_zero_operand (operands[1], HImode))"
"@
mov\t%1, %0
sethi\t%%hi(%a1), %0
+ ldmuh\t%1, %0
lduh\t%1, %0
+ stmh\t%r1, %0
sth\t%r1, %0"
- [(set_attr "type" "*,*,load,store")
- (set_attr "subtype" "*,*,regular,*")
- (set_attr "us3load_type" "*,*,3cycle,*")])
+ [(set_attr "type" "*,*,load_mis,load,store_mis,store")
+ (set_attr "subtype" "*,*,*,regular,*,*")
+ (set_attr "cpu_feature" "*,*,misalign,*,misalign,*")
+ (set_attr "us3load_type" "*,*,*,3cycle,*,*")])
;; We always work with constants here.
(define_insn "*movhi_lo_sum"
@@ -1652,25 +1662,31 @@
})
(define_insn "*movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m, r,*f,*f,*f, m,d,d")
- (match_operand:SI 1 "input_operand" "rI,K,m,rJ,*f, r, f, m,*f,J,P"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r, B, m, r,*f,*f,*f,*f, B, m,d,d")
+ (match_operand:SI 1 "input_operand" "rI,K,B,m,rJ,rJ,*f, r, f, B, m,*f,*f,J,P"))]
+
"register_operand (operands[0], SImode)
|| register_or_zero_or_all_ones_operand (operands[1], SImode)"
"@
mov\t%1, %0
sethi\t%%hi(%a1), %0
+ ldmsw\t%1, %0
ld\t%1, %0
+ stmw\t%r1, %0
st\t%r1, %0
movstouw\t%1, %0
movwtos\t%1, %0
fmovs\t%1, %0
+ ldmfs\t%1, %0
ld\t%1, %0
+ stmfs\t%1, %0
st\t%1, %0
fzeros\t%0
fones\t%0"
- [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
- (set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
- (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
+ [(set_attr "type" "*,*,load_mis,load,store_mis,store,vismv,vismv,fpmove,fpload_mis,fpload,fpstore_mis,fpstore,visl,visl")
+ (set_attr "subtype" "*,*,*,regular,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "cpu_feature" "*,*,misalign,*,misalign,*,vis3,vis3,*,misalign,*,misalign,*,vis,vis")])
+
(define_insn "*movsi_lo_sum"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1810,13 +1826,15 @@
(define_insn "*movdi_insn_sp32"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=T,o,U,T,r,o,r,r,?*f,?T,?*f,?o,?*e,?*e, r,?*f,?*e,?T,*b,*b")
+ "=B,T,o,U,T,r,o,r,r,?*f,?T,?*f,?o,?*e,?*e, r,?*f,?*e,?T,*b,*b")
(match_operand:DI 1 "input_operand"
- " J,J,T,U,o,r,i,r, T,*f, o,*f, *e, *e,?*f, r, T,*e, J, P"))]
+ " J,J,J,T,U,o,r,i,r, T,*f, o,*f, *e, *e,?*f, r, T,*e, J, P"))]
+
"TARGET_ARCH32
&& (register_operand (operands[0], DImode)
|| register_or_zero_operand (operands[1], DImode))"
"@
+ stmx\t%r1, %0
stx\t%r1, %0
#
ldd\t%1, %0
@@ -1837,24 +1855,26 @@
std\t%1, %0
fzero\t%0
fone\t%0"
- [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,
-visl")
- (set_attr "subtype" "*,*,regular,*,regular,*,*,*,*,*,*,*,*,*,*,*,*,*,double,double")
- (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
- (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
- (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
- (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+ [(set_attr "type" "store_mis,store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
+ (set_attr "subtype" "*,*,*,*,regular,*,*,*,regular,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length" "*,*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
+ (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
+ (set_attr "cpu_feature" "misalign,*,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
+ (set_attr "lra" "*,*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+
(define_insn "*movdi_insn_sp64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e,?W,b,b")
- (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, W,*e,J,P"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r, B, m, r,*e,?*e,?*e,?W,b,b")
+ (match_operand:DI 1 "input_operand" "rI,N,B,m,rJ,rJ,*e, r, *e, W,*e,J,P"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DImode)
|| register_or_zero_or_all_ones_operand (operands[1], DImode))"
"@
mov\t%1, %0
sethi\t%%hi(%a1), %0
+ ldmx\t%1, %0
ldx\t%1, %0
+ stmx\t%r1, %0
stx\t%r1, %0
movdtox\t%1, %0
movxtod\t%1, %0
@@ -1863,10 +1883,11 @@ visl")
std\t%1, %0
fzero\t%0
fone\t%0"
- [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
- (set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
- (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
- (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
+ [(set_attr "type" "*,*,load_mis,load,store_mis,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
+ (set_attr "subtype" "*,*,*,regular,*,*,movdtox,movxtod,*,*,*,*,*")
+ (set_attr "fptype" "*,*,*,*,*,*,*,*,double,*,*,double,double")
+ (set_attr "cpu_feature" "*,*,misalign,*,misalign,*,vis3,vis3,*,*,*,vis,vis")])
+
(define_expand "movdi_pic_label_ref"
[(set (match_dup 3) (high:DI
@@ -2170,6 +2191,7 @@ visl")
"reload_completed
&& (!TARGET_V9
|| (TARGET_ARCH32
+ && !TARGET_MISALIGN
&& !mem_min_alignment (operands[0], 8)))
&& offsettable_memref_p (operands[0])"
[(clobber (const_int 0))]
@@ -2350,8 +2372,9 @@ visl")
})
(define_insn "*movsf_insn"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,*r,m, m")
- (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r,m, m,f,*rG"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,f,*r,*r,B,m, B, m")
+ (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r,B,m, B, m,f,f,*rG,*rG"))]
+
"(register_operand (operands[0], SFmode)
|| register_or_zero_or_all_ones_operand (operands[1], SFmode))"
{
@@ -2385,18 +2408,29 @@ visl")
case 7:
return "movwtos\t%1, %0";
case 8:
+ return "ldmfs\t%1, %0";
case 9:
return "ld\t%1, %0";
case 10:
+ return "ldmsw\t%1, %0";
case 11:
+ return "ld\t%1, %0";
+ case 12:
+ return "stmfs\t%r1, %0";
+ case 13:
return "st\t%r1, %0";
+ case 14:
+ return "stm\t%r1, %0";
+ case 15:
+ return "st\t%r1, %0";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
- (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
- (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")])
+ [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload_mis,fpload,load_mis,load,fpstore_mis,fpstore,store_mis,store")
+ (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,*,*,regular,*,*,*,*")
+ (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,misalign,fpu,misalign,*,misalign,fpu,misalign,*")])
+
;; The following 3 patterns build SFmode constants in integer registers.
@@ -2443,13 +2477,14 @@ visl")
(define_insn "*movdf_insn_sp32"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=T,o,b,b,e,e,*r, f, e,T,U,T, f,o, *r,*r, o")
+ "=B,T,o,b,b,e,e,*r, f, e,T,U,T, f,o, *r,*r, o")
(match_operand:DF 1 "input_operand"
- " G,G,G,C,e,e, f,*r,T#F,e,T,U,o#F,f,*rF, o,*r"))]
+ " G,G,G,G,C,e,e, f,*r,T#F,e,T,U,o#F,f,*rF, o,*r"))]
"TARGET_ARCH32
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
"@
+ stmx\t%r1, %0
stx\t%r1, %0
#
fzero\t%0
@@ -2467,16 +2502,17 @@ visl")
#
ldd\t%1, %0
std\t%1, %0"
- [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
- (set_attr "subtype" "*,*,double,double,*,*,*,*,*,*,regular,*,*,*,*,regular,*")
- (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
- (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
- (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
- (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+ [(set_attr "type" "store_mis,store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+ (set_attr "subtype" "*,*,*,double,double,*,*,*,*,*,*,regular,*,*,*,*,regular,*")
+ (set_attr "length" "*,*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
+ (set_attr "fptype" "*,*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "cpu_feature" "misalign,v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
+ (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+
(define_insn "*movdf_insn_sp64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r")
- (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e,e, e,B,W, *r,*r, m,*r")
+ (match_operand:DF 1 "input_operand" " G,C,e, e,*r,B,W#F,e,e,*rG, m,*rG, F"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2486,17 +2522,19 @@ visl")
fmovd\t%1, %0
movdtox\t%1, %0
movxtod\t%1, %0
+ ldmfd\t%1, %0
ldd\t%1, %0
+ stmfd\t%1, %0
std\t%1, %0
mov\t%r1, %0
ldx\t%1, %0
stx\t%r1, %0
#"
- [(set_attr "type" "visl,visl,fpmove,vismv,vismv,load,store,*,load,store,*")
- (set_attr "subtype" "double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
- (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
- (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
- (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")])
+ [(set_attr "type" "visl,visl,fpmove,vismv,vismv,fpload_mis,fpload,fpstore_mis,fpstore,*,load,store,*")
+ (set_attr "subtype" "double,double,*,movdtox,movxtod,*,regular,*,*,*,regular,*,*")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,2")
+ (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*,*,*")
+ (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,misalign,fpu,misalign,fpu,*,*,*,*")])
;; This pattern builds DFmode constants in integer registers.
(define_split
@@ -2603,6 +2641,7 @@ visl")
"reload_completed
&& (!TARGET_V9
|| (TARGET_ARCH32
+ && !TARGET_MISALIGN
&& !mem_min_alignment (operands[0], 8)))
&& offsettable_memref_p (operands[0])"
[(clobber (const_int 0))]
@@ -8634,8 +8673,8 @@ visl")
})
(define_insn "*mov<VM32:mode>_insn"
- [(set (match_operand:VM32 0 "nonimmediate_operand" "=f,f,f,f,m,m,*r, m,*r,*r, f")
- (match_operand:VM32 1 "input_operand" "Y,Z,f,m,f,Y, m,*r,*r, f,*r"))]
+ [(set (match_operand:VM32 0 "nonimmediate_operand" "=f,f,f,f,f,B,m,B,m,*r,*r, B, m,*r,*r, f")
+ (match_operand:VM32 1 "input_operand" "Y,Z,f,B,m,f,f,Y,Y, B, m,*r,*r,*r, f,*r"))]
"TARGET_VIS
&& (register_operand (operands[0], <VM32:MODE>mode)
|| register_or_zero_or_all_ones_operand (operands[1], <VM32:MODE>mode))"
@@ -8643,21 +8682,27 @@ visl")
fzeros\t%0
fones\t%0
fsrc2s\t%1, %0
+ ldmfs\t%1, %0
ld\t%1, %0
+ stmfs\t%1, %0
st\t%1, %0
+ stmw\t%r1, %0
st\t%r1, %0
+ ldmsw\t%1, %0
ld\t%1, %0
+ stmw\t%1, %0
st\t%1, %0
mov\t%1, %0
movstouw\t%1, %0
movwtos\t%1, %0"
- [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv")
- (set_attr "subtype" "single,single,single,*,*,*,regular,*,*,movstouw,single")
- (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
+ [(set_attr "type" "visl,visl,vismv,fpload_mis,fpload,fpstore_mis,fpstore,store_mis,store,load_mis,load,store_mis,store,*,vismv,vismv")
+ (set_attr "subtype" "single,single,single,*,*,*,*,*,*,*,regular,*,*,*,movstouw,single")
+ (set_attr "cpu_feature" "vis,vis,vis,misalign,*,misalign,*,misalign,*,misalign,*,misalign,*,*,vis3,vis3")])
+
(define_insn "*mov<VM64:mode>_insn_sp64"
- [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
- (match_operand:VM64 1 "input_operand" "Y,Z,e,W,e,Y, m,*r, e,*r,*r"))]
+ [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,e,B,W,B,m,*r,*r, B, m,*r, e,*r")
+ (match_operand:VM64 1 "input_operand" "Y,Z,e,B,W,e,e,Y,Y, B, m,*r,*r, e,*r,*r"))]
"TARGET_VIS
&& TARGET_ARCH64
&& (register_operand (operands[0], <VM64:MODE>mode)
@@ -8666,28 +8711,36 @@ visl")
fzero\t%0
fone\t%0
fsrc2\t%1, %0
+ ldmfd\t%1, %0
ldd\t%1, %0
+ stmfd\t%1, %0
std\t%1, %0
+ stmx\t%r1, %0
stx\t%r1, %0
+ ldmx\t%1, %0
ldx\t%1, %0
+ stmx\t%1, %0
stx\t%1, %0
movdtox\t%1, %0
movxtod\t%1, %0
mov\t%1, %0"
- [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*")
- (set_attr "subtype" "double,double,double,*,*,*,regular,*,movdtox,movxtod,*")
- (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")])
+ [(set_attr "type" "visl,visl,vismv,fpload_mis,fpload,fpstore_mis,fpstore,store_mis,store,load_mis,load,store_mis,store,vismv,vismv,*")
+ (set_attr "subtype" "double,double,double,*,*,*,*,*,*,*,regular,*,*,movdtox,movxtod,*")
+ (set_attr "cpu_feature" "vis,vis,vis,misalign,*,misalign,*,misalign,*,misalign,*,misalign,*,vis3,vis3,*")])
+
+
(define_insn "*mov<VM64:mode>_insn_sp32"
[(set (match_operand:VM64 0 "nonimmediate_operand"
- "=T,o,e,e,e,*r, f,e,T,U,T,f,o,*r,*r, o")
+ "=B,T,o,e,e,e,*r, f,e,T,U,T,f,o,*r,*r, o")
(match_operand:VM64 1 "input_operand"
- " Y,Y,Y,Z,e, f,*r,T,e,T,U,o,f,*r, o,*r"))]
+ " Y,Y,Y,Y,Z,e, f,*r,T,e,T,U,o,f,*r, o,*r"))]
"TARGET_VIS
&& TARGET_ARCH32
&& (register_operand (operands[0], <VM64:MODE>mode)
|| register_or_zero_or_all_ones_operand (operands[1], <VM64:MODE>mode))"
"@
+ stmx\t%r1, %0
stx\t%r1, %0
#
fzero\t%0
@@ -8704,11 +8757,12 @@ visl")
#
ldd\t%1, %0
std\t%1, %0"
- [(set_attr "type" "store,*,visl,visl,vismv,*,*,fpload,fpstore,load,store,*,*,*,load,store")
- (set_attr "subtype" "*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
- (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
- (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
- (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+ [(set_attr "type" "store_mis,store,*,visl,visl,vismv,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+ (set_attr "subtype" "*,*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
+ (set_attr "length" "*,*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
+ (set_attr "cpu_feature" "misalign,*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
+ (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+
(define_split
[(set (match_operand:VM64 0 "register_operand" "")
@@ -8755,6 +8809,7 @@ visl")
"reload_completed
&& TARGET_VIS
&& TARGET_ARCH32
+ && !TARGET_MISALIGN
&& !mem_min_alignment (operands[0], 8)
&& offsettable_memref_p (operands[0])"
[(clobber (const_int 0))]
@@ -85,6 +85,10 @@ mvis4b
Target Report Mask(VIS4B)
Use additional VIS instructions introduced in OSA2017.
+mmisalign
+Target Report Mask(MISALIGN)
+Use OSA2017 misaligned load and store instructions.
+
mcbcond
Target Report Mask(CBCOND)
Use UltraSPARC Compare-and-Branch extensions.
@@ -1123,6 +1123,7 @@ See RS/6000 and PowerPC Options.
-mv8plus -mno-v8plus -mvis -mno-vis @gol
-mvis2 -mno-vis2 -mvis3 -mno-vis3 @gol
-mvis4 -mno-vis4 -mvis4b -mno-vis4b @gol
+-mmisalign -mno-misalign @gol
-mcbcond -mno-cbcond -mfmaf -mno-fmaf -mfsmuld -mno-fsmuld @gol
-mpopc -mno-popc -msubxc -mno-subxc @gol
-mfix-at697f -mfix-ut699 -mfix-ut700 -mfix-gr712rc @gol
@@ -24045,6 +24046,15 @@ cpu that supports such instructions, such as m8 and later. Setting
@option{-mvis4b} also sets @option{-mvis4}, @option{-mvis3},
@option{-mvis2} and @option{-mvis}.
+@item -mmisalign
+@itemx -mno-misalign
+@opindex mmisalign
+@opindex mno-misalign
+With @option{-mmisalign}, GCC generates code that takes advantage of
+the misaligned load and store instructions introduced in the Oracle
+SPARC Architecture 2017. The default is @option{-mmisalign} when
+targeting a cpu that supports such instructions, such as m8 and later.
+
@item -mcbcond
@itemx -mno-cbcond
@opindex mcbcond
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-mmisalign" } */
+
+typedef struct __attribute__((packed)) {
+ char old;
+ short ip;
+ int new;
+ long long ll;
+ float fp;
+ double dp;
+} NCO;
+
+NCO a;
+short eip;
+int enew;
+long long ell;
+float efp;
+double edp;
+
+int main(void) {
+
+ a.old = 'c';
+ a.ip = 10;
+ a.new = 20;
+ a.ll = 50;
+ a.fp = 30.0;
+ a.dp = 40.0;
+ eip = a.ip;
+ enew = a.new;
+ ell = a.ll;
+ efp = a.fp;
+ edp = a.dp;
+ return 0;
+}
+
+/* { dg-final { scan-assembler "stmh" } } */
+/* { dg-final { scan-assembler "stmw" } } */
+/* { dg-final { scan-assembler "stmx" } } */
+/* { dg-final { scan-assembler "stmfs" } } */
+/* { dg-final { scan-assembler "stmfd" } } */
+/* { dg-final { scan-assembler "ldmuh" } } */
+/* { dg-final { scan-assembler "ldmsw" } } */
+/* { dg-final { scan-assembler "ldmx" } } */
+/* { dg-final { scan-assembler "ldmfs" } } */
+/* { dg-final { scan-assembler "ldmfd" } } */
new file mode 100644
@@ -0,0 +1,23 @@
+/* This verifies that stores of constant zero to a misaligned memory address of
+ * type long long or double use stmx insns on m8 with -m32.  */
+
+/* { dg-do compile } */
+/* { dg-options "-mcpu=m8 -m32" } */
+
+typedef struct __attribute__((packed)) {
+ char old;
+ long long ll;
+ double dp;
+} NCO;
+
+NCO a;
+
+int main(void) {
+
+ a.old = 'c';
+ a.ll = 0;
+ a.dp = 0.0;
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "stmx" 2 } } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-mmisalign" } */
+
+typedef int v1si __attribute__ ((vector_size (4),aligned(2)));
+typedef short v2hi __attribute__ ((vector_size (4),aligned(1)));
+
+typedef long long v1di __attribute__ ((vector_size (8),aligned(4)));
+typedef int v2si __attribute__ ((vector_size (8),aligned(2)));
+typedef short v4hi __attribute__ ((vector_size (8),aligned(1)));
+
+v1si a1 = {1};
+v1si b1 = {2};
+v1si c1;
+v2hi a2 = {1,2};
+v2hi b2 = {2,1};
+v2hi c2;
+
+v1di a3 = {1};
+v1di b3 = {2};
+v1di c3;
+v2si a4 = {1,2};
+v2si b4 = {2,1};
+v2si c4;
+v4hi a5 = {1,2,3,4};
+v4hi b5 = {4,3,2,1};
+v4hi c5;
+
+int main()
+{
+ c1 = a1 + b1;
+ c2 = a2 - b2;
+
+ c3 = a3 * b3;
+ c4 = a4 / b4;
+ c5 = a5 == b5;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "ldm" 10} } */
+/* { dg-final { scan-assembler-times "stm" 5} } */
+
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mmisalign" } */
+
+/* This test case verifies that misaligned ld/st insns do NOT support the
+ * REG + IMM address mode when the IMM is too big to be represented by a
+ * 10-bit signed integer.  */
+typedef struct __attribute__((packed)) {
+ char old;
+ char pad[1024];
+ long ll;
+} NCO;
+
+NCO a;
+extern long el;
+
+int main(void) {
+
+ a.ll = 0;
+ el = a.ll;
+ return 0;
+}
+
+/* { dg-final { scan-assembler "stmx" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mmisalign -O1" } */
+
+/* This test case verifies that misaligned ld/st insns do NOT support the
+ * LOSUM + IMM address mode.  */
+typedef struct __attribute__((packed)) {
+ char old;
+ long long ll;
+} NCO;
+
+NCO a;
+
+int main(void) {
+
+ a.ll = 0;
+ return 0;
+}
+
+/* { dg-final { scan-assembler "stmx" } } */
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run} */
+/* { dg-require-effective-target misalign_hw } */
+/* { dg-options "-mcpu=m8" } */
+
+extern void abort (void);
+
+typedef struct __attribute__((packed)) {
+ char old;
+ short ip;
+ int new;
+ long long ll;
+ float fp;
+ double dp;
+} NCO;
+
+NCO a;
+
+int main(void) {
+
+ a.old = 'c';
+ a.ip = 10;
+ a.new = 20;
+ a.ll = 50;
+ a.fp = 30.0;
+ a.dp = 40.0;
+ if ((a.ip != 10)
+ || (a.new != 20)
+ || (a.ll != 50)
+ || (a.fp != 30.0)
+ || (a.dp != 40.0))
+ abort();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do run} */
+/* { dg-require-effective-target misalign_hw } */
+/* { dg-options "-mcpu=m8 -m32" } */
+
+extern void abort (void);
+
+typedef struct __attribute__((packed)) {
+ char old;
+ long long ll;
+ double dp;
+} NCO;
+
+NCO a;
+
+int main(void) {
+
+ a.old = 'c';
+ a.ll = 0;
+ a.dp = 0.0;
+ if ((a.ll != 0) || (a.dp != 0.0))
+ abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run} */
+/* { dg-require-effective-target misalign_hw } */
+/* { dg-options "-mmisalign" } */
+
+typedef int v1si __attribute__ ((vector_size (4),aligned(2)));
+typedef short v2hi __attribute__ ((vector_size (4),aligned(1)));
+
+typedef long long v1di __attribute__ ((vector_size (8),aligned(4)));
+typedef int v2si __attribute__ ((vector_size (8),aligned(2)));
+typedef short v4hi __attribute__ ((vector_size (8),aligned(1)));
+
+v1si a1 = {1};
+v1si b1 = {2};
+v1si c1;
+v2hi a2 = {1,2};
+v2hi b2 = {2,1};
+v2hi c2;
+
+v1di a3 = {1};
+v1di b3 = {2};
+v1di c3;
+v2si a4 = {1,2};
+v2si b4 = {2,1};
+v2si c4;
+v4hi a5 = {1,2,3,4};
+v4hi b5 = {4,3,2,1};
+v4hi c5;
+
+extern void abort (void);
+
+int main()
+{
+ c1 = a1 + b1;
+ c2 = a2 - b2;
+
+ c3 = a3 * b3;
+ c4 = a4 / b4;
+ c5 = a5 == b5;
+
+ if ((c1[0] != 3)
+ || (c2[0] != -1)
+ || (c2[1] != 1)
+ || (c3[0] != 2)
+ || (c4[0] != 0)
+ || (c4[1] != 2)
+ || (c5[0] != 0)
+ || (c5[1] != 0)
+ || (c5[2] != 0)
+ || (c5[3] != 0))
+ abort ();
+
+ return 0;
+}
@@ -8292,6 +8292,21 @@ proc check_effective_target_offload_nvptx { } {
} "-foffload=nvptx-none" ]
}
+# Return 1 if the target supports the following misaligned load instruction:
+# ldmx o1, o2
+proc check_effective_target_misalign_hw { } {
+ return [check_runtime misalign_hw {
+ int main (void)
+ {
+ register void *p __asm__ ("o1") = &main;
+ register long res __asm__ ("o2");
+ asm volatile (".word 0xd58a5400");
+ return 0;
+ }
+ } "-mmisalign"]
+}
+
+
# Return 1 if the compiler has been configured with hsa offloading.
proc check_effective_target_offload_hsa { } {