===================================================================
@@ -107,6 +107,10 @@ (define_constants
(P13_REGNUM 81)
(P14_REGNUM 82)
(P15_REGNUM 83)
+ (FFR_REGNUM 84)
+ ;; "FFR token": a fake register used for representing the scheduling
+ ;; restrictions on FFR-related operations.
+ (FFRT_REGNUM 85)
;; Scratch register used by stack clash protection to calculate
;; SVE CFA offsets during probing.
(STACK_CLASH_SVE_CFA_REGNUM 11)
===================================================================
@@ -366,6 +366,9 @@ #define TARGET_SIMD_RDMA (TARGET_SIMD &&
P0-P7 Predicate low registers: valid in all predicate contexts
P8-P15 Predicate high registers: used as scratch space
+ FFR First Fault Register, a fixed-use SVE predicate register
+ FFRT FFR token: a fake register used for modelling dependencies
+
VG Pseudo "vector granules" register
VG is the number of 64-bit elements in an SVE vector. We define
@@ -386,6 +389,7 @@ #define FIXED_REGISTERS \
1, 1, 1, 1, /* SFP, AP, CC, VG */ \
0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \
0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \
+ 1, 1 /* FFR and FFRT */ \
}
/* X30 is marked as caller-saved which is in line with regular function call
@@ -408,6 +412,7 @@ #define CALL_USED_REGISTERS \
1, 1, 1, 1, /* SFP, AP, CC, VG */ \
1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \
1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \
+ 1, 1 /* FFR and FFRT */ \
}
#define REGISTER_NAMES \
@@ -423,6 +428,7 @@ #define REGISTER_NAMES \
"sfp", "ap", "cc", "vg", \
"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \
+ "ffr", "ffrt" \
}
/* Generate the register aliases for core register N */
@@ -471,7 +477,7 @@ #define HARD_FRAME_POINTER_REGNUM R29_RE
#define FRAME_POINTER_REGNUM SFP_REGNUM
#define STACK_POINTER_REGNUM SP_REGNUM
#define ARG_POINTER_REGNUM AP_REGNUM
-#define FIRST_PSEUDO_REGISTER (P15_REGNUM + 1)
+#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1)
/* The number of (integer) argument register available. */
#define NUM_ARG_REGS 8
@@ -601,6 +607,8 @@ enum reg_class
PR_LO_REGS,
PR_HI_REGS,
PR_REGS,
+ FFR_REGS,
+ PR_AND_FFR_REGS,
ALL_REGS,
LIM_REG_CLASSES /* Last */
};
@@ -621,6 +629,8 @@ #define REG_CLASS_NAMES \
"PR_LO_REGS", \
"PR_HI_REGS", \
"PR_REGS", \
+ "FFR_REGS", \
+ "PR_AND_FFR_REGS", \
"ALL_REGS" \
}
@@ -638,6 +648,8 @@ #define REG_CLASS_CONTENTS \
{ 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \
{ 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \
{ 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \
+ { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \
{ 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \
}
===================================================================
@@ -1426,6 +1426,14 @@ aarch64_err_no_fpadvsimd (machine_mode m
" vector types", "+nofp");
}
+/* Return true if REGNO is a predicate register (P0-P15), the FFR,
+   or the fake FFRT token register.  */
+inline bool
+pr_or_ffr_regnum_p (unsigned int regno)
+{
+  return regno == FFR_REGNUM || regno == FFRT_REGNUM || PR_REGNUM_P (regno);
+}
+
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
@@ -1810,6 +1818,8 @@ aarch64_hard_regno_nregs (unsigned regno
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
+ case FFR_REGS:
+ case PR_AND_FFR_REGS:
return 1;
default:
return CEIL (lowest_size, UNITS_PER_WORD);
@@ -1836,10 +1846,10 @@ aarch64_hard_regno_mode_ok (unsigned reg
return false;
if (vec_flags & VEC_SVE_PRED)
- return PR_REGNUM_P (regno);
+ return pr_or_ffr_regnum_p (regno);
- if (PR_REGNUM_P (regno))
- return 0;
+ if (pr_or_ffr_regnum_p (regno))
+ return false;
if (regno == SP_REGNUM)
/* The purpose of comparing with ptr_mode is to support the
@@ -9163,6 +9173,9 @@ aarch64_regno_regclass (unsigned regno)
if (PR_REGNUM_P (regno))
return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
+ if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
+ return FFR_REGS;
+
return NO_REGS;
}
@@ -9461,6 +9474,8 @@ aarch64_class_max_nregs (reg_class_t reg
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
+ case FFR_REGS:
+ case PR_AND_FFR_REGS:
return 1;
case NO_REGS:
@@ -11641,6 +11656,14 @@ aarch64_register_move_cost (machine_mode
if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
from = GENERAL_REGS;
+ /* Make RDFFR very expensive. In particular, if we know that the FFR
+ contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR
+ as a way of obtaining a PTRUE. */
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ && hard_reg_set_subset_p (reg_class_contents[from_i],
+ reg_class_contents[FFR_REGS]))
+ return 80;
+
/* Moving between GPR and stack cost is the same as GP2GP. */
if ((from == GENERAL_REGS && to == STACK_REG)
|| (to == GENERAL_REGS && from == STACK_REG))
@@ -14802,6 +14825,10 @@ aarch64_conditional_register_usage (void
call_used_regs[i] = 1;
}
+ /* Only allow the FFR and FFRT to be accessed via special patterns. */
+ CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
+
/* When tracking speculation, we need a couple of call-clobbered registers
to track the speculation state. It would be nice to just use
IP0 and IP1, but currently there are numerous places that just