@@ -1657,3 +1657,84 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_int")
(match_test "op == constm1_rtx"))))
+
+;; Match a PARALLEL ending in a run of 12 to 18 SETs, each storing a register
+;; to a MEM whose address is RAX or a PLUS with RAX as its first operand.
+(define_predicate "save_multiple"
+  (match_code "parallel")
+{
+  const int nelts = XVECLEN (op, 0);
+  unsigned nsaves = 0;
+  int idx;
+
+  /* Walk backwards from the last element and stop at the first element
+     that is not a register save through RAX.  */
+  for (idx = nelts - 1; idx >= 0; --idx)
+    {
+      rtx elt = XVECEXP (op, 0, idx);
+      rtx mem, base;
+
+      /* Only elements shaped like (set (mem ...) (reg ...)) count.  */
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_SRC (elt))
+          || !MEM_P (SET_DEST (elt)))
+        break;
+
+      mem = SET_DEST (elt);
+      base = XEXP (mem, 0);
+
+      /* For a PLUS address, the base is its first operand.  */
+      if (GET_CODE (base) == PLUS)
+        base = XEXP (base, 0);
+
+      /* The base of the destination address must be RAX.  */
+      if (!REG_P (base) || REGNO (base) != AX_REG)
+        break;
+
+      ++nsaves;
+    }
+
+  /* Accept only when the trailing run is 12 to 18 saves long.  */
+  return (nsaves >= 12 && nsaves <= 18);
+})
+
+
+;; Match a PARALLEL ending in a run of 12 to 18 SETs, each loading a register
+;; from a MEM whose address is RSI or a PLUS with RSI as its first operand.
+(define_predicate "restore_multiple"
+  (match_code "parallel")
+{
+  const int nelts = XVECLEN (op, 0);
+  unsigned nloads = 0;
+  int idx;
+
+  /* Walk backwards from the last element and stop at the first element
+     that is not a register load through RSI.  */
+  for (idx = nelts - 1; idx >= 0; --idx)
+    {
+      rtx elt = XVECEXP (op, 0, idx);
+      rtx mem, base;
+
+      /* Only elements shaped like (set (reg ...) (mem ...)) count.  */
+      if (GET_CODE (elt) != SET
+          || !MEM_P (SET_SRC (elt))
+          || !REG_P (SET_DEST (elt)))
+        break;
+
+      mem = SET_SRC (elt);
+      base = XEXP (mem, 0);
+
+      /* For a PLUS address, the base is its first operand.  */
+      if (GET_CODE (base) == PLUS)
+        base = XEXP (base, 0);
+
+      /* The base of the source address must be RSI.  */
+      if (!REG_P (base) || REGNO (base) != SI_REG)
+        break;
+
+      ++nloads;
+    }
+
+  /* Accept only when the trailing run is 12 to 18 loads long.  */
+  return (nloads >= 12 && nloads <= 18);
+})
@@ -20010,3 +20010,40 @@
(match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512VPOPCNTDQ"
"vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+;; Save multiple registers out-of-line: call the symbol in operand 1.
+(define_insn "save_multiple<mode>"
+ [(match_parallel 0 "save_multiple"
+ [(use (match_operand:P 1 "symbol_operand"))])]
+ "TARGET_SSE && TARGET_64BIT"
+ "call\t%P1")
+
+;; Restore multiple registers out-of-line: call the symbol in operand 1.
+(define_insn "restore_multiple<mode>"
+ [(match_parallel 0 "restore_multiple"
+ [(use (match_operand:P 1 "symbol_operand"))])]
+ "TARGET_SSE && TARGET_64BIT"
+ "call\t%P1")
+
+;; Restore multiple registers out-of-line, set SP from R10, and return.
+(define_insn "restore_multiple_and_return<mode>"
+ [(match_parallel 0 "restore_multiple"
+ [(return)
+ (use (match_operand:P 1 "symbol_operand"))
+ (set (reg:DI SP_REG) (reg:DI R10_REG)) ; SP = R10
+ ])]
+ "TARGET_SSE && TARGET_64BIT"
+ "jmp\t%P1") ; emitted as a jump to operand 1, not a call
+
+;; Restore multiple registers out-of-line when the hard frame pointer is used;
+;; the pattern also performs the leave operation and returns from the function.
+(define_insn "restore_multiple_leave_return<mode>"
+ [(match_parallel 0 "restore_multiple"
+ [(return)
+ (use (match_operand:P 1 "symbol_operand"))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) ; SP = BP + 8
+ (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) ; BP = MEM[BP]
+ (clobber (mem:BLK (scratch)))
+ ])]
+ "TARGET_SSE && TARGET_64BIT"
+ "jmp\t%P1") ; emitted as a jump to operand 1, not a call
I've cleaned up the patterns and predicates as per your instructions, resulting in 74 fewer lines of code. Adding explicit insns to restore the stack pointer and to perform the "leave" (to the patterns restore_multiple_and_return and restore_multiple_leave_return, respectively) disambiguates them just fine without the const_int tag while correctly describing exactly what each pattern does. Thanks for your guidance. I understand RTL much better now. Signed-off-by: Daniel Santos <daniel.santos@pobox.com> --- gcc/config/i386/predicates.md | 81 +++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/sse.md | 37 ++++++++++++++++++++ 2 files changed, 118 insertions(+)