diff mbox

[09/12,i386] Add patterns and predicates for -foutline-msabi-xlogues

Message ID 20170427080932.11703-9-daniel.santos@pobox.com
State New
Headers show

Commit Message

Daniel Santos April 27, 2017, 8:09 a.m. UTC
Adds the predicates save_multiple and restore_multiple to predicates.md,
which are used by following patterns in sse.md:

* save_multiple - insn that calls a save stub
* restore_multiple - call_insn that calls a restore stub and returns to the
  function to allow a sibling call (which should typically offer better
  optimization than the restore stub as the tail call)
* restore_multiple_and_return - a jump_insn that returns from the
  function as a tail-call.
* restore_multiple_leave_return - like the above, but restores the frame
  pointer before returning.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md        |  37 ++++++++++
 2 files changed, 192 insertions(+)

Comments

Uros Bizjak May 1, 2017, 11:18 a.m. UTC | #1
On Thu, Apr 27, 2017 at 10:09 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> Adds the predicates save_multiple and restore_multiple to predicates.md,
> which are used by following patterns in sse.md:
>
> * save_multiple - insn that calls a save stub
> * restore_multiple - call_insn that calls a save stub and returns to the
>   function to allow a sibling call (which should typically offer better
>   optimization than the restore stub as the tail call)
> * restore_multiple_and_return - a jump_insn that returns from the
>   function as a tail-call.
> * restore_multiple_leave_return - like the above, but restores the frame
>   pointer before returning.
>
> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
> ---
>  gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
>  gcc/config/i386/sse.md        |  37 ++++++++++
>  2 files changed, 192 insertions(+)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 8f250a2e720..36fe8abc3f4 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1657,3 +1657,158 @@
>    (ior (match_operand 0 "register_operand")
>         (and (match_code "const_int")
>             (match_test "op == constm1_rtx"))))
> +
> +;; Return true if:
> +;; 1. first op is a symbol reference,
> +;; 2. >= 13 operands, and
> +;; 3. operands 2 to end is one of:
> +;;   a. save a register to a memory location, or
> +;;   b. restore stack pointer.
> +(define_predicate "save_multiple"
> +  (match_code "parallel")
> +{
> +  const unsigned nregs = XVECLEN (op, 0);
> +  rtx head = XVECEXP (op, 0, 0);
> +  unsigned i;
> +
> +  if (GET_CODE (head) != USE)
> +    return false;
> +  else
> +    {
> +      rtx op0 = XEXP (head, 0);
> +      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
> +       return false;
> +    }
> +
> +  if (nregs < 13)
> +    return false;
> +
> +  for (i = 2; i < nregs; i++)
> +    {
> +      rtx e, src, dest;
> +
> +      e = XVECEXP (op, 0, i);
> +
> +      switch (GET_CODE (e))
> +       {
> +         case SET:
> +           src  = SET_SRC (e);
> +           dest = SET_DEST (e);
> +
> +           /* storing a register to memory.  */
> +           if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)

Please use the REG_P (...) and MEM_P (...) predicates - and possibly
others - in the code.

> +             {
> +               rtx addr = XEXP (dest, 0);
> +
> +               /* Good if dest address is in RAX.  */
> +               if (GET_CODE (addr) == REG
> +                   && REGNO (addr) == AX_REG)
> +                 continue;
> +
> +               /* Good if dest address is offset of RAX.  */
> +               if (GET_CODE (addr) == PLUS
> +                   && GET_CODE (XEXP (addr, 0)) == REG
> +                   && REGNO (XEXP (addr, 0)) == AX_REG)
> +                 continue;
> +             }
> +           break;
> +
> +         default:
> +           break;
> +       }
> +       return false;
> +    }
> +  return true;
> +})
> +
> +;; Return true if:
> +;; * first op is (return) or a a use (symbol reference),
> +;; * >= 14 operands, and
> +;; * operands 2 to end are one of:
> +;;   - restoring a register from a memory location that's an offset of RSI.
> +;;   - clobbering a reg
> +;;   - adjusting SP
> +(define_predicate "restore_multiple"
> +  (match_code "parallel")
> +{
> +  const unsigned nregs = XVECLEN (op, 0);
> +  rtx head = XVECEXP (op, 0, 0);
> +  unsigned i;
> +
> +  switch (GET_CODE (head))
> +    {
> +      case RETURN:
> +       i = 3;
> +       break;
> +
> +      case USE:
> +      {
> +       rtx op0 = XEXP (head, 0);
> +
> +       if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
> +         return false;
> +
> +       i = 1;
> +       break;
> +      }
> +
> +      default:
> +       return false;
> +    }
> +
> +  if (nregs < i + 12)
> +    return false;
> +
> +  for (; i < nregs; i++)
> +    {
> +      rtx e, src, dest;
> +
> +      e = XVECEXP (op, 0, i);
> +
> +      switch (GET_CODE (e))
> +       {
> +         case CLOBBER:
> +           continue;

I don't see where CLOBBER is generated in ix86_emit_outlined_ms2sysv_restore.

> +
> +         case SET:
> +           src  = SET_SRC (e);
> +           dest = SET_DEST (e);
> +
> +           /* Restoring a register from memory.  */
> +           if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
> +             {
> +               rtx addr = XEXP (src, 0);
> +
> +               /* Good if src address is in RSI.  */
> +               if (GET_CODE (addr) == REG
> +                   && REGNO (addr) == SI_REG)
> +                 continue;
> +
> +               /* Good if src address is offset of RSI.  */
> +               if (GET_CODE (addr) == PLUS
> +                   && GET_CODE (XEXP (addr, 0)) == REG
> +                   && REGNO (XEXP (addr, 0)) == SI_REG)
> +                 continue;
> +
> +               /* Good if adjusting stack pointer.  */
> +               if (GET_CODE (dest) == REG
> +                   && REGNO (dest) == SP_REG
> +                   && GET_CODE (src) == PLUS
> +                   && GET_CODE (XEXP (src, 0)) == REG
> +                   && REGNO (XEXP (src, 0)) == SP_REG)
> +                 continue;
> +             }
> +
> +           /* Restoring stack pointer from another register.  */
> +           if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
> +               && GET_CODE (src) == REG)
> +             continue;
> +           break;
> +
> +         default:
> +           break;
> +       }
> +       return false;
> +    }
> +  return true;
> +})

I think that the above functions should check only if the function is
storing/restoring correct registers, all other RTXes should be
explicitly written in the insn patterns.

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index e8ccb1e10c3..c9fe7274def 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -19997,3 +19997,40 @@
>            (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512VPOPCNTDQ"
>    "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
> +
> +;; Save multiple registers out-of-line.
> +(define_insn "save_multiple<mode>"
> +  [(match_parallel 0 "save_multiple"
> +    [(use (match_operand:P 1 "symbol_operand"))
> +     (const_int 0)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "call\t%P1")

You probably don't need a (const_int 0) tag. According to the
documentation, RTX matching guarantees subexpression match, so in the
predicate you should check only stores of registers (as suggested
above).

> +;; Restore multiple registers out-of-line.
> +(define_insn "restore_multiple<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(use (match_operand:P 1 "symbol_operand"))])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "call\t%P1")
> +
> +;; Restore multiple registers out-of-line and return.
> +(define_insn "restore_multiple_and_return<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(return)
> +     (use (match_operand:P 1 "symbol_operand"))
> +     (const_int 0)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "jmp\t%P1")

No need for (const_int 0) tag.

> +;; Restore multiple registers out-of-line when hard frame pointer is used,
> +;; perform the leave operation prior to returning (from the function).
> +(define_insn "restore_multiple_leave_return<mode>"
> +  [(match_parallel 0 "restore_multiple"
> +    [(return)
> +     (use (match_operand:P 1 "symbol_operand"))
> +     (const_int 1)
> +    ])]
> +  "TARGET_SSE && TARGET_64BIT"
> +  "jmp\t%P1")

You will have to write out all  sub-RTXes of the "leave" pattern,
including clobber.

I'd recommend that in the predicate, you check match_parallel from the
bottom up, since subexpressions on the top are already matched, and
you can have different number of subexpressions at the top.

Uros.
Daniel Santos May 2, 2017, 10:16 p.m. UTC | #2
Thank you for the review.

On 05/01/2017 06:18 AM, Uros Bizjak wrote:
> On Thu, Apr 27, 2017 at 10:09 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
>> Adds the predicates save_multiple and restore_multiple to predicates.md,
>> which are used by following patterns in sse.md:
>>
>> * save_multiple - insn that calls a save stub
>> * restore_multiple - call_insn that calls a save stub and returns to the
>>    function to allow a sibling call (which should typically offer better
>>    optimization than the restore stub as the tail call)
>> * restore_multiple_and_return - a jump_insn that returns from the
>>    function as a tail-call.
>> * restore_multiple_leave_return - like the above, but restores the frame
>>    pointer before returning.
>>
>> Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
>> ---
>>   gcc/config/i386/predicates.md | 155 ++++++++++++++++++++++++++++++++++++++++++
>>   gcc/config/i386/sse.md        |  37 ++++++++++
>>   2 files changed, 192 insertions(+)
>>
>> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
>> index 8f250a2e720..36fe8abc3f4 100644
>> --- a/gcc/config/i386/predicates.md
>> +++ b/gcc/config/i386/predicates.md
>> @@ -1657,3 +1657,158 @@
>>     (ior (match_operand 0 "register_operand")
>>          (and (match_code "const_int")
>>              (match_test "op == constm1_rtx"))))
>> +
>> +;; Return true if:
>> +;; 1. first op is a symbol reference,
>> +;; 2. >= 13 operands, and
>> +;; 3. operands 2 to end is one of:
>> +;;   a. save a register to a memory location, or
>> +;;   b. restore stack pointer.
>> +(define_predicate "save_multiple"
>> +  (match_code "parallel")
>> +{
>> +  const unsigned nregs = XVECLEN (op, 0);
>> +  rtx head = XVECEXP (op, 0, 0);
>> +  unsigned i;
>> +
>> +  if (GET_CODE (head) != USE)
>> +    return false;
>> +  else
>> +    {
>> +      rtx op0 = XEXP (head, 0);
>> +      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
>> +       return false;
>> +    }
>> +
>> +  if (nregs < 13)
>> +    return false;
>> +
>> +  for (i = 2; i < nregs; i++)
>> +    {
>> +      rtx e, src, dest;
>> +
>> +      e = XVECEXP (op, 0, i);
>> +
>> +      switch (GET_CODE (e))
>> +       {
>> +         case SET:
>> +           src  = SET_SRC (e);
>> +           dest = SET_DEST (e);
>> +
>> +           /* storing a register to memory.  */
>> +           if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
> Please use REG_P (...) and MEM_P (...) - and possible others -
> predicates in the code.
>
>> +             {
>> +               rtx addr = XEXP (dest, 0);
>> +
>> +               /* Good if dest address is in RAX.  */
>> +               if (GET_CODE (addr) == REG
>> +                   && REGNO (addr) == AX_REG)
>> +                 continue;
>> +
>> +               /* Good if dest address is offset of RAX.  */
>> +               if (GET_CODE (addr) == PLUS
>> +                   && GET_CODE (XEXP (addr, 0)) == REG
>> +                   && REGNO (XEXP (addr, 0)) == AX_REG)
>> +                 continue;
>> +             }
>> +           break;
>> +
>> +         default:
>> +           break;
>> +       }
>> +       return false;
>> +    }
>> +  return true;
>> +})
>> +
>> +;; Return true if:
>> +;; * first op is (return) or a a use (symbol reference),
>> +;; * >= 14 operands, and
>> +;; * operands 2 to end are one of:
>> +;;   - restoring a register from a memory location that's an offset of RSI.
>> +;;   - clobbering a reg
>> +;;   - adjusting SP
>> +(define_predicate "restore_multiple"
>> +  (match_code "parallel")
>> +{
>> +  const unsigned nregs = XVECLEN (op, 0);
>> +  rtx head = XVECEXP (op, 0, 0);
>> +  unsigned i;
>> +
>> +  switch (GET_CODE (head))
>> +    {
>> +      case RETURN:
>> +       i = 3;
>> +       break;
>> +
>> +      case USE:
>> +      {
>> +       rtx op0 = XEXP (head, 0);
>> +
>> +       if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
>> +         return false;
>> +
>> +       i = 1;
>> +       break;
>> +      }
>> +
>> +      default:
>> +       return false;
>> +    }
>> +
>> +  if (nregs < i + 12)
>> +    return false;
>> +
>> +  for (; i < nregs; i++)
>> +    {
>> +      rtx e, src, dest;
>> +
>> +      e = XVECEXP (op, 0, i);
>> +
>> +      switch (GET_CODE (e))
>> +       {
>> +         case CLOBBER:
>> +           continue;
> I don't see where CLOBBER is genreated in ix86_emit_outlined_ms2sysv_restore.

I think this is clutter that I didn't remove after changing the stubs.

>> +
>> +         case SET:
>> +           src  = SET_SRC (e);
>> +           dest = SET_DEST (e);
>> +
>> +           /* Restoring a register from memory.  */
>> +           if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
>> +             {
>> +               rtx addr = XEXP (src, 0);
>> +
>> +               /* Good if src address is in RSI.  */
>> +               if (GET_CODE (addr) == REG
>> +                   && REGNO (addr) == SI_REG)
>> +                 continue;
>> +
>> +               /* Good if src address is offset of RSI.  */
>> +               if (GET_CODE (addr) == PLUS
>> +                   && GET_CODE (XEXP (addr, 0)) == REG
>> +                   && REGNO (XEXP (addr, 0)) == SI_REG)
>> +                 continue;
>> +
>> +               /* Good if adjusting stack pointer.  */
>> +               if (GET_CODE (dest) == REG
>> +                   && REGNO (dest) == SP_REG
>> +                   && GET_CODE (src) == PLUS
>> +                   && GET_CODE (XEXP (src, 0)) == REG
>> +                   && REGNO (XEXP (src, 0)) == SP_REG)
>> +                 continue;
>> +             }
>> +
>> +           /* Restoring stack pointer from another register.  */
>> +           if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
>> +               && GET_CODE (src) == REG)
>> +             continue;
>> +           break;
>> +
>> +         default:
>> +           break;
>> +       }
>> +       return false;
>> +    }
>> +  return true;
>> +})
> I think that the above functions should check only if the function is
> storing/restoring correct registers, all other RTXes should be
> explicitly written in the insn patterns.
>
>> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
>> index e8ccb1e10c3..c9fe7274def 100644
>> --- a/gcc/config/i386/sse.md
>> +++ b/gcc/config/i386/sse.md
>> @@ -19997,3 +19997,40 @@
>>             (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
>>     "TARGET_AVX512VPOPCNTDQ"
>>     "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
>> +
>> +;; Save multiple registers out-of-line.
>> +(define_insn "save_multiple<mode>"
>> +  [(match_parallel 0 "save_multiple"
>> +    [(use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 0)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "call\t%P1")
> You probably don't need a (const_int 0) tag. According to the
> documentation, RTX matching guarantees subexpression match, so in the
> predicate you should check only stores of registers (as suggested
> above).
>
>> +;; Restore multiple registers out-of-line.
>> +(define_insn "restore_multiple<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(use (match_operand:P 1 "symbol_operand"))])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "call\t%P1")
>> +
>> +;; Restore multiple registers out-of-line and return.
>> +(define_insn "restore_multiple_and_return<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(return)
>> +     (use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 0)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "jmp\t%P1")
> No need for (const_int 0) tag.
>
>> +;; Restore multiple registers out-of-line when hard frame pointer is used,
>> +;; perform the leave operation prior to returning (from the function).
>> +(define_insn "restore_multiple_leave_return<mode>"
>> +  [(match_parallel 0 "restore_multiple"
>> +    [(return)
>> +     (use (match_operand:P 1 "symbol_operand"))
>> +     (const_int 1)
>> +    ])]
>> +  "TARGET_SSE && TARGET_64BIT"
>> +  "jmp\t%P1")
> You will have to write out all  sub-RTXes of the "leave" pattern,
> including clobber.
>
> I'd recommend that in the predicate, you check match_parallel from the
> bottom up, since subexpressions on the top are already matched, and
> you can have different number of subexpressions at the top.
>
> Uros.

OK, this all makes sense then. I was using const_int tags to 
differentiate the insns, but omitting the insns for the leave (for 
example) is what makes them ambiguous in the first place.

So one question I never had resolved is whether or not the order of the 
insns in a parallel matters.  From your suggestions, I'm guessing that 
it would be perfectly OK for the leave insns to follow use symbol and 
then have the register restores after that, even though that wouldn't 
make sense from an assembly level because we would be writing beyond the 
stack pointer.  Am I reading this correctly?  If so, then putting the 
register save/restore matching in the predicate (checking top down) and 
having all others in the pattern make great sense and should both 
simplify it and make it more clear.

Also, I'm wondering if there's anything wrong with calling 
ix86_gen_leave () and plucking the insns out of the generated parallel 
insn and moving that into my own parallel rather than generating them in 
my own function.  I guess all that matters is what is cleanest.

Thanks,
Daniel
Uros Bizjak May 3, 2017, 6:10 a.m. UTC | #3
On Wed, May 3, 2017 at 12:16 AM, Daniel Santos <daniel.santos@pobox.com> wrote:

>> I'd recommend that in the predicate, you check match_parallel from the
>> bottom up, since subexpressions on the top are already matched, and
>> you can have different number of subexpressions at the top.
>>
>> Uros.
>
>
> OK, this all makes sense then. I was using const_int tags to differentiate
> the insns, but omitting the insns for the leave (for example) are what
> ambiguates them in the first place.
>
> So one question I never had resolved is rather or not the order the insns in
> a parallel matters.  From your suggestions, I'm guessing that it would be
> perfectly OK for the leave insns to follow use symbol and then have the
> register restores after that, even though that wouldn't make sense from an
> assembly level because we would be writing beyond the stack pointer.  Am I
> reading this correctly?  If so, then putting the register save/restore
> matching in the predicate (checking top down) and having all others in the
> pattern make great sense and should both simplify it and make it more clear.

The order of subexpressions of parallel in general does not matter.

> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave ()
> and plucking the insns out of the generated parallel insn and moving that
> into my own parallel rather than generating them in my own function.  I
> guess all the matters is what is cleanest.

Hm... I'd rather see subexpressions generated "by hand".

Uros.
Daniel Santos May 3, 2017, 7:38 a.m. UTC | #4
On 05/03/2017 01:10 AM, Uros Bizjak wrote:
> The order of subexpressions of parallel in general does not matter.

Thanks, this makes things much clearer.

>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave ()
>> and plucking the insns out of the generated parallel insn and moving that
>> into my own parallel rather than generating them in my own function.  I
>> guess all the matters is what is cleanest.
> Hm... I'd rather see subexpressions generated "by hand".

OK.  While we're on the topic, are you OK with my changes to 
ix86_emit_leave to generate the notes or would you prefer those by hand 
as well?

Also, are these predicates what you had in mind?  (I haven't actually 
tested them just yet.)

(define_predicate "save_multiple"
   (match_code "parallel")
{
   /* Accept OP (a PARALLEL, per the match_code above) when the run of
      elements at the END of its vector consists of 12 to 18 SETs, each
      storing a register into memory whose address is AX_REG itself or a
      PLUS whose first operand is AX_REG.  Elements in front of that run
      are not examined by this predicate.  */
   const unsigned len = XVECLEN (op, 0);
   unsigned i;

   /* Starting from end of vector, count register saves.  I is the
      number of matching elements seen so far, so LEN - 1 - I walks the
      vector from its last element toward its first.  */
   for (i = 0; i < len; ++i)
     {
       rtx src, dest, addr;
       rtx e = XVECEXP (op, 0, len - 1 - i);

       /* The first element that is not a SET ends the run.  */
       if (GET_CODE (e) != SET)
         break;

       src  = SET_SRC (e);
       dest = SET_DEST (e);

       /* Only register-to-memory stores count as saves.  */
       if (!REG_P (src) || !MEM_P (dest))
         break;

       addr = XEXP (dest, 0);

       /* Good if dest address is in RAX.  */
       if (REG_P (addr) && REGNO (addr) == AX_REG)
         continue;

       /* Good if dest address is offset of RAX.  Only the first operand
          of the PLUS is checked; the second operand is not inspected.  */
       if (GET_CODE (addr) == PLUS
           && REG_P (XEXP (addr, 0))
           && REGNO (XEXP (addr, 0)) == AX_REG)
         continue;

       /* Any other address form ends the run.  */
       break;
     }
   /* I is now the length of the trailing run of saves.
      NOTE(review): 12 and 18 are presumably the fewest and most
      registers a save stub handles -- confirm against the stub
      definitions.  */
   return (i >= 12 && i <= 18);
})


(define_predicate "restore_multiple"
   (match_code "parallel")
{
   /* Accept OP (a PARALLEL, per the match_code above) when the run of
      elements at the END of its vector consists of 12 to 18 SETs, each
      loading a register from memory whose address is SI_REG itself or a
      PLUS whose first operand is SI_REG.  Elements in front of that run
      are not examined by this predicate.  */
   const unsigned len = XVECLEN (op, 0);
   unsigned i;

   /* Starting from end of vector, count register restores.  I is the
      number of matching elements seen so far, so LEN - 1 - I walks the
      vector from its last element toward its first.  */
   for (i = 0; i < len; ++i)
     {
       rtx src, dest, addr;
       rtx e = XVECEXP (op, 0, len - 1 - i);

       /* The first element that is not a SET ends the run.  */
       if (GET_CODE (e) != SET)
         break;

       src  = SET_SRC (e);
       dest = SET_DEST (e);

       /* Only memory-to-register loads count as restores.  */
       if (!MEM_P (src) || !REG_P (dest))
         break;

       addr = XEXP (src, 0);

       /* Good if src address is in RSI.  */
       if (REG_P (addr) && REGNO (addr) == SI_REG)
         continue;

       /* Good if src address is offset of RSI.  Only the first operand
          of the PLUS is checked; the second operand is not inspected.  */
       if (GET_CODE (addr) == PLUS
           && REG_P (XEXP (addr, 0))
           && REGNO (XEXP (addr, 0)) == SI_REG)
         continue;

       /* Any other address form ends the run.  */
       break;
     }
   /* I is now the length of the trailing run of restores.
      NOTE(review): 12 and 18 are presumably the fewest and most
      registers a restore stub handles -- confirm against the stub
      definitions.  */
   return (i >= 12 && i <= 18);
})


Thanks,
Daniel
Uros Bizjak May 3, 2017, 8:26 a.m. UTC | #5
On Wed, May 3, 2017 at 9:38 AM, Daniel Santos <daniel.santos@pobox.com> wrote:
> On 05/03/2017 01:10 AM, Uros Bizjak wrote:
>>
>> The order of subexpressions of parallel in general does not matter.
>
>
> Thanks, this makes things much clearer.
>
>>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave
>>> ()
>>> and plucking the insns out of the generated parallel insn and moving that
>>> into my own parallel rather than generating them in my own function.  I
>>> guess all the matters is what is cleanest.
>>
>> Hm... I'd rather see subexpressions generated "by hand".
>
>
> OK.  While we're on the topic, are you OK with my changes to ix86_emit_leave
> to generate the notes or would you prefer those by hand as well?

I think they are OK. We are effectively emitting a leave here.

> Also, are these predicates what you had in mind?  (I haven't actually tested
> them just yet.)

Yes, these look good to me.

Uros.

> (define_predicate "save_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register saves.  */
>   for (i = 0; i < len; ++i)
>     {
>       rtx src, dest, addr;
>       rtx e = XVECEXP (op, 0, len - 1 - i);
>
>       if (GET_CODE (e) != SET)
>         break;
>
>       src  = SET_SRC (e);
>       dest = SET_DEST (e);
>
>       if (!REG_P (src) || !MEM_P (dest))
>         break;
>
>       addr = XEXP (dest, 0);
>
>       /* Good if dest address is in RAX.  */
>       if (REG_P (addr) && REGNO (addr) == AX_REG)
>         continue;
>
>       /* Good if dest address is offset of RAX.  */
>       if (GET_CODE (addr) == PLUS
>           && REG_P (XEXP (addr, 0))
>           && REGNO (XEXP (addr, 0)) == AX_REG)
>         continue;
>
>       break;
>     }
>   return (i >= 12 && i <= 18);
> })
>
>
> (define_predicate "restore_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register restores.  */
>   for (i = 0; i < len; ++i)
>     {
>       rtx src, dest, addr;
>       rtx e = XVECEXP (op, 0, len - 1 - i);
>
>       if (GET_CODE (e) != SET)
>         break;
>
>       src  = SET_SRC (e);
>       dest = SET_DEST (e);
>
>       if (!MEM_P (src) || !REG_P (dest))
>         break;
>
>       addr = XEXP (src, 0);
>
>       /* Good if src address is in RSI.  */
>       if (REG_P (addr) && REGNO (addr) == SI_REG)
>         continue;
>
>       /* Good if src address is offset of RSI.  */
>       if (GET_CODE (addr) == PLUS
>           && REG_P (XEXP (addr, 0))
>           && REGNO (XEXP (addr, 0)) == SI_REG)
>         continue;
>
>       break;
>     }
>   return (i >= 12 && i <= 18);
> })
>
>
> Thanks,
> Daniel
>
diff mbox

Patch

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8f250a2e720..36fe8abc3f4 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1657,3 +1657,158 @@ 
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
 	    (match_test "op == constm1_rtx"))))
+
+;; Return true if:
+;; 1. first op is a symbol reference,
+;; 2. >= 13 operands, and
+;; 3. operands 2 to end is one of:
+;;   a. save a register to a memory location, or
+;;   b. restore stack pointer.
+(define_predicate "save_multiple"
+  (match_code "parallel")
+{
+  const unsigned nregs = XVECLEN (op, 0);
+  rtx head = XVECEXP (op, 0, 0);
+  unsigned i;
+
+  if (GET_CODE (head) != USE)
+    return false;
+  else
+    {
+      rtx op0 = XEXP (head, 0);
+      if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
+	return false;
+    }
+
+  if (nregs < 13)
+    return false;
+
+  for (i = 2; i < nregs; i++)
+    {
+      rtx e, src, dest;
+
+      e = XVECEXP (op, 0, i);
+
+      switch (GET_CODE (e))
+	{
+	  case SET:
+	    src  = SET_SRC (e);
+	    dest = SET_DEST (e);
+
+	    /* storing a register to memory.  */
+	    if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
+	      {
+		rtx addr = XEXP (dest, 0);
+
+		/* Good if dest address is in RAX.  */
+		if (GET_CODE (addr) == REG
+		    && REGNO (addr) == AX_REG)
+		  continue;
+
+		/* Good if dest address is offset of RAX.  */
+		if (GET_CODE (addr) == PLUS
+		    && GET_CODE (XEXP (addr, 0)) == REG
+		    && REGNO (XEXP (addr, 0)) == AX_REG)
+		  continue;
+	      }
+	    break;
+
+	  default:
+	    break;
+	}
+	return false;
+    }
+  return true;
+})
+
+;; Return true if:
+;; * first op is (return) or a a use (symbol reference),
+;; * >= 14 operands, and
+;; * operands 2 to end are one of:
+;;   - restoring a register from a memory location that's an offset of RSI.
+;;   - clobbering a reg
+;;   - adjusting SP
+(define_predicate "restore_multiple"
+  (match_code "parallel")
+{
+  const unsigned nregs = XVECLEN (op, 0);
+  rtx head = XVECEXP (op, 0, 0);
+  unsigned i;
+
+  switch (GET_CODE (head))
+    {
+      case RETURN:
+	i = 3;
+	break;
+
+      case USE:
+      {
+	rtx op0 = XEXP (head, 0);
+
+	if (op0 == NULL_RTX || GET_CODE (op0) != SYMBOL_REF)
+	  return false;
+
+	i = 1;
+	break;
+      }
+
+      default:
+	return false;
+    }
+
+  if (nregs < i + 12)
+    return false;
+
+  for (; i < nregs; i++)
+    {
+      rtx e, src, dest;
+
+      e = XVECEXP (op, 0, i);
+
+      switch (GET_CODE (e))
+	{
+	  case CLOBBER:
+	    continue;
+
+	  case SET:
+	    src  = SET_SRC (e);
+	    dest = SET_DEST (e);
+
+	    /* Restoring a register from memory.  */
+	    if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
+	      {
+		rtx addr = XEXP (src, 0);
+
+		/* Good if src address is in RSI.  */
+		if (GET_CODE (addr) == REG
+		    && REGNO (addr) == SI_REG)
+		  continue;
+
+		/* Good if src address is offset of RSI.  */
+		if (GET_CODE (addr) == PLUS
+		    && GET_CODE (XEXP (addr, 0)) == REG
+		    && REGNO (XEXP (addr, 0)) == SI_REG)
+		  continue;
+
+		/* Good if adjusting stack pointer.  */
+		if (GET_CODE (dest) == REG
+		    && REGNO (dest) == SP_REG
+		    && GET_CODE (src) == PLUS
+		    && GET_CODE (XEXP (src, 0)) == REG
+		    && REGNO (XEXP (src, 0)) == SP_REG)
+		  continue;
+	      }
+
+	    /* Restoring stack pointer from another register.  */
+	    if (GET_CODE (dest) == REG && REGNO (dest) == SP_REG
+		&& GET_CODE (src) == REG)
+	      continue;
+	    break;
+
+	  default:
+	    break;
+	}
+	return false;
+    }
+  return true;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e8ccb1e10c3..c9fe7274def 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -19997,3 +19997,40 @@ 
           (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512VPOPCNTDQ"
   "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+;; Save multiple registers out-of-line.
+(define_insn "save_multiple<mode>"
+  [(match_parallel 0 "save_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))
+     (const_int 0)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line.
+(define_insn "restore_multiple<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(use (match_operand:P 1 "symbol_operand"))])]
+  "TARGET_SSE && TARGET_64BIT"
+  "call\t%P1")
+
+;; Restore multiple registers out-of-line and return.
+(define_insn "restore_multiple_and_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (const_int 0)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")
+
+;; Restore multiple registers out-of-line when hard frame pointer is used,
+;; perform the leave operation prior to returning (from the function).
+(define_insn "restore_multiple_leave_return<mode>"
+  [(match_parallel 0 "restore_multiple"
+    [(return)
+     (use (match_operand:P 1 "symbol_operand"))
+     (const_int 1)
+    ])]
+  "TARGET_SSE && TARGET_64BIT"
+  "jmp\t%P1")