diff mbox series

[ARM] Remove support for MULS

Message ID VI1PR0801MB2127BA15FECEB9AB870CC07083B90@VI1PR0801MB2127.eurprd08.prod.outlook.com
State New
Headers show
Series [ARM] Remove support for MULS | expand

Commit Message

Wilco Dijkstra Sept. 3, 2019, 3:36 p.m. UTC
Remove various MULS/MLAS patterns which are enabled when optimizing for
size.  However the codesize gain from these patterns is so minimal that
there is no point in keeping them.

Bootstrap OK on armhf, regress passes.

ChangeLog:
2019-09-03  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/arm/arm.md (mulsi3_compare0): Remove pattern.
	(mulsi3_compare0_v6): Likewise.
	(mulsi_compare0_scratch): Likewise.
	(mulsi_compare0_scratch_v6): Likewise.
	(mulsi3addsi_compare0): Likewise.
	(mulsi3addsi_compare0_v6): Likewise.
	(mulsi3addsi_compare0_scratch): Likewise.
	(mulsi3addsi_compare0_scratch_v6): Likewise.
	* config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove pattern.
	(thumb2_mulsi_short_compare0_scratch): Likewise.

--

Comments

Wilco Dijkstra Sept. 9, 2019, 5:07 p.m. UTC | #1
ping
   
 
Remove various MULS/MLAS patterns which are enabled when optimizing for
 size.  However the codesize gain from these patterns is so minimal that
 there is no point in keeping them.
 
 Bootstrap OK on armhf, regress passes.
 
 ChangeLog:
 2019-09-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
         * config/arm/arm.md (mulsi3_compare0): Remove pattern.
         (mulsi3_compare0_v6): Likewise.
         (mulsi_compare0_scratch): Likewise.
         (mulsi_compare0_scratch_v6): Likewise.
         (mulsi3addsi_compare0): Likewise.
         (mulsi3addsi_compare0_v6): Likewise.
         (mulsi3addsi_compare0_scratch): Likewise.
         (mulsi3addsi_compare0_scratch_v6): Likewise.
         * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove pattern.
         (thumb2_mulsi_short_compare0_scratch): Likewise.
 
 --
 diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
 index 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 100644
 --- a/gcc/config/arm/arm.md
 +++ b/gcc/config/arm/arm.md
 @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6"
     (set_attr "predicable_short_it" "yes,yes,no")]
  )
  
 -(define_insn "*mulsi3_compare0"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV (mult:SI
 -                         (match_operand:SI 2 "s_register_operand" "r,r")
 -                         (match_operand:SI 1 "s_register_operand" "%0,r"))
 -                        (const_int 0)))
 -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
 -       (mult:SI (match_dup 2) (match_dup 1)))]
 -  "TARGET_ARM && !arm_arch6"
 -  "muls%?\\t%0, %2, %1"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "muls")]
 -)
 -
 -(define_insn "*mulsi3_compare0_v6"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV (mult:SI
 -                         (match_operand:SI 2 "s_register_operand" "r")
 -                         (match_operand:SI 1 "s_register_operand" "r"))
 -                        (const_int 0)))
 -   (set (match_operand:SI 0 "s_register_operand" "=r")
 -       (mult:SI (match_dup 2) (match_dup 1)))]
 -  "TARGET_ARM && arm_arch6 && optimize_size"
 -  "muls%?\\t%0, %2, %1"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "muls")]
 -)
 -
 -(define_insn "*mulsi_compare0_scratch"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV (mult:SI
 -                         (match_operand:SI 2 "s_register_operand" "r,r")
 -                         (match_operand:SI 1 "s_register_operand" "%0,r"))
 -                        (const_int 0)))
 -   (clobber (match_scratch:SI 0 "=&r,&r"))]
 -  "TARGET_ARM && !arm_arch6"
 -  "muls%?\\t%0, %2, %1"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "muls")]
 -)
 -
 -(define_insn "*mulsi_compare0_scratch_v6"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV (mult:SI
 -                         (match_operand:SI 2 "s_register_operand" "r")
 -                         (match_operand:SI 1 "s_register_operand" "r"))
 -                        (const_int 0)))
 -   (clobber (match_scratch:SI 0 "=r"))]
 -  "TARGET_ARM && arm_arch6 && optimize_size"
 -  "muls%?\\t%0, %2, %1"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "muls")]
 -)
 -
  ;; Unnamed templates to match MLA instruction.
  
  (define_insn "*mulsi3addsi"
 @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6"
     (set_attr "predicable" "yes")]
  )
  
 -(define_insn "*mulsi3addsi_compare0"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV
 -        (plus:SI (mult:SI
 -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
 -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
 -                 (match_operand:SI 3 "s_register_operand" "r,r,0,0"))
 -        (const_int 0)))
 -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
 -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
 -                (match_dup 3)))]
 -  "TARGET_ARM && arm_arch6"
 -  "mlas%?\\t%0, %2, %1, %3"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "mlas")]
 -)
 -
 -(define_insn "*mulsi3addsi_compare0_v6"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV
 -        (plus:SI (mult:SI
 -                  (match_operand:SI 2 "s_register_operand" "r")
 -                  (match_operand:SI 1 "s_register_operand" "r"))
 -                 (match_operand:SI 3 "s_register_operand" "r"))
 -        (const_int 0)))
 -   (set (match_operand:SI 0 "s_register_operand" "=r")
 -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
 -                (match_dup 3)))]
 -  "TARGET_ARM && arm_arch6 && optimize_size"
 -  "mlas%?\\t%0, %2, %1, %3"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "mlas")]
 -)
 -
 -(define_insn "*mulsi3addsi_compare0_scratch"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV
 -        (plus:SI (mult:SI
 -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
 -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
 -                 (match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
 -        (const_int 0)))
 -   (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
 -  "TARGET_ARM && !arm_arch6"
 -  "mlas%?\\t%0, %2, %1, %3"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "mlas")]
 -)
 -
 -(define_insn "*mulsi3addsi_compare0_scratch_v6"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -       (compare:CC_NOOV
 -        (plus:SI (mult:SI
 -                  (match_operand:SI 2 "s_register_operand" "r")
 -                  (match_operand:SI 1 "s_register_operand" "r"))
 -                 (match_operand:SI 3 "s_register_operand" "r"))
 -        (const_int 0)))
 -   (clobber (match_scratch:SI 0 "=r"))]
 -  "TARGET_ARM && arm_arch6 && optimize_size"
 -  "mlas%?\\t%0, %2, %1, %3"
 -  [(set_attr "conds" "set")
 -   (set_attr "type" "mlas")]
 -)
 -
  (define_insn "*mulsi3subsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
          (minus:SI
 diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
 index 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 100644
 --- a/gcc/config/arm/thumb2.md
 +++ b/gcc/config/arm/thumb2.md
 @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short"
     (set_attr "length" "2")
     (set_attr "type" "muls")])
  
 -(define_insn "*thumb2_mulsi_short_compare0"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -        (compare:CC_NOOV
 -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
 -                 (match_operand:SI 2 "register_operand" "l"))
 -         (const_int 0)))
 -   (set (match_operand:SI 0 "register_operand" "=l")
 -       (mult:SI (match_dup 1) (match_dup 2)))]
 -  "TARGET_THUMB2 && optimize_size"
 -  "muls\\t%0, %2, %0"
 -  [(set_attr "length" "2")
 -   (set_attr "type" "muls")])
 -
 -(define_insn "*thumb2_mulsi_short_compare0_scratch"
 -  [(set (reg:CC_NOOV CC_REGNUM)
 -        (compare:CC_NOOV
 -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
 -                 (match_operand:SI 2 "register_operand" "l"))
 -         (const_int 0)))
 -   (clobber (match_scratch:SI 0 "=l"))]
 -  "TARGET_THUMB2 && optimize_size"
 -  "muls\\t%0, %2, %0"
 -  [(set_attr "length" "2")
 -   (set_attr "type" "muls")])
 -
  (define_insn "*thumb2_cbz"
    [(set (pc) (if_then_else
                (eq (match_operand:SI 0 "s_register_operand" "l,?r")
Kyrill Tkachov Sept. 18, 2019, 4:31 p.m. UTC | #2
Hi Wilco,

On 9/9/19 6:07 PM, Wilco Dijkstra wrote:
> ping
>
>
> Remove various MULS/MLAS patterns which are enabled when optimizing for
>  size.  However the codesize gain from these patterns is so minimal that
>  there is no point in keeping them.
>
I disagree. If they still trigger and generate better code than without 
we should keep them.

What kind of code is *common* varies greatly from user to user.

Thanks,

Kyrill


>  Bootstrap OK on armhf, regress passes.
>
>  ChangeLog:
>  2019-09-03  Wilco Dijkstra  <wdijkstr@arm.com>
>
>          * config/arm/arm.md (mulsi3_compare0): Remove pattern.
>          (mulsi3_compare0_v6): Likewise.
>          (mulsi_compare0_scratch): Likewise.
>          (mulsi_compare0_scratch_v6): Likewise.
>          (mulsi3addsi_compare0): Likewise.
>          (mulsi3addsi_compare0_v6): Likewise.
>          (mulsi3addsi_compare0_scratch): Likewise.
>          (mulsi3addsi_compare0_scratch_v6): Likewise.
>          * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove 
> pattern.
>          (thumb2_mulsi_short_compare0_scratch): Likewise.
>
>  --
>  diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>  index 
> 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 
> 100644
>  --- a/gcc/config/arm/arm.md
>  +++ b/gcc/config/arm/arm.md
>  @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6"
>      (set_attr "predicable_short_it" "yes,yes,no")]
>   )
>
>  -(define_insn "*mulsi3_compare0"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV (mult:SI
>  -                         (match_operand:SI 2 "s_register_operand" "r,r")
>  -                         (match_operand:SI 1 "s_register_operand" 
> "%0,r"))
>  -                        (const_int 0)))
>  -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
>  -       (mult:SI (match_dup 2) (match_dup 1)))]
>  -  "TARGET_ARM && !arm_arch6"
>  -  "muls%?\\t%0, %2, %1"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "muls")]
>  -)
>  -
>  -(define_insn "*mulsi3_compare0_v6"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV (mult:SI
>  -                         (match_operand:SI 2 "s_register_operand" "r")
>  -                         (match_operand:SI 1 "s_register_operand" "r"))
>  -                        (const_int 0)))
>  -   (set (match_operand:SI 0 "s_register_operand" "=r")
>  -       (mult:SI (match_dup 2) (match_dup 1)))]
>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>  -  "muls%?\\t%0, %2, %1"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "muls")]
>  -)
>  -
>  -(define_insn "*mulsi_compare0_scratch"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV (mult:SI
>  -                         (match_operand:SI 2 "s_register_operand" "r,r")
>  -                         (match_operand:SI 1 "s_register_operand" 
> "%0,r"))
>  -                        (const_int 0)))
>  -   (clobber (match_scratch:SI 0 "=&r,&r"))]
>  -  "TARGET_ARM && !arm_arch6"
>  -  "muls%?\\t%0, %2, %1"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "muls")]
>  -)
>  -
>  -(define_insn "*mulsi_compare0_scratch_v6"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV (mult:SI
>  -                         (match_operand:SI 2 "s_register_operand" "r")
>  -                         (match_operand:SI 1 "s_register_operand" "r"))
>  -                        (const_int 0)))
>  -   (clobber (match_scratch:SI 0 "=r"))]
>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>  -  "muls%?\\t%0, %2, %1"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "muls")]
>  -)
>  -
>   ;; Unnamed templates to match MLA instruction.
>
>   (define_insn "*mulsi3addsi"
>  @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6"
>      (set_attr "predicable" "yes")]
>   )
>
>  -(define_insn "*mulsi3addsi_compare0"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV
>  -        (plus:SI (mult:SI
>  -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
>  -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
>  -                 (match_operand:SI 3 "s_register_operand" "r,r,0,0"))
>  -        (const_int 0)))
>  -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
>  -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
>  -                (match_dup 3)))]
>  -  "TARGET_ARM && arm_arch6"
>  -  "mlas%?\\t%0, %2, %1, %3"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "mlas")]
>  -)
>  -
>  -(define_insn "*mulsi3addsi_compare0_v6"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV
>  -        (plus:SI (mult:SI
>  -                  (match_operand:SI 2 "s_register_operand" "r")
>  -                  (match_operand:SI 1 "s_register_operand" "r"))
>  -                 (match_operand:SI 3 "s_register_operand" "r"))
>  -        (const_int 0)))
>  -   (set (match_operand:SI 0 "s_register_operand" "=r")
>  -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
>  -                (match_dup 3)))]
>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>  -  "mlas%?\\t%0, %2, %1, %3"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "mlas")]
>  -)
>  -
>  -(define_insn "*mulsi3addsi_compare0_scratch"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV
>  -        (plus:SI (mult:SI
>  -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
>  -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
>  -                 (match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
>  -        (const_int 0)))
>  -   (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
>  -  "TARGET_ARM && !arm_arch6"
>  -  "mlas%?\\t%0, %2, %1, %3"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "mlas")]
>  -)
>  -
>  -(define_insn "*mulsi3addsi_compare0_scratch_v6"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -       (compare:CC_NOOV
>  -        (plus:SI (mult:SI
>  -                  (match_operand:SI 2 "s_register_operand" "r")
>  -                  (match_operand:SI 1 "s_register_operand" "r"))
>  -                 (match_operand:SI 3 "s_register_operand" "r"))
>  -        (const_int 0)))
>  -   (clobber (match_scratch:SI 0 "=r"))]
>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>  -  "mlas%?\\t%0, %2, %1, %3"
>  -  [(set_attr "conds" "set")
>  -   (set_attr "type" "mlas")]
>  -)
>  -
>   (define_insn "*mulsi3subsi"
>     [(set (match_operand:SI 0 "s_register_operand" "=r")
>           (minus:SI
>  diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
>  index 
> 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 
> 100644
>  --- a/gcc/config/arm/thumb2.md
>  +++ b/gcc/config/arm/thumb2.md
>  @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short"
>      (set_attr "length" "2")
>      (set_attr "type" "muls")])
>
>  -(define_insn "*thumb2_mulsi_short_compare0"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -        (compare:CC_NOOV
>  -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
>  -                 (match_operand:SI 2 "register_operand" "l"))
>  -         (const_int 0)))
>  -   (set (match_operand:SI 0 "register_operand" "=l")
>  -       (mult:SI (match_dup 1) (match_dup 2)))]
>  -  "TARGET_THUMB2 && optimize_size"
>  -  "muls\\t%0, %2, %0"
>  -  [(set_attr "length" "2")
>  -   (set_attr "type" "muls")])
>  -
>  -(define_insn "*thumb2_mulsi_short_compare0_scratch"
>  -  [(set (reg:CC_NOOV CC_REGNUM)
>  -        (compare:CC_NOOV
>  -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
>  -                 (match_operand:SI 2 "register_operand" "l"))
>  -         (const_int 0)))
>  -   (clobber (match_scratch:SI 0 "=l"))]
>  -  "TARGET_THUMB2 && optimize_size"
>  -  "muls\\t%0, %2, %0"
>  -  [(set_attr "length" "2")
>  -   (set_attr "type" "muls")])
>  -
>   (define_insn "*thumb2_cbz"
>     [(set (pc) (if_then_else
>                 (eq (match_operand:SI 0 "s_register_operand" "l,?r")
Richard Earnshaw (lists) Sept. 19, 2019, 9:52 a.m. UTC | #3
On 18/09/2019 17:31, Kyrill Tkachov wrote:
> Hi Wilco,
> 
> On 9/9/19 6:07 PM, Wilco Dijkstra wrote:
>> ping
>>
>>
>> Remove various MULS/MLAS patterns which are enabled when optimizing for
>>  size.  However the codesize gain from these patterns is so minimal that
>>  there is no point in keeping them.
>>
> I disagree. If they still trigger and generate better code than without 
> we should keep them.
> 
> What kind of code is *common* varies greatly from user to user.

Also, the main reason for restricting their use was that in the 'olden 
days', when we had multi-cycle implementations of the multiply 
instructions with short-circuit fast termination when the result was 
completed, the flag setting variants would never short-circuit.

These days we have fixed cycle counts for multiply instructions, so this 
is no-longer a penalty.  In the thumb2 case in particular we can often 
reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this 
sequence and definitely worth exploiting when we can, even if it's not 
all that common.

In fact, give the latest architectural implementations, I think we 
should look again at re-enabling these when doing normal optimization as 
well.

R.

> 
> Thanks,
> 
> Kyrill
> 
> 
>>  Bootstrap OK on armhf, regress passes.
>>
>>  ChangeLog:
>>  2019-09-03  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>>          * config/arm/arm.md (mulsi3_compare0): Remove pattern.
>>          (mulsi3_compare0_v6): Likewise.
>>          (mulsi_compare0_scratch): Likewise.
>>          (mulsi_compare0_scratch_v6): Likewise.
>>          (mulsi3addsi_compare0): Likewise.
>>          (mulsi3addsi_compare0_v6): Likewise.
>>          (mulsi3addsi_compare0_scratch): Likewise.
>>          (mulsi3addsi_compare0_scratch_v6): Likewise.
>>          * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove 
>> pattern.
>>          (thumb2_mulsi_short_compare0_scratch): Likewise.
>>
>>  --
>>  diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>  index 
>> 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 
>> 100644
>>  --- a/gcc/config/arm/arm.md
>>  +++ b/gcc/config/arm/arm.md
>>  @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6"
>>      (set_attr "predicable_short_it" "yes,yes,no")]
>>   )
>>
>>  -(define_insn "*mulsi3_compare0"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV (mult:SI
>>  -                         (match_operand:SI 2 "s_register_operand" 
>> "r,r")
>>  -                         (match_operand:SI 1 "s_register_operand" 
>> "%0,r"))
>>  -                        (const_int 0)))
>>  -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
>>  -       (mult:SI (match_dup 2) (match_dup 1)))]
>>  -  "TARGET_ARM && !arm_arch6"
>>  -  "muls%?\\t%0, %2, %1"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "muls")]
>>  -)
>>  -
>>  -(define_insn "*mulsi3_compare0_v6"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV (mult:SI
>>  -                         (match_operand:SI 2 "s_register_operand" "r")
>>  -                         (match_operand:SI 1 "s_register_operand" "r"))
>>  -                        (const_int 0)))
>>  -   (set (match_operand:SI 0 "s_register_operand" "=r")
>>  -       (mult:SI (match_dup 2) (match_dup 1)))]
>>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>>  -  "muls%?\\t%0, %2, %1"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "muls")]
>>  -)
>>  -
>>  -(define_insn "*mulsi_compare0_scratch"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV (mult:SI
>>  -                         (match_operand:SI 2 "s_register_operand" 
>> "r,r")
>>  -                         (match_operand:SI 1 "s_register_operand" 
>> "%0,r"))
>>  -                        (const_int 0)))
>>  -   (clobber (match_scratch:SI 0 "=&r,&r"))]
>>  -  "TARGET_ARM && !arm_arch6"
>>  -  "muls%?\\t%0, %2, %1"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "muls")]
>>  -)
>>  -
>>  -(define_insn "*mulsi_compare0_scratch_v6"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV (mult:SI
>>  -                         (match_operand:SI 2 "s_register_operand" "r")
>>  -                         (match_operand:SI 1 "s_register_operand" "r"))
>>  -                        (const_int 0)))
>>  -   (clobber (match_scratch:SI 0 "=r"))]
>>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>>  -  "muls%?\\t%0, %2, %1"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "muls")]
>>  -)
>>  -
>>   ;; Unnamed templates to match MLA instruction.
>>
>>   (define_insn "*mulsi3addsi"
>>  @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6"
>>      (set_attr "predicable" "yes")]
>>   )
>>
>>  -(define_insn "*mulsi3addsi_compare0"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV
>>  -        (plus:SI (mult:SI
>>  -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
>>  -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
>>  -                 (match_operand:SI 3 "s_register_operand" "r,r,0,0"))
>>  -        (const_int 0)))
>>  -   (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
>>  -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
>>  -                (match_dup 3)))]
>>  -  "TARGET_ARM && arm_arch6"
>>  -  "mlas%?\\t%0, %2, %1, %3"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "mlas")]
>>  -)
>>  -
>>  -(define_insn "*mulsi3addsi_compare0_v6"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV
>>  -        (plus:SI (mult:SI
>>  -                  (match_operand:SI 2 "s_register_operand" "r")
>>  -                  (match_operand:SI 1 "s_register_operand" "r"))
>>  -                 (match_operand:SI 3 "s_register_operand" "r"))
>>  -        (const_int 0)))
>>  -   (set (match_operand:SI 0 "s_register_operand" "=r")
>>  -       (plus:SI (mult:SI (match_dup 2) (match_dup 1))
>>  -                (match_dup 3)))]
>>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>>  -  "mlas%?\\t%0, %2, %1, %3"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "mlas")]
>>  -)
>>  -
>>  -(define_insn "*mulsi3addsi_compare0_scratch"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV
>>  -        (plus:SI (mult:SI
>>  -                  (match_operand:SI 2 "s_register_operand" "r,r,r,r")
>>  -                  (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
>>  -                 (match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
>>  -        (const_int 0)))
>>  -   (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
>>  -  "TARGET_ARM && !arm_arch6"
>>  -  "mlas%?\\t%0, %2, %1, %3"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "mlas")]
>>  -)
>>  -
>>  -(define_insn "*mulsi3addsi_compare0_scratch_v6"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -       (compare:CC_NOOV
>>  -        (plus:SI (mult:SI
>>  -                  (match_operand:SI 2 "s_register_operand" "r")
>>  -                  (match_operand:SI 1 "s_register_operand" "r"))
>>  -                 (match_operand:SI 3 "s_register_operand" "r"))
>>  -        (const_int 0)))
>>  -   (clobber (match_scratch:SI 0 "=r"))]
>>  -  "TARGET_ARM && arm_arch6 && optimize_size"
>>  -  "mlas%?\\t%0, %2, %1, %3"
>>  -  [(set_attr "conds" "set")
>>  -   (set_attr "type" "mlas")]
>>  -)
>>  -
>>   (define_insn "*mulsi3subsi"
>>     [(set (match_operand:SI 0 "s_register_operand" "=r")
>>           (minus:SI
>>  diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
>>  index 
>> 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 
>> 100644
>>  --- a/gcc/config/arm/thumb2.md
>>  +++ b/gcc/config/arm/thumb2.md
>>  @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short"
>>      (set_attr "length" "2")
>>      (set_attr "type" "muls")])
>>
>>  -(define_insn "*thumb2_mulsi_short_compare0"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -        (compare:CC_NOOV
>>  -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
>>  -                 (match_operand:SI 2 "register_operand" "l"))
>>  -         (const_int 0)))
>>  -   (set (match_operand:SI 0 "register_operand" "=l")
>>  -       (mult:SI (match_dup 1) (match_dup 2)))]
>>  -  "TARGET_THUMB2 && optimize_size"
>>  -  "muls\\t%0, %2, %0"
>>  -  [(set_attr "length" "2")
>>  -   (set_attr "type" "muls")])
>>  -
>>  -(define_insn "*thumb2_mulsi_short_compare0_scratch"
>>  -  [(set (reg:CC_NOOV CC_REGNUM)
>>  -        (compare:CC_NOOV
>>  -         (mult:SI (match_operand:SI 1 "register_operand" "%0")
>>  -                 (match_operand:SI 2 "register_operand" "l"))
>>  -         (const_int 0)))
>>  -   (clobber (match_scratch:SI 0 "=l"))]
>>  -  "TARGET_THUMB2 && optimize_size"
>>  -  "muls\\t%0, %2, %0"
>>  -  [(set_attr "length" "2")
>>  -   (set_attr "type" "muls")])
>>  -
>>   (define_insn "*thumb2_cbz"
>>     [(set (pc) (if_then_else
>>                 (eq (match_operand:SI 0 "s_register_operand" "l,?r")
Wilco Dijkstra Sept. 19, 2019, 4:46 p.m. UTC | #4
Hi Richard, Kyrill,

>> I disagree. If they still trigger and generate better code than without 
>> we should keep them.
> 
>> What kind of code is *common* varies greatly from user to user.

Not really - doing a multiply and checking whether the result is zero is
exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of
SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%.

So we really should not even consider wasting any more time on
maintaining such useless patterns.

> Also, the main reason for restricting their use was that in the 'olden 
> days', when we had multi-cycle implementations of the multiply 
> instructions with short-circuit fast termination when the result was 
> completed, the flag setting variants would never short-circuit.

That only applied to conditional multiplies IIRC, some implementations
would not early-terminate if the condition failed. Today there are serious
penalties for conditional multiplies - but that's something to address in a
different patch.

> These days we have fixed cycle counts for multiply instructions, so this 
> is no-longer a penalty.  

No, there is a large overhead on modern cores when you set the flags,
and there are other penalties due to the extra micro-ops.

> In the thumb2 case in particular we can often 
> reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this 
> sequence and definitely worth exploiting when we can, even if it's not 
> all that common.

Using muls+cbz is equally small. With my patch we generate this with -Os:

void g(void);
int f(int x)
{ 
  if (x * x != 0)
    g();
}

f:
	muls	r0, r0, r0
	push	{r3, lr}
	cbz	r0, .L9
	bl	g
.L9:
	pop	{r3, pc}

Wilco
Wilco Dijkstra Oct. 10, 2019, 5:18 p.m. UTC | #5
Any further comments? Note GCC doesn't support S/UMULLS either since it is equally
useless. It's no surprise that Thumb-2 removed support for flag-setting 64-bit multiplies,
while AArch64 didn't add flag-setting multiplies. So there is no argument that these
instructions are in any way useful to compilers.

Wilco

Hi Richard, Kyrill,

>> I disagree. If they still trigger and generate better code than without 
>> we should keep them.
> 
>> What kind of code is *common* varies greatly from user to user.

Not really - doing a multiply and checking whether the result is zero is
exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of
SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%.

So we really should not even consider wasting any more time on
maintaining such useless patterns.

> Also, the main reason for restricting their use was that in the 'olden 
> days', when we had multi-cycle implementations of the multiply 
> instructions with short-circuit fast termination when the result was 
> completed, the flag setting variants would never short-circuit.

That only applied to conditional multiplies IIRC, some implementations
would not early-terminate if the condition failed. Today there are serious
penalties for conditional multiplies - but that's something to address in a
different patch.

> These days we have fixed cycle counts for multiply instructions, so this 
> is no-longer a penalty.  

No, there is a large overhead on modern cores when you set the flags,
and there are other penalties due to the extra micro-ops.

> In the thumb2 case in particular we can often 
> reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this 
> sequence and definitely worth exploiting when we can, even if it's not 
> all that common.

Using muls+cbz is equally small. With my patch we generate this with -Os:

void g(void);
int f(int x)
{
  if (x * x != 0)
    g();
}

f:
        muls    r0, r0, r0
        push    {r3, lr}
        cbz     r0, .L9
        bl      g
.L9:
        pop     {r3, pc}

Wilco
Wilco Dijkstra Feb. 4, 2020, 10:58 a.m. UTC | #6
Any further comments? Note GCC doesn't support S/UMULLS either since it is equally
useless. It's no surprise that Thumb-2 removed support for flag-setting 64-bit multiplies,
while AArch64 didn't add flag-setting multiplies. So there is no argument that these
instructions are in any way useful to compilers.

Wilco

Hi Richard, Kyrill,

>> I disagree. If they still trigger and generate better code than without 
>> we should keep them.
> 
>> What kind of code is *common* varies greatly from user to user.

Not really - doing a multiply and checking whether the result is zero is
exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of
SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%.

So we really should not even consider wasting any more time on
maintaining such useless patterns.

> Also, the main reason for restricting their use was that in the 'olden 
> days', when we had multi-cycle implementations of the multiply 
> instructions with short-circuit fast termination when the result was 
> completed, the flag setting variants would never short-circuit.

That only applied to conditional multiplies IIRC, some implementations
would not early-terminate if the condition failed. Today there are serious
penalties for conditional multiplies - but that's something to address in a
different patch.

> These days we have fixed cycle counts for multiply instructions, so this 
> is no-longer a penalty.  

No, there is a large overhead on modern cores when you set the flags,
and there are other penalties due to the extra micro-ops.

> In the thumb2 case in particular we can often 
> reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this 
> sequence and definitely worth exploiting when we can, even if it's not 
> all that common.

Using muls+cbz is equally small. With my patch we generate this with -Os:

void g(void);
int f(int x)
{
  if (x * x != 0)
    g();
}

f:
        muls    r0, r0, r0
        push    {r3, lr}
        cbz     r0, .L9
        bl      g
.L9:
        pop     {r3, pc}

Wilco
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1618,60 +1618,6 @@  (define_insn "*arm_mulsi3_v6"
    (set_attr "predicable_short_it" "yes,yes,no")]
 )
 
-(define_insn "*mulsi3_compare0"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV (mult:SI
-			  (match_operand:SI 2 "s_register_operand" "r,r")
-			  (match_operand:SI 1 "s_register_operand" "%0,r"))
-			 (const_int 0)))
-   (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
-	(mult:SI (match_dup 2) (match_dup 1)))]
-  "TARGET_ARM && !arm_arch6"
-  "muls%?\\t%0, %2, %1"
-  [(set_attr "conds" "set")
-   (set_attr "type" "muls")]
-)
-
-(define_insn "*mulsi3_compare0_v6"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV (mult:SI
-			  (match_operand:SI 2 "s_register_operand" "r")
-			  (match_operand:SI 1 "s_register_operand" "r"))
-			 (const_int 0)))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-	(mult:SI (match_dup 2) (match_dup 1)))]
-  "TARGET_ARM && arm_arch6 && optimize_size"
-  "muls%?\\t%0, %2, %1"
-  [(set_attr "conds" "set")
-   (set_attr "type" "muls")]
-)
-
-(define_insn "*mulsi_compare0_scratch"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV (mult:SI
-			  (match_operand:SI 2 "s_register_operand" "r,r")
-			  (match_operand:SI 1 "s_register_operand" "%0,r"))
-			 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=&r,&r"))]
-  "TARGET_ARM && !arm_arch6"
-  "muls%?\\t%0, %2, %1"
-  [(set_attr "conds" "set")
-   (set_attr "type" "muls")]
-)
-
-(define_insn "*mulsi_compare0_scratch_v6"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV (mult:SI
-			  (match_operand:SI 2 "s_register_operand" "r")
-			  (match_operand:SI 1 "s_register_operand" "r"))
-			 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "TARGET_ARM && arm_arch6 && optimize_size"
-  "muls%?\\t%0, %2, %1"
-  [(set_attr "conds" "set")
-   (set_attr "type" "muls")]
-)
-
 ;; Unnamed templates to match MLA instruction.
 
 (define_insn "*mulsi3addsi"
@@ -1698,70 +1644,6 @@  (define_insn "*mulsi3addsi_v6"
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*mulsi3addsi_compare0"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV
-	 (plus:SI (mult:SI
-		   (match_operand:SI 2 "s_register_operand" "r,r,r,r")
-		   (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
-		  (match_operand:SI 3 "s_register_operand" "r,r,0,0"))
-	 (const_int 0)))
-   (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
-	(plus:SI (mult:SI (match_dup 2) (match_dup 1))
-		 (match_dup 3)))]
-  "TARGET_ARM && arm_arch6"
-  "mlas%?\\t%0, %2, %1, %3"
-  [(set_attr "conds" "set")
-   (set_attr "type" "mlas")]
-)
-
-(define_insn "*mulsi3addsi_compare0_v6"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV
-	 (plus:SI (mult:SI
-		   (match_operand:SI 2 "s_register_operand" "r")
-		   (match_operand:SI 1 "s_register_operand" "r"))
-		  (match_operand:SI 3 "s_register_operand" "r"))
-	 (const_int 0)))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (mult:SI (match_dup 2) (match_dup 1))
-		 (match_dup 3)))]
-  "TARGET_ARM && arm_arch6 && optimize_size"
-  "mlas%?\\t%0, %2, %1, %3"
-  [(set_attr "conds" "set")
-   (set_attr "type" "mlas")]
-)
-
-(define_insn "*mulsi3addsi_compare0_scratch"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV
-	 (plus:SI (mult:SI
-		   (match_operand:SI 2 "s_register_operand" "r,r,r,r")
-		   (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
-		  (match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
-	 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
-  "TARGET_ARM && !arm_arch6"
-  "mlas%?\\t%0, %2, %1, %3"
-  [(set_attr "conds" "set")
-   (set_attr "type" "mlas")]
-)
-
-(define_insn "*mulsi3addsi_compare0_scratch_v6"
-  [(set (reg:CC_NOOV CC_REGNUM)
-	(compare:CC_NOOV
-	 (plus:SI (mult:SI
-		   (match_operand:SI 2 "s_register_operand" "r")
-		   (match_operand:SI 1 "s_register_operand" "r"))
-		  (match_operand:SI 3 "s_register_operand" "r"))
-	 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "TARGET_ARM && arm_arch6 && optimize_size"
-  "mlas%?\\t%0, %2, %1, %3"
-  [(set_attr "conds" "set")
-   (set_attr "type" "mlas")]
-)
-
 (define_insn "*mulsi3subsi"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(minus:SI
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1381,31 +1381,6 @@  (define_insn "*thumb2_mulsi_short"
    (set_attr "length" "2")
    (set_attr "type" "muls")])
 
-(define_insn "*thumb2_mulsi_short_compare0"
-  [(set (reg:CC_NOOV CC_REGNUM)
-        (compare:CC_NOOV
-         (mult:SI (match_operand:SI 1 "register_operand" "%0")
-	          (match_operand:SI 2 "register_operand" "l"))
-         (const_int 0)))
-   (set (match_operand:SI 0 "register_operand" "=l")
-	(mult:SI (match_dup 1) (match_dup 2)))]
-  "TARGET_THUMB2 && optimize_size"
-  "muls\\t%0, %2, %0"
-  [(set_attr "length" "2")
-   (set_attr "type" "muls")])
-
-(define_insn "*thumb2_mulsi_short_compare0_scratch"
-  [(set (reg:CC_NOOV CC_REGNUM)
-        (compare:CC_NOOV
-         (mult:SI (match_operand:SI 1 "register_operand" "%0")
-	          (match_operand:SI 2 "register_operand" "l"))
-         (const_int 0)))
-   (clobber (match_scratch:SI 0 "=l"))]
-  "TARGET_THUMB2 && optimize_size"
-  "muls\\t%0, %2, %0"
-  [(set_attr "length" "2")
-   (set_attr "type" "muls")])
-
 (define_insn "*thumb2_cbz"
   [(set (pc) (if_then_else
 	      (eq (match_operand:SI 0 "s_register_operand" "l,?r")