diff mbox

RFC [1/2] divmod transform

Message ID CAAgBjM=tvq76zKQwUx1cb8kz37WJYV52RVA9P+Z7j7x3ekQjsw@mail.gmail.com
State New
Headers show

Commit Message

Prathamesh Kulkarni May 30, 2016, 5:55 a.m. UTC
On 27 May 2016 at 17:31, Richard Biener <rguenther@suse.de> wrote:
> On Fri, 27 May 2016, Prathamesh Kulkarni wrote:
>
>> On 27 May 2016 at 15:45, Richard Biener <rguenther@suse.de> wrote:
>> > On Wed, 25 May 2016, Prathamesh Kulkarni wrote:
>> >
>> >> On 25 May 2016 at 12:52, Richard Biener <rguenther@suse.de> wrote:
>> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote:
>> >> >
>> >> >> On 24 May 2016 at 19:39, Richard Biener <rguenther@suse.de> wrote:
>> >> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote:
>> >> >> >
>> >> >> >> On 24 May 2016 at 17:42, Richard Biener <rguenther@suse.de> wrote:
>> >> >> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote:
>> >> >> >> >
>> >> >> >> >> On 23 May 2016 at 17:35, Richard Biener <richard.guenther@gmail.com> wrote:
>> >> >> >> >> > On Mon, May 23, 2016 at 10:58 AM, Prathamesh Kulkarni
>> >> >> >> >> > <prathamesh.kulkarni@linaro.org> wrote:
>> >> >> >> >> >> Hi,
>> >> >> >> >> >> I have updated my patch for divmod (attached), which was originally
>> >> >> >> >> >> based on Kugan's patch.
>> >> >> >> >> >> The patch transforms stmts with code TRUNC_DIV_EXPR and TRUNC_MOD_EXPR
>> >> >> >> >> >> having same operands to divmod representation, so we can cse computation of mod.
>> >> >> >> >> >>
>> >> >> >> >> >> t1 = a TRUNC_DIV_EXPR b;
>> >> >> >> >> >> t2 = a TRUNC_MOD_EXPR b
>> >> >> >> >> >> is transformed to:
>> >> >> >> >> >> complex_tmp = DIVMOD (a, b);
>> >> >> >> >> >> t1 = REALPART_EXPR (complex_tmp);
>> >> >> >> >> >> t2 = IMAGPART_EXPR (complex_tmp);
>> >> >> >> >> >>
>> >> >> >> >> >> * New hook divmod_expand_libfunc
>> >> >> >> >> >> The rationale for introducing the hook is that different targets have
>> >> >> >> >> >> incompatible calling conventions for divmod libfunc.
>> >> >> >> >> >> Currently three ports define divmod libfunc: c6x, spu and arm.
>> >> >> >> >> >> c6x and spu follow the convention of libgcc2.c:__udivmoddi4:
>> >> >> >> >> >> return quotient and store remainder in argument passed as pointer,
>> >> >> >> >> >> while the arm version takes two arguments and returns both
>> >> >> >> >> >> quotient and remainder having mode double the size of the operand mode.
>> >> >> >> >> >> The port should hence override the hook expand_divmod_libfunc
>> >> >> >> >> >> to generate call to target-specific divmod.
>> >> >> >> >> >> Ports should define this hook if:
>> >> >> >> >> >> a) The port does not have divmod or div insn for the given mode.
>> >> >> >> >> >> b) The port defines divmod libfunc for the given mode.
>> >> >> >> >> >> The default hook default_expand_divmod_libfunc() generates call
>> >> >> >> >> >> to libgcc2.c:__udivmoddi4 provided the operands are unsigned and
>> >> >> >> >> >> are of DImode.
>> >> >> >> >> >>
>> >> >> >> >> >> Patch passes bootstrap+test on x86_64-unknown-linux-gnu and
>> >> >> >> >> >> cross-tested on arm*-*-*.
>> >> >> >> >> >> Bootstrap+test in progress on arm-linux-gnueabihf.
>> >> >> >> >> >> Does this patch look OK ?
>> >> >> >> >> >
>> >> >> >> >> > diff --git a/gcc/targhooks.c b/gcc/targhooks.c
>> >> >> >> >> > index 6b4601b..e4a021a 100644
>> >> >> >> >> > --- a/gcc/targhooks.c
>> >> >> >> >> > +++ b/gcc/targhooks.c
>> >> >> >> >> > @@ -1965,4 +1965,31 @@ default_optab_supported_p (int, machine_mode,
>> >> >> >> >> > machine_mode, optimization_type)
>> >> >> >> >> >    return true;
>> >> >> >> >> >  }
>> >> >> >> >> >
>> >> >> >> >> > +void
>> >> >> >> >> > +default_expand_divmod_libfunc (bool unsignedp, machine_mode mode,
>> >> >> >> >> > +                              rtx op0, rtx op1,
>> >> >> >> >> > +                              rtx *quot_p, rtx *rem_p)
>> >> >> >> >> >
>> >> >> >> >> > functions need a comment.
>> >> >> >> >> >
>> >> >> >> >> > ISTR it was suggested that ARM change to libgcc2.c__udivmoddi4 style?  In that
>> >> >> >> >> > case we could avoid the target hook.
>> >> >> >> >> Well I would prefer adding the hook because that's more easier -;)
>> >> >> >> >> Would it be ok for now to go with the hook ?
>> >> >> >> >> >
>> >> >> >> >> > +      /* If target overrides expand_divmod_libfunc hook
>> >> >> >> >> > +        then perform divmod by generating call to the target-specifc divmod
>> >> >> >> >> > libfunc.  */
>> >> >> >> >> > +      if (targetm.expand_divmod_libfunc != default_expand_divmod_libfunc)
>> >> >> >> >> > +       return true;
>> >> >> >> >> > +
>> >> >> >> >> > +      /* Fall back to using libgcc2.c:__udivmoddi4.  */
>> >> >> >> >> > +      return (mode == DImode && unsignedp);
>> >> >> >> >> >
>> >> >> >> >> > I don't understand this - we know optab_libfunc returns non-NULL for 'mode'
>> >> >> >> >> > but still restrict this to DImode && unsigned?  Also if
>> >> >> >> >> > targetm.expand_divmod_libfunc
>> >> >> >> >> > is not the default we expect the target to handle all modes?
>> >> >> >> >> Ah indeed, the check for DImode is unnecessary.
>> >> >> >> >> However I suppose the check for unsignedp should be there,
>> >> >> >> >> since we want to generate call to __udivmoddi4 only if operand is unsigned ?
>> >> >> >> >
>> >> >> >> > The optab libfunc for sdivmod should be NULL in that case.
>> >> >> >> Ah indeed, thanks.
>> >> >> >> >
>> >> >> >> >> >
>> >> >> >> >> > That said - I expected the above piece to be simply a 'return true;' ;)
>> >> >> >> >> >
>> >> >> >> >> > Usually we use some can_expand_XXX helper in optabs.c to query if the target
>> >> >> >> >> > supports a specific operation (for example SImode divmod would use DImode
>> >> >> >> >> > divmod by means of widening operands - for the unsigned case of course).
>> >> >> >> >> Thanks for pointing out. So if a target does not support divmod
>> >> >> >> >> libfunc for a mode
>> >> >> >> >> but for a wider mode, then we could zero-extend operands to the wider-mode,
>> >> >> >> >> perform divmod on the wider-mode, and then cast result back to the
>> >> >> >> >> original mode.
>> >> >> >> >> I haven't done that in this patch, would it be OK to do that as a follow up ?
>> >> >> >> >
>> >> >> >> > I think that you should conservatively handle the div_optab query, thus if
>> >> >> >> > the target has a HW division in a wider mode don't use the divmod IFN.
>> >> >> >> > You'd simply iterate over GET_MODE_WIDER_MODE and repeat the
>> >> >> >> > if (optab_handler (div_optab, mode) != CODE_FOR_nothing) check, bailing
>> >> >> >> > out if that is available.
>> >> >> >> Done.
>> >> >> >> >
>> >> >> >> >> > +  /* Disable the transform if either is a constant, since
>> >> >> >> >> > division-by-constant
>> >> >> >> >> > +     may have specialized expansion.  */
>> >> >> >> >> > +  if (TREE_CONSTANT (op1) || TREE_CONSTANT (op2))
>> >> >> >> >> > +    return false;
>> >> >> >> >> >
>> >> >> >> >> > please use CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)
>> >> >> >> >> >
>> >> >> >> >> > +  if (TYPE_OVERFLOW_TRAPS (type))
>> >> >> >> >> > +    return false;
>> >> >> >> >> >
>> >> >> >> >> > why's that?  Generally please first test cheap things (trapping, constant-ness)
>> >> >> >> >> > before checking expensive stuff (target_supports_divmod_p).
>> >> >> >> >> I added TYPE_OVERFLOW_TRAPS (type) based on your suggestion in:
>> >> >> >> >> https://www.mail-archive.com/gcc@gcc.gnu.org/msg78534.html
>> >> >> >> >> "When looking at TRUNC_DIV_EXPR you should also exclude
>> >> >> >> >> the case where TYPE_OVERFLOW_TRAPS (type) as that should
>> >> >> >> >> expand using the [su]divv optabs (no trapping overflow
>> >> >> >> >> divmod optab exists)."
>> >> >> >> >
>> >> >> >> > Ok, didn't remember that.
>> >> >> >> >
>> >> >> >> >> >
>> >> >> >> >> > +static bool
>> >> >> >> >> > +convert_to_divmod (gassign *stmt)
>> >> >> >> >> > +{
>> >> >> >> >> > +  if (!divmod_candidate_p (stmt))
>> >> >> >> >> > +    return false;
>> >> >> >> >> > +
>> >> >> >> >> > +  tree op1 = gimple_assign_rhs1 (stmt);
>> >> >> >> >> > +  tree op2 = gimple_assign_rhs2 (stmt);
>> >> >> >> >> > +
>> >> >> >> >> > +  vec<gimple *> stmts = vNULL;
>> >> >> >> >> >
>> >> >> >> >> > use an auto_vec <gimple *> - you currently leak it in at least one place.
>> >> >> >> >> >
>> >> >> >> >> > +      if (maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt))
>> >> >> >> >> > +       cfg_changed = true;
>> >> >> >> >> >
>> >> >> >> >> > note that this suggests you should check whether any of the stmts may throw
>> >> >> >> >> > internally as you don't update / transfer EH info correctly.  So for 'stmt' and
>> >> >> >> >> > all 'use_stmt' check stmt_can_throw_internal and if so do not add it to
>> >> >> >> >> > the list of stmts to modify.
>> >> >> >> >> >
>> >> >> >> >> > Btw, I think you should not add 'stmt' immediately but when iterating over
>> >> >> >> >> > all uses also gather uses in TRUNC_MOD_EXPR.
>> >> >> >> >> >
>> >> >> >> >> > Otherwise looks ok.
>> >> >> >> >> Done changes in this version. I am gathering mod uses same time as div uses,
>> >> >> >> >> so this imposes a constraint that mod dominates mod. I am not sure if
>> >> >> >> >> that's desirable.
>> >> >> >> >
>> >> >> >> > I think you also need a mod_seen variable now that you don't necessarily
>> >> >> >> > end up with 'stmt' in the vector of stmts.  I don't see how there is a
>> >> >> >> > constraint that mod dominates mod - it's just that the top_stmt needs
>> >> >> >> > to dominate all other uses that can be replaced with replacing top_stmt
>> >> >> >> > with a divmod.  It's just that the actual stmt set we choose may now
>> >> >> >> > depend on immediate uses order which on a second thought is bad
>> >> >> >> > as immediate uses order could be affected by debug stmts ... hmm.
>> >> >> >> >
>> >> >> >> > To avoid this please re-add the code adding 'stmt' to stmts immediately
>> >> >> >> > and add a use_stmt != stmt check to the immediate use processing loop
>> >> >> >> > so that we don't end up adding it twice.
>> >> >> >> Well I wonder what will happen for the following case:
>> >> >> >> t1 = x / y;
>> >> >> >> if (cond)
>> >> >> >>   t2 = x % y;
>> >> >> >> else
>> >> >> >>   t3 = x % y;
>> >> >> >>
>> >> >> >> Assuming stmt == top_stmt is "t2 = x % y" and use_stmt is "t3 = x % y",
>> >> >> >> use_stmt will not get added to stmts vector, since top_stmt and
>> >> >> >> use_stmt are not in same bb,
>> >> >> >> and bb's containing top_stmt and use_stmt don't dominate each other.
>> >> >> >> Not sure if this is practical case (I assume fre will hoist mod
>> >> >> >> outside if-else?)
>> >> >> >>
>> >> >> >> Now that we immediately add stmt to stmts vector, I suppose mod_seen
>> >> >> >> shall not be required ?
>> >> >> >
>> >> >> > In that case mod_seen is not needed.  But the situation you say will
>> >> >> > still happen so I wonder if we'd need a better way of iterating over
>> >> >> > immediate uses, like first pushing all candidates into a worklist
>> >> >> > vector and then iterating over that until we find no more candidates.
>> >> >> >
>> >> >> > You can then also handle the case of more than one group of stmts
>> >> >> > (the pass currently doesn't iterate in any particular useful order
>> >> >> > over BBs).
>> >> >> IIUC, we want to perform the transform if:
>> >> >> i) there exists top_stmt with code trunc_div_expr/trunc_mod_expr and
>> >> >> having same operands as stmt.
>> >> >> ii) top_stmt dominates all other stmts with code
>> >> >> trunc_div_expr/trunc_mod_expr and having same operands as top_stmt.
>> >> >>
>> >> >> Firstly, we try to get to top_stmt if it exists, by iterating over uses of stmt,
>> >> >> and then iterate over all uses of top_stmt and add them to stmts vector
>> >> >> only if top_stmt dominates all the stmts with same operands as top_stmt
>> >> >> and have code trunc_div_expr/trunc_mod_expr.
>> >> >>
>> >> >> /* Get to top_stmt.  */
>> >> >> top_stmt = stmt;
>> >> >> top_bb = gimple_bb (stmt);
>> >> >>
>> >> >> FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
>> >> >> {
>> >> >>   if (use_stmt code is TRUNC_DIV_EXPR or TRUNC_MOD_EXPR
>> >> >>       && use_stmt has same operands as stmt)
>> >> >>     {
>> >> >>       if (gimple_bb (use_stmt) dominates top_bb)
>> >> >>         {
>> >> >>           top_bb = gimple_bb (use_stmt);
>> >> >>           top_stmt = use_stmt;
>> >> >>         }
>> >> >>       else if (gimple_bb (use_stmt) == top_stmt
>> >> >>                && gimple_uid (use_stmt) < top_stmt)
>> >> >>         top_stmt = use_stmt;
>> >> >>     }
>> >> >> }
>> >> >>
>> >> >> /* Speculatively consider top_stmt as dominating all other
>> >> >> div_expr/mod_expr stmts with same operands as stmt.  */
>> >> >> stmts.safe_push (top_stmt);
>> >> >>
>> >> >> /* Walk uses of top_stmt to ensure that all stmts are dominated by top_stmt.  */
>> >> >> top_op1 = gimple_assign_rhs1 (top_stmt);
>> >> >> FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
>> >> >> {
>> >> >>   if (use_stmt code is TRUNC_DIV_EXPR or TRUNC_MOD_EXPR
>> >> >>       && use_stmt has same operands as top_stmt)
>> >> >>     {
>> >> >>       if (use_stmt == top_stmt)
>> >> >>         continue;
>> >> >>
>> >> >>       /* No top_stmt exits, do not proceed with transform  */
>> >> >>       if (top_bb does not dominate gimple_bb (use_stmt))
>> >> >>         return false;
>> >> >>
>> >> >>       stmts.safe_push (use_stmt);
>> >> >>     }
>> >> >> }
>> >> >>
>> >> >> For the case:
>> >> >> t1 = x / y;
>> >> >> if (cond)
>> >> >>   t2 = x % y;
>> >> >> else
>> >> >>   t3 = x % y;
>> >> >>
>> >> >> Assuming stmt is "t2 = x % y", it will walk uses of stmt and set
>> >> >> top_stmt to "t1 = x / y"
>> >> >> Then it will walk all uses of top_stmt:
>> >> >> "t2 = x % y" -> dominated by top_stmt
>> >> >> "t3 = x % y" -> dominated by top_stmt
>> >> >> Since all stmts are dominated by top_stmt, we add all three stmts to
>> >> >> vector of stmts and proceed with transform.
>> >> >>
>> >> >> For the case where, top_stmt dominates original stmt but not all stmts:
>> >> >>
>> >> >> if (cond)
>> >> >>   t1 = x / y;
>> >> >> else
>> >> >> {
>> >> >>   t2 = x % y;
>> >> >>   return;
>> >> >> }
>> >> >>
>> >> >> t3 = x % y;
>> >> >>
>> >> >> Assuming stmt is "t3 = x % y",
>> >> >> Walking stmt uses will set top_stmt to "t1 = x / y";
>> >> >>
>> >> >> Walking immediate uses of top_stmt, we find that "t2 = x % y" is not
>> >> >> dominated by top_stmt,
>> >> >> and hence don't do the transform.
>> >> >>
>> >> >> Does this sound reasonable ?
>> >> >
>> >> > Yes, that's reasonable.
>> >> Thanks, I have attached patch that implements above approach.
>> >> Does it look OK ?
>> >
>> > Please start the top-stmt search with
>> >
>> >   top_stmt = stmt;
>> >   top_bb = gimple_bb (stmt);
>> >
>> > this makes sure to process all stmts via the IL walk in case
>> > the uses have multiple independent "dominated" trees.  This also
>> > simplifies the loop body (no need to check for NULL).  This also
>> > makes mod_seen always true and you can compute div_seen in that
>> > loop as well.
>> Done. Um I don't understand why setting top_stmt to NULL won't process
>> all stmts ? AFAIU it will have one extra iteration compared to
>> initializing top_stmt to stmt (since first iteration would initialize
>> top_stmt to stmt assuming stmt does not throw) ?
>
> If you have
>
>   if (cond)
>     {
>       r = x % y;
>       q = x / y;
>     }
>  else
>     {
>       r = x % y;
>       q = x / y;
>    }
>
> then the loop over the function might end up transforming the else
> block when visiting the then block modulo and thus it will never
> transform the then block.  Because you walk immediate uses which
> do not guarantee that you end up with a top_stmt related to the
> IL point you were coming from - the first iteration does _not_
> necessarily have use_stmt == stmt.
Thanks for the explanation,
I overlooked the fact that for first iteration use_stmt may not equal stmt -;)
>
>> >
>> > Otherwise looks ok now.
>> >
>> >> The patch does not still handle the following case:
>> >> int f(int x, int y)
>> >> {
>> >>   extern int cond;
>> >>   int q, r;
>> >>
>> >>   if (cond)
>> >>     q = x % y;
>> >>   else
>> >>     q = x % y;
>> >>
>> >>   r = x % y;
>> >>   return q + r;
>> >> }
>> >>
>> >> In above case although the mod stmt is not dominated by either div
>> >> stmt, I suppose the transform
>> >> is still possible by inserting DIVMOD (x, y) before if-else ?
>> >
>> > Yeah, same for sincos where doing this requires some LCM algorithm.
>> Well I don't have a good approach for this.
>> I was thinking, before doing the divmod transform, we could walk
>> GIMPLE_COND of "diamond" shape
>> (having both arms), and check "then" bb and "else" bb have same div or
>> mod stmts and in that case put an artificial
>> same stmt above GIMPLE_COND.
>>
>> So the above case would be transformed to:
>>
>> int tmp = x / y;  // artificial top_stmt
>> if (cond)
>>   q = x / y;
>> else
>>   q = x / y;
>>
>> r = x % y;
>> return q + r;
>>
>> and then the divmod transform will see "tmp = x / y" as the topmost stmt.
>> Since top_stmt is artificially introduced, we will replace that with DIVMOD ifn
>> rather than inserting DIVMOD ifn above top_stmt as in other cases.
>
> Yeah, but it is really a general missed optimization that should be not
> required for this transform.
The attached patch ICE's during bootstrap for x86_64, and is reproducible with
following case with -m32 -O2:

typedef long long type;

type f(type x, type y)
{
  type q = x / y;
  type r = x % y;
  return q + r;
}

The ICE happens because the test-case hits
gcc_assert (unsignedp);
in default_expand_divmod_libfunc ().

Surprisingly, optab_libfunc (sdivmod_optab, DImode) returns optab_libfunc
with name "__divmoddi4" although __divmoddi4() is nowhere defined in
libgcc for x86.
(I verified that by forcing the patch to generate call to __divmoddi4,
which results in undefined reference to __divmoddi4).

This happens because in optabs.def we have:
OPTAB_NL(sdivmod_optab, "divmod$a4", UNKNOWN, "divmod", '4', gen_int_libfunc)

and gen_int_libfunc generates "__divmoddi4" on first call to optab_libfunc
and sets optab_libfunc (sdivmod_optab, DImode) to "__divmoddi4".
I wonder if we should remove gen_int_libfunc entry in optabs.def for
sdivmod_optab ?

Thanks,
Prathamesh
>
> Richard.
>
>> >
>> >> For the following test-case, I am surprised why CSE didn't take place before
>> >> widening_mul pass ?
>> >>
>> >> int
>> >> f_1 (int x, int y)
>> >> {
>> >>   int q = x / y;
>> >>   int r1 = 0, r2 = 0;
>> >>   if (cond)
>> >>     r1 = x % y;
>> >>   else
>> >>     r2 = x % y;
>> >>   return q + r1 + r2;
>> >> }
>> >
>> > This is not CSE but code hoisting which is not implemented on GIMPLE
>> > (see PR23286)
>> Ah right, thanks for pointing out the PR.
>>
>> Thanks,
>> Prathamesh
>> >
>> >> The input to widening_mul pass is:
>> >> f_1 (int x, int y)
>> >> {
>> >>   int r2;
>> >>   int r1;
>> >>   int q;
>> >>   int cond.0_1;
>> >>   int _2;
>> >>   int _11;
>> >>
>> >>   <bb 2>:
>> >>   q_7 = x_5(D) / y_6(D);
>> >>   cond.0_1 = cond;
>> >>   if (cond.0_1 != 0)
>> >>     goto <bb 3>;
>> >>   else
>> >>     goto <bb 4>;
>> >>
>> >>   <bb 3>:
>> >>   r1_9 = x_5(D) % y_6(D);
>> >>   goto <bb 5>;
>> >>
>> >>   <bb 4>:
>> >>   r2_10 = x_5(D) % y_6(D);
>> >>
>> >>   <bb 5>:
>> >>   # r1_3 = PHI <r1_9(3), 0(4)>
>> >>   # r2_4 = PHI <0(3), r2_10(4)>
>> >>   _2 = r1_3 + q_7;
>> >>   _11 = _2 + r2_4;
>> >>   return _11;
>> >>
>> >> }
>> >>
>> >> Thanks,
>> >> Prathamesh
>> >> >
>> >> > Richard.
>> >> >
>> >> >> Thanks,
>> >> >> Prathamesh
>> >> >> >
>> >> >> > Richard.
>> >> >> >
>> >> >>
>> >> >>
>> >> >
>> >> > --
>> >> > Richard Biener <rguenther@suse.de>
>> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
>> >>
>> >
>> > --
>> > Richard Biener <rguenther@suse.de>
>> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
>>
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Comments

Richard Biener May 30, 2016, 7:45 a.m. UTC | #1
On Mon, 30 May 2016, Prathamesh Kulkarni wrote:

> The attached patch ICE's during bootstrap for x86_64, and is reproducible with
> following case with -m32 -O2:
> 
> typedef long long type;
> 
> type f(type x, type y)
> {
>   type q = x / y;
>   type r = x % y;
>   return q + r;
> }
> 
> The ICE happens because the test-case hits
> gcc_assert (unsignedp);
> in default_expand_divmod_libfunc ().

That's of course your function (and ICE).

> Surprisingly, optab_libfunc (sdivmod_optab, DImode) returns optab_libfunc
> with name "__divmoddi4" although __divmoddi4() is nowhere defined in
> libgcc for x86.
> (I verified that by forcing the patch to generate call to __divmoddi4,
> which results in undefined reference to __divmoddi4).
> 
> This happens because in optabs.def we have:
> OPTAB_NL(sdivmod_optab, "divmod$a4", UNKNOWN, "divmod", '4', gen_int_libfunc)
> 
> and gen_int_libfunc generates "__divmoddi4" on first call to optab_libfunc
> and sets optab_libfunc (sdivmod_optab, DImode) to "__divmoddi4".
> I wonder if we should remove gen_int_libfunc entry in optabs.def for
> sdivmod_optab ?

Hum, not sure - you might want to look at expand_divmod (though that
always just computes one part of the result in the end).

Joseph - do you know sth about why there's not a full set of divmod
libfuncs in libgcc?

Thanks,
Richard.
Joseph Myers June 3, 2016, 9:09 p.m. UTC | #2
On Mon, 30 May 2016, Richard Biener wrote:

> Joseph - do you know sth about why there's not a full set of divmod
> libfuncs in libgcc?

I'm not familiar with the choice of divmod libfuncs.
Jim Wilson June 3, 2016, 11:31 p.m. UTC | #3
On Mon, May 30, 2016 at 12:45 AM, Richard Biener <rguenther@suse.de> wrote:
> Joseph - do you know sth about why there's not a full set of divmod
> libfuncs in libgcc?

Because udivmoddi4 isn't a libfunc, it is a helper function for the
div and mov libfuncs.  Since we can compute the signed div and mod
results from udivmoddi4, there was no need to also add a signed
version of it.  It was given a libfunc style name so that we had the
option of making it a libfunc in the future, but that never happened.
There was no support for calling any divmod libfunc until it was added
as a special case to call an ARM library (not libgcc) function.  This
happened here

2004-08-09  Mark Mitchell  <mark@codesourcery.com>

        * config.gcc (arm*-*-eabi*): New target.
        * defaults.h (TARGET_LIBGCC_FUNCS): New macro.
        (TARGET_LIB_INT_CMP_BIASED): Likewise.
        * expmed.c (expand_divmod): Try a two-valued divmod function as a
        last resort.
        ...
        * config/arm/arm.c (arm_init_libfuncs): New function.
        (arm_compute_initial_eliminatino_offset): Return HOST_WIDE_INT.
        (TARGET_INIT_LIBFUNCS): Define it.
        ...

Later, two ports added their own divmod libfuncs, but I don't see any
evidence that they were ever used, since there is no support for
calling divmod other than the expand_divmod last resort code that only
triggers for ARM.

It is only now that Prathamesh is adding gimple support for divmod
operations that we need to worry about getting this right, without
breaking the existing ARM library support or the existing udivmoddi4
support.

Jim
Richard Biener June 8, 2016, 2:23 p.m. UTC | #4
On Fri, 3 Jun 2016, Jim Wilson wrote:

> On Mon, May 30, 2016 at 12:45 AM, Richard Biener <rguenther@suse.de> wrote:
> > Joseph - do you know sth about why there's not a full set of divmod
> > libfuncs in libgcc?
> 
> Because udivmoddi4 isn't a libfunc, it is a helper function for the
> div and mov libfuncs.  Since we can compute the signed div and mod
> results from udivmoddi4, there was no need to also add a signed
> version of it.  It was given a libfunc style name so that we had the
> option of making it a libfunc in the future, but that never happened.
> There was no support for calling any divmod libfunc until it was added
> as a special case to call an ARM library (not libgcc) function.  This
> happened here
> 
> 2004-08-09  Mark Mitchell  <mark@codesourcery.com>
> 
>         * config.gcc (arm*-*-eabi*): New target.
>         * defaults.h (TARGET_LIBGCC_FUNCS): New macro.
>         (TARGET_LIB_INT_CMP_BIASED): Likewise.
>         * expmed.c (expand_divmod): Try a two-valued divmod function as a
>         last resort.
>         ...
>         * config/arm/arm.c (arm_init_libfuncs): New function.
>         (arm_compute_initial_eliminatino_offset): Return HOST_WIDE_INT.
>         (TARGET_INIT_LIBFUNCS): Define it.
>         ...
> 
> Later, two ports added their own divmod libfuncs, but I don't see any
> evidence that they were ever used, since there is no support for
> calling divmod other than the expand_divmod last resort code that only
> triggers for ARM.
> 
> It is only now that Prathamesh is adding gimple support for divmod
> operations that we need to worry about getting this right, without
> breaking the existing ARM library support or the existing udivmoddi4
> support.

Ok, so as he is primarily targeting the special arm divmod libcall
I suppose we can live with special-casing libcall handling to
udivmoddi3.  It would be nice to not lie about divmod availablilty
as libcall though... - it looks like the libcall is also guarded
on TARGET_HAS_NO_HW_DIVIDE (unless it was available historically
like on x86).

So not sure where to go from here.

Richard.
diff mbox

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 12060ba..1310006 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -61,6 +61,7 @@ 
 #include "builtins.h"
 #include "tm-constrs.h"
 #include "rtl-iter.h"
+#include "optabs-libfuncs.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -300,6 +301,7 @@  static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
 				     const_tree);
+static void arm_expand_divmod_libfunc (bool, machine_mode, rtx, rtx, rtx *, rtx *);
 
 
 /* Table of machine attributes.  */
@@ -730,6 +732,9 @@  static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_SCHED_FUSION_PRIORITY
 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
 
+#undef TARGET_EXPAND_DIVMOD_LIBFUNC
+#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Obstack for minipool constant handling.  */
@@ -30354,6 +30359,37 @@  arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
   return;
 }
 
+/* Expand call to __aeabi_[mode]divmod (op0, op1).  */
+
+static void
+arm_expand_divmod_libfunc (bool unsignedp, machine_mode mode,
+			   rtx op0, rtx op1,
+			   rtx *quot_p, rtx *rem_p)
+{
+  if (mode == SImode)
+    gcc_assert (!TARGET_IDIV);
+
+  optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab;
+  rtx libfunc = optab_libfunc (tab, mode);
+  gcc_assert (libfunc);
+
+  machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
+						     MODE_INT);
+
+  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
+					libval_mode, 2,
+					op0, GET_MODE (op0),
+					op1, GET_MODE (op1));
+
+  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
+  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode, GET_MODE_SIZE (mode));
+
+  gcc_assert (quotient);
+  gcc_assert (remainder);
+  
+  *quot_p = quotient;
+  *rem_p = remainder;
+}
 
 /* Construct and return a PARALLEL RTX vector with elements numbering the
    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8c7f2a1..111f19f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6963,6 +6963,12 @@  This is firstly introduced on ARM/AArch64 targets, please refer to
 the hook implementation for how different fusion types are supported.
 @end deftypefn
 
+@deftypefn {Target Hook} void TARGET_EXPAND_DIVMOD_LIBFUNC (bool @var{unsignedp}, machine_mode @var{mode}, @var{rtx}, @var{rtx}, rtx *@var{quot}, rtx *@var{rem})
+Define this hook if the port does not have hardware div and divmod insn for
+the given mode but has divmod libfunc, which is incompatible
+with libgcc2.c:__udivmoddi4
+@end deftypefn
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f963a58..2c9a800 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4848,6 +4848,8 @@  them: try the first ones in this list first.
 
 @hook TARGET_SCHED_FUSION_PRIORITY
 
+@hook TARGET_EXPAND_DIVMOD_LIBFUNC
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index c867ddc..0cb59f7 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2276,6 +2276,48 @@  multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 
+/* Expand DIVMOD() using:
+ a) optab handler for udivmod/sdivmod if it is available.
+ b) If optab_handler doesn't exist, Generate call to
+    optab_libfunc for udivmod/sdivmod.  */
+
+static void 
+expand_DIVMOD (internal_fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+
+  gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE);
+  tree type = TREE_TYPE (TREE_TYPE (lhs));
+  machine_mode mode = TYPE_MODE (type);
+  bool unsignedp = TYPE_UNSIGNED (type);
+  optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab;
+
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+  rtx quotient, remainder;
+
+  /* Check if optab handler exists for [u]divmod.  */
+  if (optab_handler (tab, mode) != CODE_FOR_nothing)
+    {
+      quotient = gen_reg_rtx (mode);
+      remainder = gen_reg_rtx (mode);
+      expand_twoval_binop (tab, op0, op1, quotient, remainder, unsignedp);
+    }
+  else
+    targetm.expand_divmod_libfunc (unsignedp, mode, op0, op1,
+				   &quotient, &remainder);
+
+  /* Wrap the return value (quotient, remainder) within COMPLEX_EXPR.  */
+  expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs),
+		       make_tree (TREE_TYPE (arg0), quotient),
+		       make_tree (TREE_TYPE (arg1), remainder)),
+	       target, VOIDmode, EXPAND_NORMAL);
+}
+
 /* Return true if FN is supported for the types in TYPES when the
    optimization type is OPT_TYPE.  The types are those associated with
    the "type0" and "type1" fields of FN's direct_internal_fn_info
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index e729d85..56a80f1 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -194,6 +194,9 @@  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
 
+/* Divmod function.  */
+DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
+
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_OPTAB_FN
diff --git a/gcc/target.def b/gcc/target.def
index 6392e73..4496f9a 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4948,6 +4948,16 @@  Normally, this is not needed.",
  bool, (const_tree field, machine_mode mode),
  default_member_type_forces_blk)
 
+/* See tree-ssa-math-opts.c:divmod_candidate_p for conditions that gate
+   the divmod transform.  */
+DEFHOOK
+(expand_divmod_libfunc,
+ "Define this hook if the port does not have hardware div and divmod insn for\n\
+the given mode but has divmod libfunc, which is incompatible\n\
+with libgcc2.c:__udivmoddi4",
+ void, (bool unsignedp, machine_mode mode, rtx, rtx, rtx *quot, rtx *rem),
+ default_expand_divmod_libfunc)
+
 /* Return the class for a secondary reload, and fill in extra information.  */
 DEFHOOK
 (secondary_reload,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b..20327a6 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1965,4 +1965,31 @@  default_optab_supported_p (int, machine_mode, machine_mode, optimization_type)
   return true;
 }
 
+/* Generate call to
+   DImode __udivmoddi4 (DImode op0, DImode op1, DImode *rem).  */
+
+void
+default_expand_divmod_libfunc (bool unsignedp, machine_mode mode,
+			       rtx op0, rtx op1,
+			       rtx *quot_p, rtx *rem_p)
+{
+  gcc_assert (mode == DImode);
+  gcc_assert (unsignedp);
+
+  rtx libfunc = optab_libfunc (udivmod_optab, DImode);
+  gcc_assert (libfunc);
+
+  rtx remainder = assign_stack_temp (DImode, GET_MODE_SIZE (DImode));
+  rtx address = XEXP (remainder, 0);
+
+  rtx quotient = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
+					  DImode, 3,
+					  op0, GET_MODE (op0),
+					  op1, GET_MODE (op1),
+					  address, GET_MODE (address));
+
+  *quot_p = quotient;
+  *rem_p = remainder;
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..dc5e8e7 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -254,4 +254,7 @@  extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE
 extern bool default_optab_supported_p (int, machine_mode, machine_mode,
 				       optimization_type);
 
+extern void default_expand_divmod_libfunc (bool, machine_mode,
+					   rtx, rtx, rtx *, rtx *);
+
 #endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/testsuite/gcc.dg/divmod-1-simode.c b/gcc/testsuite/gcc.dg/divmod-1-simode.c
new file mode 100644
index 0000000..7405f66
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-1-simode.c
@@ -0,0 +1,22 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* div dominates mod.  */
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)  \
+bigtype f_##no(smalltype x, bigtype y) \
+{					 \
+  bigtype q = x / y;                     \
+  if (cond)                              \
+    foo ();                              \
+  bigtype r = x % y;                     \
+  return q + r;                          \
+}
+
+FOO(int, int, 1)
+FOO(int, unsigned, 2)
+FOO(unsigned, unsigned, 5)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-1.c b/gcc/testsuite/gcc.dg/divmod-1.c
new file mode 100644
index 0000000..40aec74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-1.c
@@ -0,0 +1,26 @@ 
+/* { dg-require-effective-target divmod } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* div dominates mod.  */
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)	 \
+bigtype f_##no(smalltype x, bigtype y)   \
+{					 \
+  bigtype q = x / y;                     \
+  if (cond)                              \
+    foo ();                              \
+  bigtype r = x % y;                     \
+  return q + r;                          \
+}
+
+FOO(int, long long, 3)
+FOO(int, unsigned long long, 4)
+FOO(unsigned, long long, 6)
+FOO(unsigned, unsigned long long, 7)
+FOO(long long, long long, 8)
+FOO(long long, unsigned long long, 9)
+FOO(unsigned long long, unsigned long long, 10)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-2-simode.c b/gcc/testsuite/gcc.dg/divmod-2-simode.c
new file mode 100644
index 0000000..7c8313b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-2-simode.c
@@ -0,0 +1,22 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* mod dominates div.  */
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)  \
+bigtype f_##no(smalltype x, bigtype y) \
+{					 \
+  bigtype r = x % y;                     \
+  if (cond)                              \
+    foo ();                              \
+  bigtype q = x / y;                     \
+  return q + r;                          \
+}
+
+FOO(int, int, 1)
+FOO(int, unsigned, 2)
+FOO(unsigned, unsigned, 5)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-2.c b/gcc/testsuite/gcc.dg/divmod-2.c
new file mode 100644
index 0000000..6a2216c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-2.c
@@ -0,0 +1,26 @@ 
+/* { dg-require-effective-target divmod } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* mod dominates div.  */
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)	 \
+bigtype f_##no(smalltype x, bigtype y)   \
+{					 \
+  bigtype r = x % y;                     \
+  if (cond)                              \
+    foo ();                              \
+  bigtype q = x / y;                     \
+  return q + r;                          \
+}
+
+FOO(int, long long, 3)
+FOO(int, unsigned long long, 4)
+FOO(unsigned, long long, 6)
+FOO(unsigned, unsigned long long, 7)
+FOO(long long, long long, 8)
+FOO(long long, unsigned long long, 9)
+FOO(unsigned long long, unsigned long long, 10)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-3-simode.c b/gcc/testsuite/gcc.dg/divmod-3-simode.c
new file mode 100644
index 0000000..6f0f63d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-3-simode.c
@@ -0,0 +1,20 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* div comes before mod in same bb.  */ 
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)  	 \
+bigtype f_##no(smalltype x, bigtype y)	 \
+{					 \
+  bigtype q = x / y;                     \
+  bigtype r = x % y;                     \
+  return q + r;                          \
+}
+
+FOO(int, int, 1)
+FOO(int, unsigned, 2)
+FOO(unsigned, unsigned, 5)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-3.c b/gcc/testsuite/gcc.dg/divmod-3.c
new file mode 100644
index 0000000..9fe6f64
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-3.c
@@ -0,0 +1,24 @@ 
+/* { dg-require-effective-target divmod } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* div comes before mod in same bb.  */ 
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)      \
+bigtype f_##no(smalltype x, bigtype y)   \
+{					 \
+  bigtype q = x / y;                     \
+  bigtype r = x % y;                     \
+  return q + r;                          \
+}
+
+FOO(int, long long, 3)
+FOO(int, unsigned long long, 4)
+FOO(unsigned, long long, 6)
+FOO(unsigned, unsigned long long, 7)
+FOO(long long, long long, 8)
+FOO(long long, unsigned long long, 9)
+FOO(unsigned long long, unsigned long long, 10)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-4-simode.c b/gcc/testsuite/gcc.dg/divmod-4-simode.c
new file mode 100644
index 0000000..9c326f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-4-simode.c
@@ -0,0 +1,20 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* mod comes before div in same bb.  */ 
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)      \
+bigtype f_##no(smalltype x, bigtype y)   \
+{					 \
+  bigtype r = x % y;                     \
+  bigtype q = x / y;                     \
+  return q + r;                          \
+}
+
+FOO(int, int, 1)
+FOO(int, unsigned, 2)
+FOO(unsigned, unsigned, 5)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-4.c b/gcc/testsuite/gcc.dg/divmod-4.c
new file mode 100644
index 0000000..a5686cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-4.c
@@ -0,0 +1,24 @@ 
+/* { dg-require-effective-target divmod } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* mod comes before div in same bb.  */ 
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)	\
+bigtype f_##no(smalltype x, bigtype y)  \
+{					\
+  bigtype r = x % y;                    \
+  bigtype q = x / y;                    \
+  return q + r;                         \
+}
+
+FOO(int, long long, 3)
+FOO(int, unsigned long long, 4)
+FOO(unsigned, long long, 6)
+FOO(unsigned, unsigned long long, 7)
+FOO(long long, long long, 8)
+FOO(long long, unsigned long long, 9)
+FOO(unsigned long long, unsigned long long, 10)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-5.c b/gcc/testsuite/gcc.dg/divmod-5.c
new file mode 100644
index 0000000..8a8cee5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-5.c
@@ -0,0 +1,19 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+/* div and mod are not in same bb and
+   bb's containing div and mod don't dominate each other.  */
+
+int f(int x, int y)
+{
+  int q = 0;
+  int r = 0;
+  extern int cond;
+
+  if (cond)
+    q = x / y;
+
+  r = x % y;
+  return q + r;
+}
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 0 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-6-simode.c b/gcc/testsuite/gcc.dg/divmod-6-simode.c
new file mode 100644
index 0000000..3bf6fa3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-6-simode.c
@@ -0,0 +1,24 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)  \
+bigtype f_##no(smalltype x, bigtype y) \
+{					 \
+  bigtype q = x / y;                     \
+  bigtype r1 = 0, r2 = 0;                \
+  if (cond)                              \
+    r1 = x % y;                          \
+  else                                   \
+    r2 = x % y;                          \
+  return q + r1 + r2;                    \
+}
+
+FOO(int, int, 1)
+FOO(int, unsigned, 2)
+FOO(unsigned, unsigned, 5)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-6.c b/gcc/testsuite/gcc.dg/divmod-6.c
new file mode 100644
index 0000000..70e4321
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-6.c
@@ -0,0 +1,27 @@ 
+/* { dg-require-effective-target divmod } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+
+extern int cond;
+void foo(void);
+
+#define FOO(smalltype, bigtype, no)	 \
+bigtype f_##no(smalltype x, bigtype y)   \
+{					 \
+  bigtype q = x / y;                     \
+  bigtype r1 = 0, r2 = 0;                \
+  if (cond)                              \
+    r1 = x % y;                          \
+  else                                   \
+    r2 = x % y;                          \
+  return q + r1 + r2;                    \
+}
+
+FOO(int, long long, 3)
+FOO(int, unsigned long long, 4)
+FOO(unsigned, long long, 6)
+FOO(unsigned, unsigned long long, 7)
+FOO(long long, long long, 8)
+FOO(long long, unsigned long long, 9)
+FOO(unsigned long long, unsigned long long, 10)
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/divmod-7.c b/gcc/testsuite/gcc.dg/divmod-7.c
new file mode 100644
index 0000000..a6e7fcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/divmod-7.c
@@ -0,0 +1,21 @@ 
+/* { dg-require-effective-target divmod_simode } */
+/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */
+
+int f(int x, int y)
+{
+  int q = 0, r1 = 0, r2 = 0;
+  extern int cond;
+
+  if (cond)
+    q = x / y;
+  else
+    {
+      r1 = x % y;
+      return q + r1;
+    }
+
+  r2 = x % y;
+  return q + r2;
+}
+
+/* { dg-final { scan-tree-dump-times "DIVMOD" 0 "widening_mul" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 04ca176..ad7c487 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6986,3 +6986,32 @@  proc check_effective_target_offload_hsa { } {
 	int main () {return 0;}
     } "-foffload=hsa" ]
 }
+
+# For ARM configs defining __ARM_ARCH_EXT_IDIV__, disable divmod_simode test-cases.
+
+proc check_effective_target_arm_divmod_simode { } {
+    return [check_no_compiler_messages arm_divmod assembly {
+	#ifdef __ARM_ARCH_EXT_IDIV__
+	#error has div insn
+	#endif
+	int i;
+    }]
+}
+
+proc check_effective_target_divmod { } {
+    #TODO: Add checks for all targets that have either hardware divmod insn
+    # or define libfunc for divmod.
+    if { [istarget arm*-*-*]
+	 || [istarget x86_64-*-*] } {
+	return 1
+    }
+    return 0
+}
+
+proc check_effective_target_divmod_simode { } {
+    if { [istarget arm*-*-*] } {
+	return [check_effective_target_arm_divmod_simode]
+    }
+
+    return [check_effective_target_divmod]
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 81688cd..4e5cd2b 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -112,6 +112,9 @@  along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
+#include "optabs-libfuncs.h"
+#include "tree-eh.h"
+#include "targhooks.h"
 
 /* This structure represents one basic block that either computes a
    division, or is a common dominator for basic block that compute a
@@ -184,6 +187,9 @@  static struct
 
   /* Number of fp fused multiply-add ops inserted.  */
   int fmas_inserted;
+
+  /* Number of divmod calls inserted.  */
+  int divmod_calls_inserted;
 } widen_mul_stats;
 
 /* The instance of "struct occurrence" representing the highest
@@ -3784,6 +3790,222 @@  match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
   return true;
 }
 
+/* Return true if target has support for divmod.  */
+
+static bool
+target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) 
+{
+  /* If target supports hardware divmod insn, use it for divmod.  */
+  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
+    return true;
+
+  /* Check if libfunc for divmod is available.  */
+  if (optab_libfunc (divmod_optab, mode) != NULL_RTX)
+    {
+      /* If optab_handler exists for div_optab, perhaps in a wider mode,
+	 we don't want to use the libfunc even if it exists for given mode.  */ 
+      for (machine_mode div_mode = mode;
+	   div_mode != VOIDmode;
+	   div_mode = GET_MODE_WIDER_MODE (div_mode))
+	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
+	  return false;
+
+      return true; 
+    }
+
+  return false;
+}
+
+/* Check if stmt is candidate for divmod transform.  */
+
+static bool
+divmod_candidate_p (gassign *stmt)
+{
+  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
+  enum machine_mode mode = TYPE_MODE (type);
+  optab divmod_optab, div_optab;
+
+  if (TYPE_UNSIGNED (type))
+    {
+      divmod_optab = udivmod_optab;
+      div_optab = udiv_optab;
+    }
+  else
+    {
+      divmod_optab = sdivmod_optab;
+      div_optab = sdiv_optab;
+    }
+
+  tree op1 = gimple_assign_rhs1 (stmt);
+  tree op2 = gimple_assign_rhs2 (stmt);
+
+  /* Disable the transform if either is a constant, since division-by-constant
+     may have specialized expansion.  */
+  if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
+    return false;
+
+  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
+     expand using the [su]divv optabs.  */
+  if (TYPE_OVERFLOW_TRAPS (type))
+    return false;
+  
+  if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) 
+    return false;
+
+  return true;
+}
+
+/* This function looks for:
+   t1 = a TRUNC_DIV_EXPR b;
+   t2 = a TRUNC_MOD_EXPR b;
+   and transforms it to the following sequence:
+   complex_tmp = DIVMOD (a, b);
+   t1 = REALPART_EXPR(a);
+   t2 = IMAGPART_EXPR(b);
+   For conditions enabling the transform see divmod_candidate_p().
+
+   The pass works in two phases:
+   1) Walk through all immediate uses of stmt's operand and find a
+      TRUNC_DIV_EXPR with matching operands and if such a stmt is found add
+      it to stmts vector.
+   2) Insert DIVMOD call before first div/mod stmt in top_bb (basic block that
+      dominates other div/mod stmts with same operands) and update entries in
+      stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
+      IMAGPART_EXPR for mod).  */
+
+static bool
+convert_to_divmod (gassign *stmt)
+{
+  if (!divmod_candidate_p (stmt))
+    return false;
+
+  tree op1 = gimple_assign_rhs1 (stmt);
+  tree op2 = gimple_assign_rhs2 (stmt);
+  
+  imm_use_iterator use_iter;
+  gimple *use_stmt;
+  auto_vec<gimple *> stmts; 
+
+  gimple *top_stmt = stmt; 
+  basic_block top_bb = gimple_bb (stmt);
+
+  /* Try to set top_stmt to "topmost" stmt
+     with code TRUNC_DIV_EXPR/TRUNC_MOD_EXPR having same operands as stmt.  */
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
+    {
+      if (is_gimple_assign (use_stmt)
+	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
+	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
+	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
+	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
+	{
+	  if (stmt_can_throw_internal (use_stmt))
+	    continue;
+
+	  basic_block bb = gimple_bb (use_stmt);
+
+	  if (bb == top_bb)
+	    {
+	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
+		top_stmt = use_stmt;
+	    }
+	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
+	    {
+	      top_bb = bb;
+	      top_stmt = use_stmt;
+	    }
+	}
+    }
+
+  if (top_stmt == stmt && stmt_can_throw_internal (top_stmt))
+    return false;
+
+  tree top_op1 = gimple_assign_rhs1 (top_stmt);
+  tree top_op2 = gimple_assign_rhs2 (top_stmt);
+
+  stmts.safe_push (top_stmt);
+  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
+
+  /* Ensure that gimple_bb (use_stmt) is dominated by top_bb.  */    
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
+    {
+      if (is_gimple_assign (use_stmt)
+	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
+	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
+	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
+	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
+	{
+	  if (use_stmt == top_stmt)
+	    continue;
+
+	  if (stmt_can_throw_internal (use_stmt))
+	    continue;
+
+	  if (!dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
+	    {
+	      end_imm_use_stmt_traverse (&use_iter);
+	      return false;
+	    }
+
+	  stmts.safe_push (use_stmt);
+	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
+	    div_seen = true;
+	}
+    }
+
+  if (!div_seen)
+    return false;
+
+  /* Create libcall to internal fn DIVMOD:
+     divmod_tmp = DIVMOD (op1, op2).  */
+
+  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
+  tree res = make_temp_ssa_name (
+		build_complex_type (TREE_TYPE (op1)),
+		call_stmt, "divmod_tmp");
+  gimple_call_set_lhs (call_stmt, res);
+
+  /* Insert the call before top_stmt.  */
+  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
+  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
+
+  widen_mul_stats.divmod_calls_inserted++;		
+
+  /* Update all statements in stmts.
+     if stmt is lhs = op1 TRUNC_DIV_EXPR op2, change to lhs = REALPART_EXPR<divmod_tmp>
+     if stmt is lhs = op1 TRUNC_MOD_EXPR op2, change to lhs = IMAGPART_EXPR<divmod_tmp>.  */
+
+  bool cfg_changed = false;
+  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
+    {
+      tree new_rhs;
+
+      switch (gimple_assign_rhs_code (use_stmt))
+	{
+	  case TRUNC_DIV_EXPR:
+	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
+	    break;
+
+	  case TRUNC_MOD_EXPR:
+	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op2), res);
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	}
+
+      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
+      update_stmt (use_stmt);
+
+      if (maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt))
+	cfg_changed = true;
+    }
+
+  return cfg_changed;
+}    
 
 /* Find integer multiplications where the operands are extended from
    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
@@ -3828,6 +4050,8 @@  pass_optimize_widening_mul::execute (function *fun)
   bool cfg_changed = false;
 
   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
+  renumber_gimple_stmt_uids ();
 
   FOR_EACH_BB_FN (bb, fun)
     {
@@ -3861,6 +4085,10 @@  pass_optimize_widening_mul::execute (function *fun)
 		    match_uaddsub_overflow (&gsi, stmt, code);
 		  break;
 
+		case TRUNC_MOD_EXPR:
+		  cfg_changed = convert_to_divmod (as_a<gassign *> (stmt));
+		  break;
+
 		default:;
 		}
 	    }
@@ -3907,6 +4135,8 @@  pass_optimize_widening_mul::execute (function *fun)
 			    widen_mul_stats.maccs_inserted);
   statistics_counter_event (fun, "fused multiply-adds inserted",
 			    widen_mul_stats.fmas_inserted);
+  statistics_counter_event (fun, "divmod calls inserted",
+			    widen_mul_stats.divmod_calls_inserted);
 
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }