diff mbox series

[6/8,v9] middle-end slp: support complex FMA and complex FMA conjugate

Message ID 20201228133743.GA29457@arm.com
State New
Headers show
Series [1/8,v9] middle-end slp: Support optimizing load distribution | expand

Commit Message

Tamar Christina Dec. 28, 2020, 1:37 p.m. UTC
Hi All,

This adds support for FMA and FMA conjugated to the slp pattern matcher.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
	* optabs.def (cmla_optab, cmla_conj_optab): New.
	* doc/md.texi: Document them.
	* tree-vect-slp-patterns.c (vect_match_call_p,
	class complex_fma_pattern, vect_slp_reset_pattern,
	complex_fma_pattern::matches, complex_fma_pattern::recognize,
	complex_fma_pattern::build): New.

--- inline copy of patch -- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7 100644


--

Comments

Richard Biener Jan. 8, 2021, 9:45 a.m. UTC | #1
On Mon, 28 Dec 2020, Tamar Christina wrote:

> Hi All,
> 
> This adds support for FMA and FMA conjugated to the slp pattern matcher.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
> 	* optabs.def (cmla_optab, cmla_conj_optab): New.
> 	* doc/md.texi: Document them.
> 	* tree-vect-slp-patterns.c (vect_match_call_p,
> 	class complex_fma_pattern, vect_slp_reset_pattern,
> 	complex_fma_pattern::matches, complex_fma_pattern::recognize,
> 	complex_fma_pattern::build): New.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6202,6 +6202,51 @@ The operation is only supported for vector modes @var{m}.
>  
>  This pattern is not allowed to @code{FAIL}.
>  
> +@cindex @code{cmla@var{m}4} instruction pattern
> +@item @samp{cmla@var{m}4}
> +Perform a vector multiply and accumulate that is semantically the same as
> +a multiply and accumulate of complex numbers.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +    @{
> +      c[i] += a[i] * b[i];
> +    @}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
> +@cindex @code{cmla_conj@var{m}4} instruction pattern
> +@item @samp{cmla_conj@var{m}4}
> +Perform a vector multiply by conjugate and accumulate that is semantically
> +the same as a multiply and accumulate of complex numbers where the second
> +multiply arguments is conjugated.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +    @{
> +      c[i] += a[i] * conj (b[i]);
> +    @}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
>  @cindex @code{cmul@var{m}4} instruction pattern
>  @item @samp{cmul@var{m}4}
>  Perform a vector multiply that is semantically the same as multiply of
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9ca719ec5570ff43eb 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -288,6 +288,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
>  
>  /* Ternary math functions.  */
>  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST, cmla_conj, ternary)
>  
>  /* Unary integer ops.  */
>  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac571eb40d45a294ed 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -294,6 +294,8 @@ OPTAB_D (cadd90_optab, "cadd90$a3")
>  OPTAB_D (cadd270_optab, "cadd270$a3")
>  OPTAB_D (cmul_optab, "cmul$a3")
>  OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
> +OPTAB_D (cmla_optab, "cmla$a4")
> +OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
>  OPTAB_D (cos_optab, "cos$a2")
>  OPTAB_D (cosh_optab, "cosh$a2")
>  OPTAB_D (exp10_optab, "exp10$a2")
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e17e65b3b2c7da83 100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -325,6 +325,24 @@ vect_match_expression_p (slp_tree node, tree_code code)
>    return true;
>  }
>  
> +/* Checks to see if the expression represented by NODE is a call to the internal
> +   function FN.  */
> +
> +static inline bool
> +vect_match_call_p (slp_tree node, internal_fn fn)
> +{
> +  if (!node
> +      || !SLP_TREE_REPRESENTATIVE (node))
> +    return false;
> +
> +  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
> +  if (!expr
> +      || !gimple_call_internal_p (expr, fn))
> +    return false;
> +
> +   return true;
> +}
> +
>  /* Check if the given lane permute in PERMUTES matches an alternating sequence
>     of {even odd even odd ...}.  This to account for unrolled loops.  Further
>     mode there resulting permute must be linear.   */
> @@ -1081,6 +1099,161 @@ complex_mul_pattern::build (vec_info *vinfo)
>    complex_pattern::build (vinfo);
>  }
>  
> +/*******************************************************************************
> + * complex_fma_pattern class
> + ******************************************************************************/
> +
> +class complex_fma_pattern : public complex_pattern
> +{
> +  protected:
> +    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> +      : complex_pattern (node, m_ops, ifn)
> +    {
> +      this->m_num_args = 3;
> +    }
> +
> +  public:
> +    void build (vec_info *);
> +    static internal_fn
> +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
> +	     vec<slp_tree> *);
> +
> +    static vect_pattern*
> +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> +
> +    static vect_pattern*
> +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> +    {
> +      return new complex_fma_pattern (node, m_ops, ifn);
> +    }
> +};
> +
> +/* Helper function to "reset" a previously matched node and undo the changes
> +   made enough so that the node is treated as an irrelevant node.  */
> +
> +static inline void
> +vect_slp_reset_pattern (slp_tree node)
> +{
> +  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
> +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> +  STMT_SLP_TYPE (stmt_info) = pure_slp;
> +  SLP_TREE_REPRESENTATIVE (node) = stmt_info;
> +}
> +
> +/* Pattern matcher for trying to match complex multiply and accumulate
> +   and multiply and subtract patterns in SLP tree.
> +   If the operation matches then IFN is set to the operation it matched and
> +   the arguments to the two replacement statements are put in m_ops.
> +
> +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> +
> +   This function matches the patterns shaped as:
> +
> +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> +
> +   c[i] = c[i] - ax;
> +   c[i+1] = c[i+1] + bx;
> +
> +   If a match occurred then TRUE is returned, else FALSE.  The match is
> +   performed after COMPLEX_MUL which would have done the majority of the work.
> +   This function merely matches an ADD with a COMPLEX_MUL IFN.  The initial
> +   match is expected to be in OP1 and the initial match operands in args0.  */
> +
> +internal_fn
> +complex_fma_pattern::matches (complex_operation_t op,
> +			      slp_tree_to_load_perm_map_t * /* perm_cache */,
> +			      slp_tree *ref_node, vec<slp_tree> *ops)
> +{
> +  internal_fn ifn = IFN_LAST;
> +
> +  /* Find the two components.  We match Complex MUL first which reduces the
> +     amount of work this pattern has to do.  After that we just match the
> +     head node and we're done.:
> +
> +     * FMA: + +.
> +
> +     We need to ignore the two_operands nodes that may also match.
> +     For that we can check if they have any scalar statements and also
> +     check that it's not a permute node as we're looking for a normal
> +     PLUS_EXPR operation.  */
> +  if (op != CMPLX_NONE)
> +    return IFN_LAST;
> +
> +  /* Find the two components.  We match Complex MUL first which reduces the
> +     amount of work this pattern has to do.  After that we just match the
> +     head node and we're done.:
> +
> +   * FMA: + + on a non-two_operands node.  */
> +  slp_tree vnode = *ref_node;
> +  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
> +      /* Need to exclude the plus two-operands node.  These are not marked
> +	 so we have to infer it based on conditions.  */
> +      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()

as said earlier we shouldn't test this.  The existing lane permute
should already cover this - where the test would better be

 SLP_TREE_CODE (vnode) == VEC_PERM_EXPR

> +      || !vect_match_expression_p (vnode, PLUS_EXPR))

But then it shouldn't match this (the vect_match_expression_p should
only ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.

> +    return IFN_LAST;
> +
> +  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
> +
> +  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
> +    ifn = IFN_COMPLEX_FMA;
> +  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
> +    ifn = IFN_COMPLEX_FMA_CONJ;
> +  else
> +    return IFN_LAST;
> +
> +  if (!vect_pattern_validate_optab (ifn, vnode))
> +    return IFN_LAST;
> +
> +  vect_slp_reset_pattern (node);

I don't understand this ... it deserves a comment at least.
Having no testcases with this patch makes it impossible for
me to dig in myself :/

Otherwise looks OK.

Thanks,
Richard.

> +  ops->truncate (0);
> +  ops->create (3);
> +
> +  if (ifn == IFN_COMPLEX_FMA)
> +    {
> +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> +    }
> +  else
> +    {
> +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> +    }
> +
> +  return ifn;
> +}
> +
> +/* Attempt to recognize a complex mul pattern.  */
> +
> +vect_pattern*
> +complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
> +				slp_tree *node)
> +{
> +  auto_vec<slp_tree> ops;
> +  complex_operation_t op
> +    = vect_detect_pair_op (*node, true, &ops);
> +  internal_fn ifn
> +    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
> +  if (ifn == IFN_LAST)
> +    return NULL;
> +
> +  return new complex_fma_pattern (node, &ops, ifn);
> +}
> +
> +/* Perform a replacement of the detected complex mul pattern with the new
> +   instruction sequences.  */
> +
> +void
> +complex_fma_pattern::build (vec_info *vinfo)
> +{
> +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
> +
> +  complex_pattern::build (vinfo);
> +}
> +
>  /*******************************************************************************
>   * Pattern matching definitions
>   ******************************************************************************/
> 
> 
>
Tamar Christina Jan. 8, 2021, 9:59 a.m. UTC | #2
Hi Richi,

> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, January 8, 2021 9:45 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; ook@ucw.cz
> Subject: Re: [PATCH 6/8 v9]middle-end slp: support complex FMA and
> complex FMA conjugate
> 
> On Mon, 28 Dec 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This adds support for FMA and FMA conjugated to the slp pattern matcher.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
> > 	* optabs.def (cmla_optab, cmla_conj_optab): New.
> > 	* doc/md.texi: Document them.
> > 	* tree-vect-slp-patterns.c (vect_match_call_p,
> > 	class complex_fma_pattern, vect_slp_reset_pattern,
> > 	complex_fma_pattern::matches, complex_fma_pattern::recognize,
> > 	complex_fma_pattern::build): New.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c2
> 9
> > caa6863fd3f7 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6202,6 +6202,51 @@ The operation is only supported for vector
> modes @var{m}.
> >
> >  This pattern is not allowed to @code{FAIL}.
> >
> > +@cindex @code{cmla@var{m}4} instruction pattern @item
> > +@samp{cmla@var{m}4} Perform a vector multiply and accumulate that is
> > +semantically the same as a multiply and accumulate of complex
> > +numbers.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] += a[i] * b[i];
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> > +@cindex @code{cmla_conj@var{m}4} instruction pattern @item
> > +@samp{cmla_conj@var{m}4} Perform a vector multiply by conjugate and
> > +accumulate that is semantically the same as a multiply and accumulate
> > +of complex numbers where the second multiply arguments is conjugated.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] += a[i] * conj (b[i]);
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> >  @cindex @code{cmul@var{m}4} instruction pattern  @item
> > @samp{cmul@var{m}4}  Perform a vector multiply that is semantically
> > the same as multiply of diff --git a/gcc/internal-fn.def
> > b/gcc/internal-fn.def index
> >
> 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9c
> a719
> > ec5570ff43eb 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -288,6 +288,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp,
> > binary)
> >
> >  /* Ternary math functions.  */
> >  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST,
> cmla_conj,
> > +ternary)
> >
> >  /* Unary integer ops.  */
> >  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb,
> unary)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def index
> >
> e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac
> 571e
> > b40d45a294ed 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -294,6 +294,8 @@ OPTAB_D (cadd90_optab, "cadd90$a3")  OPTAB_D
> > (cadd270_optab, "cadd270$a3")  OPTAB_D (cmul_optab, "cmul$a3")
> > OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
> > +OPTAB_D (cmla_optab, "cmla$a4")
> > +OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> >  OPTAB_D (cos_optab, "cos$a2")
> >  OPTAB_D (cosh_optab, "cosh$a2")
> >  OPTAB_D (exp10_optab, "exp10$a2")
> > diff --git a/gcc/tree-vect-slp-patterns.c
> > b/gcc/tree-vect-slp-patterns.c index
> >
> 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e1
> 7e
> > 65b3b2c7da83 100644
> > --- a/gcc/tree-vect-slp-patterns.c
> > +++ b/gcc/tree-vect-slp-patterns.c
> > @@ -325,6 +325,24 @@ vect_match_expression_p (slp_tree node,
> tree_code code)
> >    return true;
> >  }
> >
> > +/* Checks to see if the expression represented by NODE is a call to the
> internal
> > +   function FN.  */
> > +
> > +static inline bool
> > +vect_match_call_p (slp_tree node, internal_fn fn) {
> > +  if (!node
> > +      || !SLP_TREE_REPRESENTATIVE (node))
> > +    return false;
> > +
> > +  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE
> (node));
> > + if (!expr
> > +      || !gimple_call_internal_p (expr, fn))
> > +    return false;
> > +
> > +   return true;
> > +}
> > +
> >  /* Check if the given lane permute in PERMUTES matches an alternating
> sequence
> >     of {even odd even odd ...}.  This to account for unrolled loops.  Further
> >     mode there resulting permute must be linear.   */
> > @@ -1081,6 +1099,161 @@ complex_mul_pattern::build (vec_info *vinfo)
> >    complex_pattern::build (vinfo);
> >  }
> >
> >
> +/*********************************************************
> ***********
> > +***********
> > + * complex_fma_pattern class
> > +
> >
> +*********************************************************
> ************
> > +*********/
> > +
> > +class complex_fma_pattern : public complex_pattern {
> > +  protected:
> > +    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops,
> internal_fn ifn)
> > +      : complex_pattern (node, m_ops, ifn)
> > +    {
> > +      this->m_num_args = 3;
> > +    }
> > +
> > +  public:
> > +    void build (vec_info *);
> > +    static internal_fn
> > +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> slp_tree *,
> > +	     vec<slp_tree> *);
> > +
> > +    static vect_pattern*
> > +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> > +
> > +    static vect_pattern*
> > +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> > +    {
> > +      return new complex_fma_pattern (node, m_ops, ifn);
> > +    }
> > +};
> > +
> > +/* Helper function to "reset" a previously matched node and undo the
> changes
> > +   made enough so that the node is treated as an irrelevant node.  */
> > +
> > +static inline void
> > +vect_slp_reset_pattern (slp_tree node) {
> > +  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE
> > +(node));
> > +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> > +  STMT_SLP_TYPE (stmt_info) = pure_slp;
> > +  SLP_TREE_REPRESENTATIVE (node) = stmt_info; }
> > +
> > +/* Pattern matcher for trying to match complex multiply and accumulate
> > +   and multiply and subtract patterns in SLP tree.
> > +   If the operation matches then IFN is set to the operation it matched and
> > +   the arguments to the two replacement statements are put in m_ops.
> > +
> > +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> > +
> > +   This function matches the patterns shaped as:
> > +
> > +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> > +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> > +
> > +   c[i] = c[i] - ax;
> > +   c[i+1] = c[i+1] + bx;
> > +
> > +   If a match occurred then TRUE is returned, else FALSE.  The match is
> > +   performed after COMPLEX_MUL which would have done the majority of
> the work.
> > +   This function merely matches an ADD with a COMPLEX_MUL IFN.  The
> initial
> > +   match is expected to be in OP1 and the initial match operands in
> > + args0.  */
> > +
> > +internal_fn
> > +complex_fma_pattern::matches (complex_operation_t op,
> > +			      slp_tree_to_load_perm_map_t * /* perm_cache
> */,
> > +			      slp_tree *ref_node, vec<slp_tree> *ops) {
> > +  internal_fn ifn = IFN_LAST;
> > +
> > +  /* Find the two components.  We match Complex MUL first which
> reduces the
> > +     amount of work this pattern has to do.  After that we just match the
> > +     head node and we're done.:
> > +
> > +     * FMA: + +.
> > +
> > +     We need to ignore the two_operands nodes that may also match.
> > +     For that we can check if they have any scalar statements and also
> > +     check that it's not a permute node as we're looking for a normal
> > +     PLUS_EXPR operation.  */
> > +  if (op != CMPLX_NONE)
> > +    return IFN_LAST;
> > +
> > +  /* Find the two components.  We match Complex MUL first which
> reduces the
> > +     amount of work this pattern has to do.  After that we just match the
> > +     head node and we're done.:
> > +
> > +   * FMA: + + on a non-two_operands node.  */
> > +  slp_tree vnode = *ref_node;
> > +  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
> > +      /* Need to exclude the plus two-operands node.  These are not
> marked
> > +	 so we have to infer it based on conditions.  */
> > +      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()
> 
> as said earlier we shouldn't test this.  The existing lane permute should
> already cover this - where the test would better be
> 
>  SLP_TREE_CODE (vnode) == VEC_PERM_EXPR
> 
> > +      || !vect_match_expression_p (vnode, PLUS_EXPR))
> 
> But then it shouldn't match this (the vect_match_expression_p should only
> ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.
> 

How so? An FMA doesn't have a TWO_OPERANDS node as the root since the operations
Are always two PLUS operations.

The corresponding tree is

note:   SLP size 10 vs. limit 24.
note:   Final SLP tree for instance 0x48f68d0:
note:   node 0x4809870 (max_nunits=4, refcnt=2)
note:   op template: REALPART_EXPR <*_3> = _31;
note:     stmt 0 REALPART_EXPR <*_3> = _31;
note:     stmt 1 IMAGPART_EXPR <*_3> = _32;
note:     children 0x48098f8
note:   node 0x48098f8 (max_nunits=4, refcnt=2)
note:   op template: _31 = _12 + _29;
note:     stmt 0 _31 = _12 + _29;
note:     stmt 1 _32 = _11 + _30;
note:     children 0x4809980 0x4809a08
note:   node 0x4809980 (max_nunits=4, refcnt=2)
note:   op template: _12 = REALPART_EXPR <*_3>;
note:     stmt 0 _12 = REALPART_EXPR <*_3>;
note:     stmt 1 _11 = IMAGPART_EXPR <*_3>;
note:     load permutation { 0 1 }
note:   node 0x4809a08 (max_nunits=4, refcnt=2)
note:   op: VEC_PERM_EXPR
note:     stmt 0 _29 = _25 - _26;
note:     stmt 1 _30 = _27 + _28;
note:     lane permutation { 0[0] 1[1] }
note:     children 0x4809dc0 0x4809e48
note:   node 0x4809dc0 (max_nunits=1, refcnt=1)
note:   op template: _29 = _25 - _26;
note:     { }
note:     children 0x4809a90 0x4809c28
note:   node 0x4809a90 (max_nunits=4, refcnt=3)
note:   op template: _25 = _19 * _22;
note:     stmt 0 _25 = _19 * _22;
note:     stmt 1 _27 = _20 * _22;
note:     children 0x4809b18 0x4809ba0
note:   node 0x4809b18 (max_nunits=4, refcnt=2)
note:   op template: _19 = REALPART_EXPR <*_7>;
note:     stmt 0 _19 = REALPART_EXPR <*_7>;
note:     stmt 1 _20 = IMAGPART_EXPR <*_7>;
note:     load permutation { 0 1 }
note:   node 0x4809ba0 (max_nunits=4, refcnt=2)
note:   op template: _22 = REALPART_EXPR <*_5>;
note:     stmt 0 _22 = REALPART_EXPR <*_5>;
note:     stmt 1 _22 = REALPART_EXPR <*_5>;
note:     load permutation { 0 0 }
note:   node 0x4809c28 (max_nunits=4, refcnt=3)
note:   op template: _26 = _20 * _21;
note:     stmt 0 _26 = _20 * _21;
note:     stmt 1 _28 = _19 * _21;
note:     children 0x4809cb0 0x4809d38
note:   node 0x4809cb0 (max_nunits=4, refcnt=2)
note:   op template: _20 = IMAGPART_EXPR <*_7>;
note:     stmt 0 _20 = IMAGPART_EXPR <*_7>;
note:     stmt 1 _19 = REALPART_EXPR <*_7>;
note:     load permutation { 1 0 }
note:   node 0x4809d38 (max_nunits=4, refcnt=2)
note:   op template: _21 = IMAGPART_EXPR <*_5>;
note:     stmt 0 _21 = IMAGPART_EXPR <*_5>;
note:     stmt 1 _21 = IMAGPART_EXPR <*_5>;
note:     load permutation { 1 1 }
note:   node 0x4809e48 (max_nunits=1, refcnt=1)
note:   op template: _30 = _27 + _28;
note:     { }
note:     children 0x4809a90 0x4809c28

and after matching the MUL all you have are the ADD node going into a COMPLEX_MUL node.

> > +    return IFN_LAST;
> > +
> > +  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
> > +
> > +  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
> > +    ifn = IFN_COMPLEX_FMA;
> > +  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
> > +    ifn = IFN_COMPLEX_FMA_CONJ;
> > +  else
> > +    return IFN_LAST;
> > +
> > +  if (!vect_pattern_validate_optab (ifn, vnode))
> > +    return IFN_LAST;
> > +
> > +  vect_slp_reset_pattern (node);
> 
> I don't understand this ... it deserves a comment at least.

The previous pass detecting COMPLEX_MUL would have marked the
Instructions as being inside of a MUL pattern.  These need to be unmarked
As being part of the COMPLEX_MUL and instead be marked as COMPLEX_FMA.

> Having no testcases with this patch makes it impossible for me to dig in
> myself :/

Sorry, the tests would have made the file too big again.. The previous test for complex add
Added gcc/testsuite/gcc.dg/vect/complex/complex-operations.c which is an overarching test
Testing everything in one go.

The individual tests are split off from that large test.

> 
> Otherwise looks OK.
> 
> Thanks,
> Richard.
> 
> > +  ops->truncate (0);
> > +  ops->create (3);
> > +
> > +  if (ifn == IFN_COMPLEX_FMA)
> > +    {
> > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > +    }
> > +  else
> > +    {
> > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > +    }
> > +
> > +  return ifn;
> > +}
> > +
> > +/* Attempt to recognize a complex mul pattern.  */
> > +
> > +vect_pattern*
> > +complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t
> *perm_cache,
> > +				slp_tree *node)
> > +{
> > +  auto_vec<slp_tree> ops;
> > +  complex_operation_t op
> > +    = vect_detect_pair_op (*node, true, &ops);
> > +  internal_fn ifn
> > +    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
> > +  if (ifn == IFN_LAST)
> > +    return NULL;
> > +
> > +  return new complex_fma_pattern (node, &ops, ifn); }
> > +
> > +/* Perform a replacement of the detected complex mul pattern with the
> new
> > +   instruction sequences.  */
> > +
> > +void
> > +complex_fma_pattern::build (vec_info *vinfo) {
> > +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> > +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
> > +
> > +  complex_pattern::build (vinfo);
> > +}
> > +
> >
> /**********************************************************
> *********************
> >   * Pattern matching definitions
> >
> >
> **********************************************************
> ************
> > ********/
> >
> >
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Richard Biener Jan. 8, 2021, 10:17 a.m. UTC | #3
On Fri, 8 Jan 2021, Tamar Christina wrote:

> Hi Richi,
> 
> > -----Original Message-----
> > From: Richard Biener <rguenther@suse.de>
> > Sent: Friday, January 8, 2021 9:45 AM
> > To: Tamar Christina <Tamar.Christina@arm.com>
> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; ook@ucw.cz
> > Subject: Re: [PATCH 6/8 v9]middle-end slp: support complex FMA and
> > complex FMA conjugate
> > 
> > On Mon, 28 Dec 2020, Tamar Christina wrote:
> > 
> > > Hi All,
> > >
> > > This adds support for FMA and FMA conjugated to the slp pattern matcher.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > > and no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > > 	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
> > > 	* optabs.def (cmla_optab, cmla_conj_optab): New.
> > > 	* doc/md.texi: Document them.
> > > 	* tree-vect-slp-patterns.c (vect_match_call_p,
> > > 	class complex_fma_pattern, vect_slp_reset_pattern,
> > > 	complex_fma_pattern::matches, complex_fma_pattern::recognize,
> > > 	complex_fma_pattern::build): New.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> > >
> > b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c2
> > 9
> > > caa6863fd3f7 100644
> > > --- a/gcc/doc/md.texi
> > > +++ b/gcc/doc/md.texi
> > > @@ -6202,6 +6202,51 @@ The operation is only supported for vector
> > modes @var{m}.
> > >
> > >  This pattern is not allowed to @code{FAIL}.
> > >
> > > +@cindex @code{cmla@var{m}4} instruction pattern @item
> > > +@samp{cmla@var{m}4} Perform a vector multiply and accumulate that is
> > > +semantically the same as a multiply and accumulate of complex
> > > +numbers.
> > > +
> > > +@smallexample
> > > +  complex TYPE c[N];
> > > +  complex TYPE a[N];
> > > +  complex TYPE b[N];
> > > +  for (int i = 0; i < N; i += 1)
> > > +    @{
> > > +      c[i] += a[i] * b[i];
> > > +    @}
> > > +@end smallexample
> > > +
> > > +In GCC lane ordering the real part of the number must be in the even
> > > +lanes with the imaginary part in the odd lanes.
> > > +
> > > +The operation is only supported for vector modes @var{m}.
> > > +
> > > +This pattern is not allowed to @code{FAIL}.
> > > +
> > > +@cindex @code{cmla_conj@var{m}4} instruction pattern @item
> > > +@samp{cmla_conj@var{m}4} Perform a vector multiply by conjugate and
> > > +accumulate that is semantically the same as a multiply and accumulate
> > > +of complex numbers where the second multiply arguments is conjugated.
> > > +
> > > +@smallexample
> > > +  complex TYPE c[N];
> > > +  complex TYPE a[N];
> > > +  complex TYPE b[N];
> > > +  for (int i = 0; i < N; i += 1)
> > > +    @{
> > > +      c[i] += a[i] * conj (b[i]);
> > > +    @}
> > > +@end smallexample
> > > +
> > > +In GCC lane ordering the real part of the number must be in the even
> > > +lanes with the imaginary part in the odd lanes.
> > > +
> > > +The operation is only supported for vector modes @var{m}.
> > > +
> > > +This pattern is not allowed to @code{FAIL}.
> > > +
> > >  @cindex @code{cmul@var{m}4} instruction pattern  @item
> > > @samp{cmul@var{m}4}  Perform a vector multiply that is semantically
> > > the same as multiply of diff --git a/gcc/internal-fn.def
> > > b/gcc/internal-fn.def index
> > >
> > 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9c
> > a719
> > > ec5570ff43eb 100644
> > > --- a/gcc/internal-fn.def
> > > +++ b/gcc/internal-fn.def
> > > @@ -288,6 +288,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp,
> > > binary)
> > >
> > >  /* Ternary math functions.  */
> > >  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> > > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
> > > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST,
> > cmla_conj,
> > > +ternary)
> > >
> > >  /* Unary integer ops.  */
> > >  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb,
> > unary)
> > > diff --git a/gcc/optabs.def b/gcc/optabs.def index
> > >
> > e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac
> > 571e
> > > b40d45a294ed 100644
> > > --- a/gcc/optabs.def
> > > +++ b/gcc/optabs.def
> > > @@ -294,6 +294,8 @@ OPTAB_D (cadd90_optab, "cadd90$a3")  OPTAB_D
> > > (cadd270_optab, "cadd270$a3")  OPTAB_D (cmul_optab, "cmul$a3")
> > > OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
> > > +OPTAB_D (cmla_optab, "cmla$a4")
> > > +OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> > >  OPTAB_D (cos_optab, "cos$a2")
> > >  OPTAB_D (cosh_optab, "cosh$a2")
> > >  OPTAB_D (exp10_optab, "exp10$a2")
> > > diff --git a/gcc/tree-vect-slp-patterns.c
> > > b/gcc/tree-vect-slp-patterns.c index
> > >
> > 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e1
> > 7e
> > > 65b3b2c7da83 100644
> > > --- a/gcc/tree-vect-slp-patterns.c
> > > +++ b/gcc/tree-vect-slp-patterns.c
> > > @@ -325,6 +325,24 @@ vect_match_expression_p (slp_tree node,
> > tree_code code)
> > >    return true;
> > >  }
> > >
> > > +/* Checks to see if the expression represented by NODE is a call to the
> > internal
> > > +   function FN.  */
> > > +
> > > +static inline bool
> > > +vect_match_call_p (slp_tree node, internal_fn fn) {
> > > +  if (!node
> > > +      || !SLP_TREE_REPRESENTATIVE (node))
> > > +    return false;
> > > +
> > > +  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE
> > (node));
> > > + if (!expr
> > > +      || !gimple_call_internal_p (expr, fn))
> > > +    return false;
> > > +
> > > +   return true;
> > > +}
> > > +
> > >  /* Check if the given lane permute in PERMUTES matches an alternating
> > sequence
> > >     of {even odd even odd ...}.  This to account for unrolled loops.  Further
> > >     mode there resulting permute must be linear.   */
> > > @@ -1081,6 +1099,161 @@ complex_mul_pattern::build (vec_info *vinfo)
> > >    complex_pattern::build (vinfo);
> > >  }
> > >
> > >
> > +/*********************************************************
> > ***********
> > > +***********
> > > + * complex_fma_pattern class
> > > +
> > >
> > +*********************************************************
> > ************
> > > +*********/
> > > +
> > > +class complex_fma_pattern : public complex_pattern {
> > > +  protected:
> > > +    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops,
> > internal_fn ifn)
> > > +      : complex_pattern (node, m_ops, ifn)
> > > +    {
> > > +      this->m_num_args = 3;
> > > +    }
> > > +
> > > +  public:
> > > +    void build (vec_info *);
> > > +    static internal_fn
> > > +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> > slp_tree *,
> > > +	     vec<slp_tree> *);
> > > +
> > > +    static vect_pattern*
> > > +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> > > +
> > > +    static vect_pattern*
> > > +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> > > +    {
> > > +      return new complex_fma_pattern (node, m_ops, ifn);
> > > +    }
> > > +};
> > > +
> > > +/* Helper function to "reset" a previously matched node and undo the
> > changes
> > > +   made enough so that the node is treated as an irrelevant node.  */
> > > +
> > > +static inline void
> > > +vect_slp_reset_pattern (slp_tree node) {
> > > +  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE
> > > +(node));
> > > +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> > > +  STMT_SLP_TYPE (stmt_info) = pure_slp;
> > > +  SLP_TREE_REPRESENTATIVE (node) = stmt_info; }
> > > +
> > > +/* Pattern matcher for trying to match complex multiply and accumulate
> > > +   and multiply and subtract patterns in SLP tree.
> > > +   If the operation matches then IFN is set to the operation it matched and
> > > +   the arguments to the two replacement statements are put in m_ops.
> > > +
> > > +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> > > +
> > > +   This function matches the patterns shaped as:
> > > +
> > > +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> > > +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> > > +
> > > +   c[i] = c[i] - ax;
> > > +   c[i+1] = c[i+1] + bx;
> > > +
> > > +   If a match occurred then TRUE is returned, else FALSE.  The match is
> > > +   performed after COMPLEX_MUL which would have done the majority of
> > the work.
> > > +   This function merely matches an ADD with a COMPLEX_MUL IFN.  The
> > initial
> > > +   match is expected to be in OP1 and the initial match operands in
> > > + args0.  */
> > > +
> > > +internal_fn
> > > +complex_fma_pattern::matches (complex_operation_t op,
> > > +			      slp_tree_to_load_perm_map_t * /* perm_cache
> > */,
> > > +			      slp_tree *ref_node, vec<slp_tree> *ops) {
> > > +  internal_fn ifn = IFN_LAST;
> > > +
> > > +  /* Find the two components.  We match Complex MUL first which
> > reduces the
> > > +     amount of work this pattern has to do.  After that we just match the
> > > +     head node and we're done.:
> > > +
> > > +     * FMA: + +.
> > > +
> > > +     We need to ignore the two_operands nodes that may also match.
> > > +     For that we can check if they have any scalar statements and also
> > > +     check that it's not a permute node as we're looking for a normal
> > > +     PLUS_EXPR operation.  */
> > > +  if (op != CMPLX_NONE)
> > > +    return IFN_LAST;
> > > +
> > > +  /* Find the two components.  We match Complex MUL first which
> > reduces the
> > > +     amount of work this pattern has to do.  After that we just match the
> > > +     head node and we're done.:
> > > +
> > > +   * FMA: + + on a non-two_operands node.  */
> > > +  slp_tree vnode = *ref_node;
> > > +  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
> > > +      /* Need to exclude the plus two-operands node.  These are not
> > marked
> > > +	 so we have to infer it based on conditions.  */
> > > +      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()
> > 
> > as said earlier we shouldn't test this.  The existing lane permute should
> > already cover this - where the test would better be
> > 
> >  SLP_TREE_CODE (vnode) == VEC_PERM_EXPR
> > 
> > > +      || !vect_match_expression_p (vnode, PLUS_EXPR))
> > 
> > But then it shouldn't match this (the vect_match_expression_p should only
> > ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.
> > 
> 
> How so? An FMA doesn't have a TWO_OPERANDS node as the root since the operations
> Are always two PLUS operations.
> 
> The corresponding tree is
> 
> note:   SLP size 10 vs. limit 24.
> note:   Final SLP tree for instance 0x48f68d0:
> note:   node 0x4809870 (max_nunits=4, refcnt=2)
> note:   op template: REALPART_EXPR <*_3> = _31;
> note:     stmt 0 REALPART_EXPR <*_3> = _31;
> note:     stmt 1 IMAGPART_EXPR <*_3> = _32;
> note:     children 0x48098f8
> note:   node 0x48098f8 (max_nunits=4, refcnt=2)
> note:   op template: _31 = _12 + _29;
> note:     stmt 0 _31 = _12 + _29;
> note:     stmt 1 _32 = _11 + _30;
> note:     children 0x4809980 0x4809a08
> note:   node 0x4809980 (max_nunits=4, refcnt=2)
> note:   op template: _12 = REALPART_EXPR <*_3>;
> note:     stmt 0 _12 = REALPART_EXPR <*_3>;
> note:     stmt 1 _11 = IMAGPART_EXPR <*_3>;
> note:     load permutation { 0 1 }
> note:   node 0x4809a08 (max_nunits=4, refcnt=2)
> note:   op: VEC_PERM_EXPR
> note:     stmt 0 _29 = _25 - _26;
> note:     stmt 1 _30 = _27 + _28;
> note:     lane permutation { 0[0] 1[1] }
> note:     children 0x4809dc0 0x4809e48
> note:   node 0x4809dc0 (max_nunits=1, refcnt=1)
> note:   op template: _29 = _25 - _26;
> note:     { }
> note:     children 0x4809a90 0x4809c28
> note:   node 0x4809a90 (max_nunits=4, refcnt=3)
> note:   op template: _25 = _19 * _22;
> note:     stmt 0 _25 = _19 * _22;
> note:     stmt 1 _27 = _20 * _22;
> note:     children 0x4809b18 0x4809ba0
> note:   node 0x4809b18 (max_nunits=4, refcnt=2)
> note:   op template: _19 = REALPART_EXPR <*_7>;
> note:     stmt 0 _19 = REALPART_EXPR <*_7>;
> note:     stmt 1 _20 = IMAGPART_EXPR <*_7>;
> note:     load permutation { 0 1 }
> note:   node 0x4809ba0 (max_nunits=4, refcnt=2)
> note:   op template: _22 = REALPART_EXPR <*_5>;
> note:     stmt 0 _22 = REALPART_EXPR <*_5>;
> note:     stmt 1 _22 = REALPART_EXPR <*_5>;
> note:     load permutation { 0 0 }
> note:   node 0x4809c28 (max_nunits=4, refcnt=3)
> note:   op template: _26 = _20 * _21;
> note:     stmt 0 _26 = _20 * _21;
> note:     stmt 1 _28 = _19 * _21;
> note:     children 0x4809cb0 0x4809d38
> note:   node 0x4809cb0 (max_nunits=4, refcnt=2)
> note:   op template: _20 = IMAGPART_EXPR <*_7>;
> note:     stmt 0 _20 = IMAGPART_EXPR <*_7>;
> note:     stmt 1 _19 = REALPART_EXPR <*_7>;
> note:     load permutation { 1 0 }
> note:   node 0x4809d38 (max_nunits=4, refcnt=2)
> note:   op template: _21 = IMAGPART_EXPR <*_5>;
> note:     stmt 0 _21 = IMAGPART_EXPR <*_5>;
> note:     stmt 1 _21 = IMAGPART_EXPR <*_5>;
> note:     load permutation { 1 1 }
> note:   node 0x4809e48 (max_nunits=1, refcnt=1)
> note:   op template: _30 = _27 + _28;
> note:     { }
> note:     children 0x4809a90 0x4809c28
> 
> and after matching the MUL all you have are the ADD node going into a COMPLEX_MUL node.

So the above is the original tree - how does the tree look like
at the point you need to rule out 0x4809e48 (and thus have those
COMPLEX_MUL ones)?

As said, !SLP_TREE_SCALAR_STMTS (vnode).exists () is not a good test.

If complex pattern detection doesn't want to see the direct(?) children
of permute nodes then the pattern machinery should provide means
to do this.  Likewise if the pattern should not contain parents
outside of the supposed match the machinery should provide means
to restrict matches to single entry SLP subgraphs (here the
children of the plus have an alternate "entry").

> > > +    return IFN_LAST;
> > > +
> > > +  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
> > > +
> > > +  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
> > > +    ifn = IFN_COMPLEX_FMA;
> > > +  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
> > > +    ifn = IFN_COMPLEX_FMA_CONJ;
> > > +  else
> > > +    return IFN_LAST;
> > > +
> > > +  if (!vect_pattern_validate_optab (ifn, vnode))
> > > +    return IFN_LAST;
> > > +
> > > +  vect_slp_reset_pattern (node);
> > 
> > I don't understand this ... it deserves a comment at least.
> 
> The previous pass detecting COMPLEX_MUL would have marked the
> Instructions as being inside of a MUL pattern.  These need to be unmarked
> As being part of the COMPLEX_MUL and instead be marked as COMPLEX_FMA.

The scalar pattern code adjusts this at the point it marks the
pattern "consuming" the earlier pattern.   We should try follow that
style here I think.

> > Having no testcases with this patch makes it impossible for me to dig in
> > myself :/
> 
> Sorry, the tests would have made the file too big again.. The previous test for complex add
> Added gcc/testsuite/gcc.dg/vect/complex/complex-operations.c which is an overarching test
> Testing everything in one go.
> 
> The individual tests are split off from that large test.
> 
> > 
> > Otherwise looks OK.
> > 
> > Thanks,
> > Richard.
> > 
> > > +  ops->truncate (0);
> > > +  ops->create (3);
> > > +
> > > +  if (ifn == IFN_COMPLEX_FMA)
> > > +    {
> > > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > > +    }
> > > +  else
> > > +    {
> > > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > > +    }
> > > +
> > > +  return ifn;
> > > +}
> > > +
> > > +/* Attempt to recognize a complex mul pattern.  */
> > > +
> > > +vect_pattern*
> > > +complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t
> > *perm_cache,
> > > +				slp_tree *node)
> > > +{
> > > +  auto_vec<slp_tree> ops;
> > > +  complex_operation_t op
> > > +    = vect_detect_pair_op (*node, true, &ops);
> > > +  internal_fn ifn
> > > +    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
> > > +  if (ifn == IFN_LAST)
> > > +    return NULL;
> > > +
> > > +  return new complex_fma_pattern (node, &ops, ifn); }
> > > +
> > > +/* Perform a replacement of the detected complex mul pattern with the
> > new
> > > +   instruction sequences.  */
> > > +
> > > +void
> > > +complex_fma_pattern::build (vec_info *vinfo) {
> > > +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> > > +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
> > > +
> > > +  complex_pattern::build (vinfo);
> > > +}
> > > +
> > >
> > /**********************************************************
> > *********************
> > >   * Pattern matching definitions
> > >
> > >
> > **********************************************************
> > ************
> > > ********/
> > >
> > >
> > >
> > 
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> > Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
>
Tamar Christina Jan. 8, 2021, 10:21 a.m. UTC | #4
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, January 8, 2021 10:17 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; ook@ucw.cz
> Subject: RE: [PATCH 6/8 v9]middle-end slp: support complex FMA and
> complex FMA conjugate
> 
> On Fri, 8 Jan 2021, Tamar Christina wrote:
> 
> > Hi Richi,
> >
> > > -----Original Message-----
> > > From: Richard Biener <rguenther@suse.de>
> > > Sent: Friday, January 8, 2021 9:45 AM
> > > To: Tamar Christina <Tamar.Christina@arm.com>
> > > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; ook@ucw.cz
> > > Subject: Re: [PATCH 6/8 v9]middle-end slp: support complex FMA and
> > > complex FMA conjugate
> > >
> > > On Mon, 28 Dec 2020, Tamar Christina wrote:
> > >
> > > > Hi All,
> > > >
> > > > This adds support for FMA and FMA conjugated to the slp pattern
> matcher.
> > > >
> > > > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > > > x86_64-pc-linux-gnu and no issues.
> > > >
> > > > Ok for master?
> > > >
> > > > Thanks,
> > > > Tamar
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > 	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
> > > > 	* optabs.def (cmla_optab, cmla_conj_optab): New.
> > > > 	* doc/md.texi: Document them.
> > > > 	* tree-vect-slp-patterns.c (vect_match_call_p,
> > > > 	class complex_fma_pattern, vect_slp_reset_pattern,
> > > > 	complex_fma_pattern::matches, complex_fma_pattern::recognize,
> > > > 	complex_fma_pattern::build): New.
> > > >
> > > > --- inline copy of patch --
> > > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> > > >
> > >
> b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c
> > > 2
> > > 9
> > > > caa6863fd3f7 100644
> > > > --- a/gcc/doc/md.texi
> > > > +++ b/gcc/doc/md.texi
> > > > @@ -6202,6 +6202,51 @@ The operation is only supported for vector
> > > modes @var{m}.
> > > >
> > > >  This pattern is not allowed to @code{FAIL}.
> > > >
> > > > +@cindex @code{cmla@var{m}4} instruction pattern @item
> > > > +@samp{cmla@var{m}4} Perform a vector multiply and accumulate
> that
> > > > +is semantically the same as a multiply and accumulate of complex
> > > > +numbers.
> > > > +
> > > > +@smallexample
> > > > +  complex TYPE c[N];
> > > > +  complex TYPE a[N];
> > > > +  complex TYPE b[N];
> > > > +  for (int i = 0; i < N; i += 1)
> > > > +    @{
> > > > +      c[i] += a[i] * b[i];
> > > > +    @}
> > > > +@end smallexample
> > > > +
> > > > +In GCC lane ordering the real part of the number must be in the
> > > > +even lanes with the imaginary part in the odd lanes.
> > > > +
> > > > +The operation is only supported for vector modes @var{m}.
> > > > +
> > > > +This pattern is not allowed to @code{FAIL}.
> > > > +
> > > > +@cindex @code{cmla_conj@var{m}4} instruction pattern @item
> > > > +@samp{cmla_conj@var{m}4} Perform a vector multiply by conjugate
> > > > +and accumulate that is semantically the same as a multiply and
> > > > +accumulate of complex numbers where the second multiply
> arguments is conjugated.
> > > > +
> > > > +@smallexample
> > > > +  complex TYPE c[N];
> > > > +  complex TYPE a[N];
> > > > +  complex TYPE b[N];
> > > > +  for (int i = 0; i < N; i += 1)
> > > > +    @{
> > > > +      c[i] += a[i] * conj (b[i]);
> > > > +    @}
> > > > +@end smallexample
> > > > +
> > > > +In GCC lane ordering the real part of the number must be in the
> > > > +even lanes with the imaginary part in the odd lanes.
> > > > +
> > > > +The operation is only supported for vector modes @var{m}.
> > > > +
> > > > +This pattern is not allowed to @code{FAIL}.
> > > > +
> > > >  @cindex @code{cmul@var{m}4} instruction pattern  @item
> > > > @samp{cmul@var{m}4}  Perform a vector multiply that is
> > > > semantically the same as multiply of diff --git
> > > > a/gcc/internal-fn.def b/gcc/internal-fn.def index
> > > >
> > >
> 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9c
> > > a719
> > > > ec5570ff43eb 100644
> > > > --- a/gcc/internal-fn.def
> > > > +++ b/gcc/internal-fn.def
> > > > @@ -288,6 +288,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST,
> ldexp,
> > > > binary)
> > > >
> > > >  /* Ternary math functions.  */
> > > >  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> > > > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla,
> ternary)
> > > > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST,
> > > cmla_conj,
> > > > +ternary)
> > > >
> > > >  /* Unary integer ops.  */
> > > >  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb,
> > > unary)
> > > > diff --git a/gcc/optabs.def b/gcc/optabs.def index
> > > >
> > >
> e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac
> > > 571e
> > > > b40d45a294ed 100644
> > > > --- a/gcc/optabs.def
> > > > +++ b/gcc/optabs.def
> > > > @@ -294,6 +294,8 @@ OPTAB_D (cadd90_optab, "cadd90$a3")
> OPTAB_D
> > > > (cadd270_optab, "cadd270$a3")  OPTAB_D (cmul_optab, "cmul$a3")
> > > > OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
> > > > +OPTAB_D (cmla_optab, "cmla$a4")
> > > > +OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> > > >  OPTAB_D (cos_optab, "cos$a2")
> > > >  OPTAB_D (cosh_optab, "cosh$a2")
> > > >  OPTAB_D (exp10_optab, "exp10$a2") diff --git
> > > > a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> > > > index
> > > >
> > >
> 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e1
> > > 7e
> > > > 65b3b2c7da83 100644
> > > > --- a/gcc/tree-vect-slp-patterns.c
> > > > +++ b/gcc/tree-vect-slp-patterns.c
> > > > @@ -325,6 +325,24 @@ vect_match_expression_p (slp_tree node,
> > > tree_code code)
> > > >    return true;
> > > >  }
> > > >
> > > > +/* Checks to see if the expression represented by NODE is a call
> > > > +to the
> > > internal
> > > > +   function FN.  */
> > > > +
> > > > +static inline bool
> > > > +vect_match_call_p (slp_tree node, internal_fn fn) {
> > > > +  if (!node
> > > > +      || !SLP_TREE_REPRESENTATIVE (node))
> > > > +    return false;
> > > > +
> > > > +  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE
> > > (node));
> > > > + if (!expr
> > > > +      || !gimple_call_internal_p (expr, fn))
> > > > +    return false;
> > > > +
> > > > +   return true;
> > > > +}
> > > > +
> > > >  /* Check if the given lane permute in PERMUTES matches an
> > > > alternating
> > > sequence
> > > >     of {even odd even odd ...}.  This to account for unrolled loops.
> Further
> > > >     mode there resulting permute must be linear.   */
> > > > @@ -1081,6 +1099,161 @@ complex_mul_pattern::build (vec_info
> *vinfo)
> > > >    complex_pattern::build (vinfo);  }
> > > >
> > > >
> > >
> +/*********************************************************
> > > ***********
> > > > +***********
> > > > + * complex_fma_pattern class
> > > > +
> > > >
> > >
> +*********************************************************
> > > ************
> > > > +*********/
> > > > +
> > > > +class complex_fma_pattern : public complex_pattern {
> > > > +  protected:
> > > > +    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops,
> > > internal_fn ifn)
> > > > +      : complex_pattern (node, m_ops, ifn)
> > > > +    {
> > > > +      this->m_num_args = 3;
> > > > +    }
> > > > +
> > > > +  public:
> > > > +    void build (vec_info *);
> > > > +    static internal_fn
> > > > +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t
> *,
> > > slp_tree *,
> > > > +	     vec<slp_tree> *);
> > > > +
> > > > +    static vect_pattern*
> > > > +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> > > > +
> > > > +    static vect_pattern*
> > > > +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> > > > +    {
> > > > +      return new complex_fma_pattern (node, m_ops, ifn);
> > > > +    }
> > > > +};
> > > > +
> > > > +/* Helper function to "reset" a previously matched node and undo the
> > > changes
> > > > +   made enough so that the node is treated as an irrelevant node.  */
> > > > +
> > > > +static inline void
> > > > +vect_slp_reset_pattern (slp_tree node) {
> > > > +  stmt_vec_info stmt_info = vect_orig_stmt
> (SLP_TREE_REPRESENTATIVE
> > > > +(node));
> > > > +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> > > > +  STMT_SLP_TYPE (stmt_info) = pure_slp;
> > > > +  SLP_TREE_REPRESENTATIVE (node) = stmt_info; }
> > > > +
> > > > +/* Pattern matcher for trying to match complex multiply and
> accumulate
> > > > +   and multiply and subtract patterns in SLP tree.
> > > > +   If the operation matches then IFN is set to the operation it matched
> and
> > > > +   the arguments to the two replacement statements are put in m_ops.
> > > > +
> > > > +   If no match is found then IFN is set to IFN_LAST and m_ops is
> unchanged.
> > > > +
> > > > +   This function matches the patterns shaped as:
> > > > +
> > > > +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> > > > +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> > > > +
> > > > +   c[i] = c[i] - ax;
> > > > +   c[i+1] = c[i+1] + bx;
> > > > +
> > > > +   If a match occurred then TRUE is returned, else FALSE.  The match is
> > > > +   performed after COMPLEX_MUL which would have done the
> majority of
> > > the work.
> > > > +   This function merely matches an ADD with a COMPLEX_MUL IFN.
> The
> > > initial
> > > > +   match is expected to be in OP1 and the initial match operands in
> > > > + args0.  */
> > > > +
> > > > +internal_fn
> > > > +complex_fma_pattern::matches (complex_operation_t op,
> > > > +			      slp_tree_to_load_perm_map_t * /* perm_cache
> > > */,
> > > > +			      slp_tree *ref_node, vec<slp_tree> *ops) {
> > > > +  internal_fn ifn = IFN_LAST;
> > > > +
> > > > +  /* Find the two components.  We match Complex MUL first which
> > > reduces the
> > > > +     amount of work this pattern has to do.  After that we just match the
> > > > +     head node and we're done.:
> > > > +
> > > > +     * FMA: + +.
> > > > +
> > > > +     We need to ignore the two_operands nodes that may also match.
> > > > +     For that we can check if they have any scalar statements and also
> > > > +     check that it's not a permute node as we're looking for a normal
> > > > +     PLUS_EXPR operation.  */
> > > > +  if (op != CMPLX_NONE)
> > > > +    return IFN_LAST;
> > > > +
> > > > +  /* Find the two components.  We match Complex MUL first which
> > > reduces the
> > > > +     amount of work this pattern has to do.  After that we just match the
> > > > +     head node and we're done.:
> > > > +
> > > > +   * FMA: + + on a non-two_operands node.  */
> > > > +  slp_tree vnode = *ref_node;
> > > > +  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
> > > > +      /* Need to exclude the plus two-operands node.  These are not
> > > marked
> > > > +	 so we have to infer it based on conditions.  */
> > > > +      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()
> > >
> > > as said earlier we shouldn't test this.  The existing lane permute should
> > > already cover this - where the test would better be
> > >
> > >  SLP_TREE_CODE (vnode) == VEC_PERM_EXPR
> > >
> > > > +      || !vect_match_expression_p (vnode, PLUS_EXPR))
> > >
> > > But then it shouldn't match this (the vect_match_expression_p should
> only
> > > ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.
> > >
> >
> > How so? An FMA doesn't have a TWO_OPERANDS node as the root since
> the operations
> > Are always two PLUS operations.
> >
> > The corresponding tree is
> >
> > note:   SLP size 10 vs. limit 24.
> > note:   Final SLP tree for instance 0x48f68d0:
> > note:   node 0x4809870 (max_nunits=4, refcnt=2)
> > note:   op template: REALPART_EXPR <*_3> = _31;
> > note:     stmt 0 REALPART_EXPR <*_3> = _31;
> > note:     stmt 1 IMAGPART_EXPR <*_3> = _32;
> > note:     children 0x48098f8
> > note:   node 0x48098f8 (max_nunits=4, refcnt=2)
> > note:   op template: _31 = _12 + _29;
> > note:     stmt 0 _31 = _12 + _29;
> > note:     stmt 1 _32 = _11 + _30;
> > note:     children 0x4809980 0x4809a08
> > note:   node 0x4809980 (max_nunits=4, refcnt=2)
> > note:   op template: _12 = REALPART_EXPR <*_3>;
> > note:     stmt 0 _12 = REALPART_EXPR <*_3>;
> > note:     stmt 1 _11 = IMAGPART_EXPR <*_3>;
> > note:     load permutation { 0 1 }
> > note:   node 0x4809a08 (max_nunits=4, refcnt=2)
> > note:   op: VEC_PERM_EXPR
> > note:     stmt 0 _29 = _25 - _26;
> > note:     stmt 1 _30 = _27 + _28;
> > note:     lane permutation { 0[0] 1[1] }
> > note:     children 0x4809dc0 0x4809e48
> > note:   node 0x4809dc0 (max_nunits=1, refcnt=1)
> > note:   op template: _29 = _25 - _26;
> > note:     { }
> > note:     children 0x4809a90 0x4809c28
> > note:   node 0x4809a90 (max_nunits=4, refcnt=3)
> > note:   op template: _25 = _19 * _22;
> > note:     stmt 0 _25 = _19 * _22;
> > note:     stmt 1 _27 = _20 * _22;
> > note:     children 0x4809b18 0x4809ba0
> > note:   node 0x4809b18 (max_nunits=4, refcnt=2)
> > note:   op template: _19 = REALPART_EXPR <*_7>;
> > note:     stmt 0 _19 = REALPART_EXPR <*_7>;
> > note:     stmt 1 _20 = IMAGPART_EXPR <*_7>;
> > note:     load permutation { 0 1 }
> > note:   node 0x4809ba0 (max_nunits=4, refcnt=2)
> > note:   op template: _22 = REALPART_EXPR <*_5>;
> > note:     stmt 0 _22 = REALPART_EXPR <*_5>;
> > note:     stmt 1 _22 = REALPART_EXPR <*_5>;
> > note:     load permutation { 0 0 }
> > note:   node 0x4809c28 (max_nunits=4, refcnt=3)
> > note:   op template: _26 = _20 * _21;
> > note:     stmt 0 _26 = _20 * _21;
> > note:     stmt 1 _28 = _19 * _21;
> > note:     children 0x4809cb0 0x4809d38
> > note:   node 0x4809cb0 (max_nunits=4, refcnt=2)
> > note:   op template: _20 = IMAGPART_EXPR <*_7>;
> > note:     stmt 0 _20 = IMAGPART_EXPR <*_7>;
> > note:     stmt 1 _19 = REALPART_EXPR <*_7>;
> > note:     load permutation { 1 0 }
> > note:   node 0x4809d38 (max_nunits=4, refcnt=2)
> > note:   op template: _21 = IMAGPART_EXPR <*_5>;
> > note:     stmt 0 _21 = IMAGPART_EXPR <*_5>;
> > note:     stmt 1 _21 = IMAGPART_EXPR <*_5>;
> > note:     load permutation { 1 1 }
> > note:   node 0x4809e48 (max_nunits=1, refcnt=1)
> > note:   op template: _30 = _27 + _28;
> > note:     { }
> > note:     children 0x4809a90 0x4809c28
> >
> > and after matching the MUL all you have are the ADD node going into a
> COMPLEX_MUL node.
> 
> So the above is the original tree - how does the tree look like
> at the point you need to rule out 0x4809e48 (and thus have those
> COMPLEX_MUL ones)?
> 
> As said, !SLP_TREE_SCALAR_STMTS (vnode).exists () is not a good test.

Agreed, Sorry I need to start drinking coffee.. I read the above 

> > > But then it shouldn't match this (the vect_match_expression_p should
> only
> > > ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.
> 

as SLP_TREE_CODE (vnode) == VEC_PERM_EXPR...

I will go respin patches 😊

Thanks!


> If complex pattern detection doesn't want to see the direct(?) children
> of permute nodes then the pattern machinery should provide means
> to do this.  Likewise if the pattern should not contain parents
> outside of the supposed match the machinery should provide means
> to restrict matches to single entry SLP subgraphs (here the
> children of the plus have an alternate "entry").
> 
> > > > +    return IFN_LAST;
> > > > +
> > > > +  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
> > > > +
> > > > +  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
> > > > +    ifn = IFN_COMPLEX_FMA;
> > > > +  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
> > > > +    ifn = IFN_COMPLEX_FMA_CONJ;
> > > > +  else
> > > > +    return IFN_LAST;
> > > > +
> > > > +  if (!vect_pattern_validate_optab (ifn, vnode))
> > > > +    return IFN_LAST;
> > > > +
> > > > +  vect_slp_reset_pattern (node);
> > >
> > > I don't understand this ... it deserves a comment at least.
> >
> > The previous pass detecting COMPLEX_MUL would have marked the
> > Instructions as being inside of a MUL pattern.  These need to be unmarked
> > As being part of the COMPLEX_MUL and instead be marked as
> COMPLEX_FMA.
> 
> The scalar pattern code adjusts this at the point it marks the
> pattern "consuming" the earlier pattern.   We should try follow that
> style here I think.
> 
> > > Having no testcases with this patch makes it impossible for me to dig in
> > > myself :/
> >
> > Sorry, the tests would have made the file too big again.. The previous test
> for complex add
> > Added gcc/testsuite/gcc.dg/vect/complex/complex-operations.c which is
> an overarching test
> > Testing everything in one go.
> >
> > The individual tests are split off from that large test.
> >
> > >
> > > Otherwise looks OK.
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > +  ops->truncate (0);
> > > > +  ops->create (3);
> > > > +
> > > > +  if (ifn == IFN_COMPLEX_FMA)
> > > > +    {
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > > > +    }
> > > > +  else
> > > > +    {
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > > > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > > > +    }
> > > > +
> > > > +  return ifn;
> > > > +}
> > > > +
> > > > +/* Attempt to recognize a complex mul pattern.  */
> > > > +
> > > > +vect_pattern*
> > > > +complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t
> > > *perm_cache,
> > > > +				slp_tree *node)
> > > > +{
> > > > +  auto_vec<slp_tree> ops;
> > > > +  complex_operation_t op
> > > > +    = vect_detect_pair_op (*node, true, &ops);
> > > > +  internal_fn ifn
> > > > +    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
> > > > +  if (ifn == IFN_LAST)
> > > > +    return NULL;
> > > > +
> > > > +  return new complex_fma_pattern (node, &ops, ifn); }
> > > > +
> > > > +/* Perform a replacement of the detected complex mul pattern with
> the
> > > new
> > > > +   instruction sequences.  */
> > > > +
> > > > +void
> > > > +complex_fma_pattern::build (vec_info *vinfo) {
> > > > +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> > > > +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
> > > > +
> > > > +  complex_pattern::build (vinfo);
> > > > +}
> > > > +
> > > >
> > >
> /**********************************************************
> > > *********************
> > > >   * Pattern matching definitions
> > > >
> > > >
> > >
> **********************************************************
> > > ************
> > > > ********/
> > > >
> > > >
> > > >
> > >
> > > --
> > > Richard Biener <rguenther@suse.de>
> > > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> > > Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Tamar Christina Jan. 11, 2021, 10:24 a.m. UTC | #5
Hi Richi,

> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, January 8, 2021 9:45 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; ook@ucw.cz
> Subject: Re: [PATCH 6/8 v9]middle-end slp: support complex FMA and
> complex FMA conjugate
> 
> On Mon, 28 Dec 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This adds support for FMA and FMA conjugated to the slp pattern matcher.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* internal-fn.def (COMPLEX_FMA, COMPLEX_FMA_CONJ): New.
> > 	* optabs.def (cmla_optab, cmla_conj_optab): New.
> > 	* doc/md.texi: Document them.
> > 	* tree-vect-slp-patterns.c (vect_match_call_p,
> > 	class complex_fma_pattern, vect_slp_reset_pattern,
> > 	complex_fma_pattern::matches, complex_fma_pattern::recognize,
> > 	complex_fma_pattern::build): New.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c2
> 9
> > caa6863fd3f7 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6202,6 +6202,51 @@ The operation is only supported for vector
> modes @var{m}.
> >
> >  This pattern is not allowed to @code{FAIL}.
> >
> > +@cindex @code{cmla@var{m}4} instruction pattern @item
> > +@samp{cmla@var{m}4} Perform a vector multiply and accumulate that is
> > +semantically the same as a multiply and accumulate of complex
> > +numbers.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] += a[i] * b[i];
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> > +@cindex @code{cmla_conj@var{m}4} instruction pattern @item
> > +@samp{cmla_conj@var{m}4} Perform a vector multiply by conjugate and
> > +accumulate that is semantically the same as a multiply and accumulate
> > +of complex numbers where the second multiply arguments is conjugated.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] += a[i] * conj (b[i]);
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> >  @cindex @code{cmul@var{m}4} instruction pattern  @item
> > @samp{cmul@var{m}4}  Perform a vector multiply that is semantically
> > the same as multiply of diff --git a/gcc/internal-fn.def
> > b/gcc/internal-fn.def index
> >
> 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9c
> a719
> > ec5570ff43eb 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -288,6 +288,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp,
> > binary)
> >
> >  /* Ternary math functions.  */
> >  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST,
> cmla_conj,
> > +ternary)
> >
> >  /* Unary integer ops.  */
> >  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb,
> unary)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def index
> >
> e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac
> 571e
> > b40d45a294ed 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -294,6 +294,8 @@ OPTAB_D (cadd90_optab, "cadd90$a3")  OPTAB_D
> > (cadd270_optab, "cadd270$a3")  OPTAB_D (cmul_optab, "cmul$a3")
> > OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
> > +OPTAB_D (cmla_optab, "cmla$a4")
> > +OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> >  OPTAB_D (cos_optab, "cos$a2")
> >  OPTAB_D (cosh_optab, "cosh$a2")
> >  OPTAB_D (exp10_optab, "exp10$a2")
> > diff --git a/gcc/tree-vect-slp-patterns.c
> > b/gcc/tree-vect-slp-patterns.c index
> >
> 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e1
> 7e
> > 65b3b2c7da83 100644
> > --- a/gcc/tree-vect-slp-patterns.c
> > +++ b/gcc/tree-vect-slp-patterns.c
> > @@ -325,6 +325,24 @@ vect_match_expression_p (slp_tree node,
> tree_code code)
> >    return true;
> >  }
> >
> > +/* Checks to see if the expression represented by NODE is a call to the
> internal
> > +   function FN.  */
> > +
> > +static inline bool
> > +vect_match_call_p (slp_tree node, internal_fn fn) {
> > +  if (!node
> > +      || !SLP_TREE_REPRESENTATIVE (node))
> > +    return false;
> > +
> > +  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE
> (node));
> > + if (!expr
> > +      || !gimple_call_internal_p (expr, fn))
> > +    return false;
> > +
> > +   return true;
> > +}
> > +
> >  /* Check if the given lane permute in PERMUTES matches an alternating
> sequence
> >     of {even odd even odd ...}.  This to account for unrolled loops.  Further
> >     mode there resulting permute must be linear.   */
> > @@ -1081,6 +1099,161 @@ complex_mul_pattern::build (vec_info *vinfo)
> >    complex_pattern::build (vinfo);
> >  }
> >
> >
> +/*********************************************************
> ***********
> > +***********
> > + * complex_fma_pattern class
> > +
> >
> +*********************************************************
> ************
> > +*********/
> > +
> > +class complex_fma_pattern : public complex_pattern {
> > +  protected:
> > +    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops,
> internal_fn ifn)
> > +      : complex_pattern (node, m_ops, ifn)
> > +    {
> > +      this->m_num_args = 3;
> > +    }
> > +
> > +  public:
> > +    void build (vec_info *);
> > +    static internal_fn
> > +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> slp_tree *,
> > +	     vec<slp_tree> *);
> > +
> > +    static vect_pattern*
> > +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> > +
> > +    static vect_pattern*
> > +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> > +    {
> > +      return new complex_fma_pattern (node, m_ops, ifn);
> > +    }
> > +};
> > +
> > +/* Helper function to "reset" a previously matched node and undo the
> changes
> > +   made enough so that the node is treated as an irrelevant node.  */
> > +
> > +static inline void
> > +vect_slp_reset_pattern (slp_tree node) {
> > +  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE
> > +(node));
> > +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> > +  STMT_SLP_TYPE (stmt_info) = pure_slp;
> > +  SLP_TREE_REPRESENTATIVE (node) = stmt_info; }
> > +
> > +/* Pattern matcher for trying to match complex multiply and accumulate
> > +   and multiply and subtract patterns in SLP tree.
> > +   If the operation matches then IFN is set to the operation it matched and
> > +   the arguments to the two replacement statements are put in m_ops.
> > +
> > +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> > +
> > +   This function matches the patterns shaped as:
> > +
> > +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> > +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> > +
> > +   c[i] = c[i] - ax;
> > +   c[i+1] = c[i+1] + bx;
> > +
> > +   If a match occurred then TRUE is returned, else FALSE.  The match is
> > +   performed after COMPLEX_MUL which would have done the majority of
> the work.
> > +   This function merely matches an ADD with a COMPLEX_MUL IFN.  The
> initial
> > +   match is expected to be in OP1 and the initial match operands in
> > + args0.  */
> > +
> > +internal_fn
> > +complex_fma_pattern::matches (complex_operation_t op,
> > +			      slp_tree_to_load_perm_map_t * /* perm_cache
> */,
> > +			      slp_tree *ref_node, vec<slp_tree> *ops) {
> > +  internal_fn ifn = IFN_LAST;
> > +
> > +  /* Find the two components.  We match Complex MUL first which
> reduces the
> > +     amount of work this pattern has to do.  After that we just match the
> > +     head node and we're done.:
> > +
> > +     * FMA: + +.
> > +
> > +     We need to ignore the two_operands nodes that may also match.
> > +     For that we can check if they have any scalar statements and also
> > +     check that it's not a permute node as we're looking for a normal
> > +     PLUS_EXPR operation.  */
> > +  if (op != CMPLX_NONE)
> > +    return IFN_LAST;
> > +
> > +  /* Find the two components.  We match Complex MUL first which
> reduces the
> > +     amount of work this pattern has to do.  After that we just match the
> > +     head node and we're done.:
> > +
> > +   * FMA: + + on a non-two_operands node.  */
> > +  slp_tree vnode = *ref_node;
> > +  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
> > +      /* Need to exclude the plus two-operands node.  These are not
> marked
> > +	 so we have to infer it based on conditions.  */
> > +      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()
> 
> as said earlier we shouldn't test this.  The existing lane permute should
> already cover this - where the test would better be
> 
>  SLP_TREE_CODE (vnode) == VEC_PERM_EXPR
> 
> > +      || !vect_match_expression_p (vnode, PLUS_EXPR))
> 
> But then it shouldn't match this (the vect_match_expression_p should only
> ever match SLP_TREE_CODE (vnode) != VEC_PERM_EXPR) anyway.
> 
> > +    return IFN_LAST;
> > +
> > +  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
> > +
> > +  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
> > +    ifn = IFN_COMPLEX_FMA;
> > +  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
> > +    ifn = IFN_COMPLEX_FMA_CONJ;
> > +  else
> > +    return IFN_LAST;
> > +
> > +  if (!vect_pattern_validate_optab (ifn, vnode))
> > +    return IFN_LAST;
> > +
> > +  vect_slp_reset_pattern (node);
> 
> I don't understand this ... it deserves a comment at least.
> Having no testcases with this patch makes it impossible for me to dig in
> myself :/

I cleaned up the things pointed out in the review and added this comment:

  /* FMA matched ADD + CMUL.  During the matching of CMUL the
     stmt that starts the pattern is marked as being in a pattern,
     namely the CMUL.  When replacing this with a CFMA we have to
     unmark this statement as being in a pattern.  This is because
     vect_mark_pattern_stmts will only mark the current stmt as being
     in a pattern.  Later on when the scalar stmts are examined the
     old statement which is supposed to be irrelevant will point to
     CMUL unless we undo the pattern relationship here.  */
  vect_slp_reset_pattern (node);

As for testcases, they're quite simple, the final commit will contain a full range.
I usually try to send the testcases a long but the number of ISA this supports at
Once has forced me to test outside of the testsuite for quick iteration and moving
them into the testsuite proper is taking a bit of time :/

But they're all in the shape of

#include <stdio.h>
#include <complex.h>

#define N 200
#define ROT
#define TYPE float
#define TYPE2 float

void g (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
{
  for (int i=0; i < N; i++)
    {
      c[i] +=  a[i] * (b[i] ROT);
    }
}

void g_f1 (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
{
  for (int i=0; i < N; i++)
    {
      c[i] +=  conjf (a[i]) * (b[i] ROT);
    }
}

void g_s1 (TYPE2 complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
{
  for (int i=0; i < N; i++)
    {
      c[i] +=  a[i] * conjf (b[i] ROT);
    }
}

void caxpy_add(double complex * restrict y, double complex * restrict x, size_t N, double complex f) {
  for (size_t i = 0; i < N; ++i)
    y[i] += x[i]* f;
}

Regards,
Tamar

> 
> Otherwise looks OK.
> 
> Thanks,
> Richard.
> 
> > +  ops->truncate (0);
> > +  ops->create (3);
> > +
> > +  if (ifn == IFN_COMPLEX_FMA)
> > +    {
> > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > +    }
> > +  else
> > +    {
> > +      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
> > +      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
> > +    }
> > +
> > +  return ifn;
> > +}
> > +
> > +/* Attempt to recognize a complex mul pattern.  */
> > +
> > +vect_pattern*
> > +complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t
> *perm_cache,
> > +				slp_tree *node)
> > +{
> > +  auto_vec<slp_tree> ops;
> > +  complex_operation_t op
> > +    = vect_detect_pair_op (*node, true, &ops);
> > +  internal_fn ifn
> > +    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
> > +  if (ifn == IFN_LAST)
> > +    return NULL;
> > +
> > +  return new complex_fma_pattern (node, &ops, ifn); }
> > +
> > +/* Perform a replacement of the detected complex mul pattern with the
> new
> > +   instruction sequences.  */
> > +
> > +void
> > +complex_fma_pattern::build (vec_info *vinfo) {
> > +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> > +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
> > +
> > +  complex_pattern::build (vinfo);
> > +}
> > +
> >
> /**********************************************************
> *********************
> >   * Pattern matching definitions
> >
> >
> **********************************************************
> ************
> > ********/
> >
> >
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
diff mbox series

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b8cc90e1a75e402abbf8a8cf2efefc1a333f8b3a..6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6202,6 +6202,51 @@  The operation is only supported for vector modes @var{m}.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{cmla@var{m}4} instruction pattern
+@item @samp{cmla@var{m}4}
+Perform a vector multiply and accumulate that is semantically the same as
+a multiply and accumulate of complex numbers.
+
+@smallexample
+  complex TYPE c[N];
+  complex TYPE a[N];
+  complex TYPE b[N];
+  for (int i = 0; i < N; i += 1)
+    @{
+      c[i] += a[i] * b[i];
+    @}
+@end smallexample
+
+In GCC lane ordering the real part of the number must be in the even lanes with
+the imaginary part in the odd lanes.
+
+The operation is only supported for vector modes @var{m}.
+
+This pattern is not allowed to @code{FAIL}.
+
+@cindex @code{cmla_conj@var{m}4} instruction pattern
+@item @samp{cmla_conj@var{m}4}
+Perform a vector multiply by conjugate and accumulate that is semantically
+the same as a multiply and accumulate of complex numbers where the second
+multiply arguments is conjugated.
+
+@smallexample
+  complex TYPE c[N];
+  complex TYPE a[N];
+  complex TYPE b[N];
+  for (int i = 0; i < N; i += 1)
+    @{
+      c[i] += a[i] * conj (b[i]);
+    @}
+@end smallexample
+
+In GCC lane ordering the real part of the number must be in the even lanes with
+the imaginary part in the odd lanes.
+
+The operation is only supported for vector modes @var{m}.
+
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{cmul@var{m}4} instruction pattern
 @item @samp{cmul@var{m}4}
 Perform a vector multiply that is semantically the same as multiply of
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 5a0bbe3fe5dee591d54130e60f6996b28164ae38..305450e026d4b94ab62ceb9ca719ec5570ff43eb 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -288,6 +288,8 @@  DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
 
 /* Ternary math functions.  */
 DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST, cmla_conj, ternary)
 
 /* Unary integer ops.  */
 DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index e82396bae1117c6de91304761a560b7fbcb69ce1..8e2758d685ed85e02df10dac571eb40d45a294ed 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -294,6 +294,8 @@  OPTAB_D (cadd90_optab, "cadd90$a3")
 OPTAB_D (cadd270_optab, "cadd270$a3")
 OPTAB_D (cmul_optab, "cmul$a3")
 OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
+OPTAB_D (cmla_optab, "cmla$a4")
+OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
 OPTAB_D (cos_optab, "cos$a2")
 OPTAB_D (cosh_optab, "cosh$a2")
 OPTAB_D (exp10_optab, "exp10$a2")
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
index 82721acbab8cf81c4d6f9954c98fb913a7bb6282..3625a80c08e3d70fd362fc52e17e65b3b2c7da83 100644
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -325,6 +325,24 @@  vect_match_expression_p (slp_tree node, tree_code code)
   return true;
 }
 
+/* Checks to see if the expression represented by NODE is a call to the internal
+   function FN.  */
+
+static inline bool
+vect_match_call_p (slp_tree node, internal_fn fn)
+{
+  if (!node
+      || !SLP_TREE_REPRESENTATIVE (node))
+    return false;
+
+  gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
+  if (!expr
+      || !gimple_call_internal_p (expr, fn))
+    return false;
+
+   return true;
+}
+
 /* Check if the given lane permute in PERMUTES matches an alternating sequence
    of {even odd even odd ...}.  This to account for unrolled loops.  Further
    mode there resulting permute must be linear.   */
@@ -1081,6 +1099,161 @@  complex_mul_pattern::build (vec_info *vinfo)
   complex_pattern::build (vinfo);
 }
 
+/*******************************************************************************
+ * complex_fma_pattern class
+ ******************************************************************************/
+
+class complex_fma_pattern : public complex_pattern
+{
+  protected:
+    complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+      : complex_pattern (node, m_ops, ifn)
+    {
+      this->m_num_args = 3;
+    }
+
+  public:
+    void build (vec_info *);
+    static internal_fn
+    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+	     vec<slp_tree> *);
+
+    static vect_pattern*
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+    static vect_pattern*
+    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+    {
+      return new complex_fma_pattern (node, m_ops, ifn);
+    }
+};
+
+/* Helper function to "reset" a previously matched node and undo the changes
+   made enough so that the node is treated as an irrelevant node.  */
+
+static inline void
+vect_slp_reset_pattern (slp_tree node)
+{
+  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
+  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
+  STMT_SLP_TYPE (stmt_info) = pure_slp;
+  SLP_TREE_REPRESENTATIVE (node) = stmt_info;
+}
+
+/* Pattern matcher for trying to match complex multiply and accumulate
+   and multiply and subtract patterns in SLP tree.
+   If the operation matches then IFN is set to the operation it matched and
+   the arguments to the two replacement statements are put in m_ops.
+
+   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
+
+   This function matches the patterns shaped as:
+
+   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
+   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
+
+   c[i] = c[i] - ax;
+   c[i+1] = c[i+1] + bx;
+
+   If a match occurred then TRUE is returned, else FALSE.  The match is
+   performed after COMPLEX_MUL which would have done the majority of the work.
+   This function merely matches an ADD with a COMPLEX_MUL IFN.  The initial
+   match is expected to be in OP1 and the initial match operands in args0.  */
+
+internal_fn
+complex_fma_pattern::matches (complex_operation_t op,
+			      slp_tree_to_load_perm_map_t * /* perm_cache */,
+			      slp_tree *ref_node, vec<slp_tree> *ops)
+{
+  internal_fn ifn = IFN_LAST;
+
+  /* Find the two components.  We match Complex MUL first which reduces the
+     amount of work this pattern has to do.  After that we just match the
+     head node and we're done.:
+
+     * FMA: + +.
+
+     We need to ignore the two_operands nodes that may also match.
+     For that we can check if they have any scalar statements and also
+     check that it's not a permute node as we're looking for a normal
+     PLUS_EXPR operation.  */
+  if (op != CMPLX_NONE)
+    return IFN_LAST;
+
+  /* Find the two components.  We match Complex MUL first which reduces the
+     amount of work this pattern has to do.  After that we just match the
+     head node and we're done.:
+
+   * FMA: + + on a non-two_operands node.  */
+  slp_tree vnode = *ref_node;
+  if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
+      /* Need to exclude the plus two-operands node.  These are not marked
+	 so we have to infer it based on conditions.  */
+      || !SLP_TREE_SCALAR_STMTS (vnode).exists ()
+      || !vect_match_expression_p (vnode, PLUS_EXPR))
+    return IFN_LAST;
+
+  slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
+
+  if (vect_match_call_p (node, IFN_COMPLEX_MUL))
+    ifn = IFN_COMPLEX_FMA;
+  else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
+    ifn = IFN_COMPLEX_FMA_CONJ;
+  else
+    return IFN_LAST;
+
+  if (!vect_pattern_validate_optab (ifn, vnode))
+    return IFN_LAST;
+
+  vect_slp_reset_pattern (node);
+  ops->truncate (0);
+  ops->create (3);
+
+  if (ifn == IFN_COMPLEX_FMA)
+    {
+      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
+      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
+      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
+    }
+  else
+    {
+      ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
+      ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
+      ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
+    }
+
+  return ifn;
+}
+
+/* Attempt to recognize a complex mul pattern.  */
+
+vect_pattern*
+complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+				slp_tree *node)
+{
+  auto_vec<slp_tree> ops;
+  complex_operation_t op
+    = vect_detect_pair_op (*node, true, &ops);
+  internal_fn ifn
+    = complex_fma_pattern::matches (op, perm_cache, node, &ops);
+  if (ifn == IFN_LAST)
+    return NULL;
+
+  return new complex_fma_pattern (node, &ops, ifn);
+}
+
+/* Perform a replacement of the detected complex mul pattern with the new
+   instruction sequences.  */
+
+void
+complex_fma_pattern::build (vec_info *vinfo)
+{
+  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
+  SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
+
+  complex_pattern::build (vinfo);
+}
+
 /*******************************************************************************
  * Pattern matching definitions
  ******************************************************************************/