diff mbox series

[7/8,v9] middle-end slp: support complex FMS and complex FMS conjugate

Message ID 20201228133806.GA32350@arm.com
State New
Headers show
Series [1/8,v9] middle-end slp: Support optimizing load distribution | expand

Commit Message

Tamar Christina Dec. 28, 2020, 1:38 p.m. UTC
Hi All,

This adds support for FMS and FMS conjugated to the slp pattern matcher.

Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu
with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* internal-fn.def (COMPLEX_FMS, COMPLEX_FMS_CONJ): New.
	* optabs.def (cmls_optab, cmls_conj_optab): New.
	* doc/md.texi: Document them.
	* tree-vect-slp-patterns.c (class complex_fms_pattern,
	complex_fms_pattern::matches, complex_fms_pattern::recognize,
	complex_fms_pattern::build): New.

--- inline copy of patch -- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7..3f5a42df285b3ee162edc9ec661f25c0eec5e4fa 100644


--

Comments

Richard Biener Jan. 8, 2021, 9:49 a.m. UTC | #1
On Mon, 28 Dec 2020, Tamar Christina wrote:

> Hi All,
> 
> This adds support for FMS and FMS conjugated to the slp pattern matcher.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?

Interestingly this patch looks different from the FMA one(!?).  I
would have expected to have the same pattern for FMA and FMS in the
end.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* internal-fn.def (COMPLEX_FMS, COMPLEX_FMS_CONJ): New.
> 	* optabs.def (cmls_optab, cmls_conj_optab): New.
> 	* doc/md.texi: Document them.
> 	* tree-vect-slp-patterns.c (class complex_fms_pattern,
> 	complex_fms_pattern::matches, complex_fms_pattern::recognize,
> 	complex_fms_pattern::build): New.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7..3f5a42df285b3ee162edc9ec661f25c0eec5e4fa 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6247,6 +6247,51 @@ The operation is only supported for vector modes @var{m}.
>  
>  This pattern is not allowed to @code{FAIL}.
>  
> +@cindex @code{cmls@var{m}4} instruction pattern
> +@item @samp{cmls@var{m}4}
> +Perform a vector multiply and subtract that is semantically the same as
> +a multiply and subtract of complex numbers.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +    @{
> +      c[i] -= a[i] * b[i];
> +    @}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
> +@cindex @code{cmls_conj@var{m}4} instruction pattern
> +@item @samp{cmls_conj@var{m}4}
> +Perform a vector multiply by conjugate and subtract that is semantically
> +the same as a multiply and subtract of complex numbers where the second
> +multiply arguments is conjugated.
> +
> +@smallexample
> +  complex TYPE c[N];
> +  complex TYPE a[N];
> +  complex TYPE b[N];
> +  for (int i = 0; i < N; i += 1)
> +    @{
> +      c[i] -= a[i] * conj (b[i]);
> +    @}
> +@end smallexample
> +
> +In GCC lane ordering the real part of the number must be in the even lanes with
> +the imaginary part in the odd lanes.
> +
> +The operation is only supported for vector modes @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
>  @cindex @code{cmul@var{m}4} instruction pattern
>  @item @samp{cmul@var{m}4}
>  Perform a vector multiply that is semantically the same as multiply of
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 305450e026d4b94ab62ceb9ca719ec5570ff43eb..c8161509d9497afe58f32bde12d8e6bd7b876a3c 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -290,6 +290,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
>  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
>  DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
>  DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST, cmla_conj, ternary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS, ECF_CONST, cmls, ternary)
> +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS_CONJ, ECF_CONST, cmls_conj, ternary)
>  
>  /* Unary integer ops.  */
>  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 8e2758d685ed85e02df10dac571eb40d45a294ed..320bb5f3dce31867d312bbbb6a4c6e31c534254e 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -296,6 +296,8 @@ OPTAB_D (cmul_optab, "cmul$a3")
>  OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
>  OPTAB_D (cmla_optab, "cmla$a4")
>  OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> +OPTAB_D (cmls_optab, "cmls$a4")
> +OPTAB_D (cmls_conj_optab, "cmls_conj$a4")
>  OPTAB_D (cos_optab, "cos$a2")
>  OPTAB_D (cosh_optab, "cosh$a2")
>  OPTAB_D (exp10_optab, "exp10$a2")
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 3625a80c08e3d70fd362fc52e17e65b3b2c7da83..ab6587f0b8522ec5f916f74e7e7401b1f7a35bbb 100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -1254,6 +1254,181 @@ complex_fma_pattern::build (vec_info *vinfo)
>    complex_pattern::build (vinfo);
>  }
>  
> +/*******************************************************************************
> + * complex_fms_pattern class
> + ******************************************************************************/
> +
> +class complex_fms_pattern : public complex_pattern
> +{
> +  protected:
> +    complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> +      : complex_pattern (node, m_ops, ifn)
> +    {
> +      this->m_num_args = 3;
> +    }
> +
> +  public:
> +    void build (vec_info *);
> +    static internal_fn
> +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
> +	     vec<slp_tree> *);
> +
> +    static vect_pattern*
> +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> +
> +    static vect_pattern*
> +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> +    {
> +      return new complex_fms_pattern (node, m_ops, ifn);
> +    }
> +};
> +
> +
> +/* Pattern matcher for trying to match complex multiply and accumulate
> +   and multiply and subtract patterns in SLP tree.
> +   If the operation matches then IFN is set to the operation it matched and
> +   the arguments to the two replacement statements are put in m_ops.
> +
> +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> +
> +   This function matches the patterns shaped as:
> +
> +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> +
> +   c[i] = c[i] - ax;
> +   c[i+1] = c[i+1] + bx;
> +
> +   If a match occurred then TRUE is returned, else FALSE.  The initial match is
> +   expected to be in OP1 and the initial match operands in args0.  */
> +
> +internal_fn
> +complex_fms_pattern::matches (complex_operation_t op,
> +			      slp_tree_to_load_perm_map_t *perm_cache,
> +			      slp_tree * ref_node, vec<slp_tree> *ops)
> +{
> +  internal_fn ifn = IFN_LAST;
> +
> +  /* Find the two components.  We match Complex MUL first which reduces the
> +     amount of work this pattern has to do.  After that we just match the
> +     head node and we're done.:
> +
> +     * FMS: - +.  */
> +  slp_tree child = NULL;
> +
> +  /* We need to ignore the two_operands nodes that may also match,
> +     for that we can check if they have any scalar statements and also
> +     check that it's not a permute node as we're looking for a normal
> +     PLUS_EXPR operation.  */
> +  if (op != PLUS_MINUS)
> +    return IFN_LAST;
> +
> +  child = SLP_TREE_CHILDREN ((*ops)[1])[1];
> +  if (vect_detect_pair_op (child) != MINUS_PLUS)
> +    return IFN_LAST;
> +
> +  /* First two nodes must be a multiply.  */
> +  auto_vec<slp_tree> muls;
> +  if (vect_match_call_complex_mla (child, 0) != MULT_MULT
> +      || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
> +    return IFN_LAST;
> +
> +  /* Now operand2+4 may lead to another expression.  */
> +  auto_vec<slp_tree> left_op, right_op;
> +  left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
> +  right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
> +
> +  bool is_neg = vect_normalize_conj_loc (left_op);
> +
> +  child = SLP_TREE_CHILDREN ((*ops)[1])[0];
> +  bool conj_first_operand;
> +  if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
> +				     &conj_first_operand, true))
> +    return IFN_LAST;
> +
> +  if (!is_neg)
> +    ifn = IFN_COMPLEX_FMS;
> +  else if (is_neg)
> +    ifn = IFN_COMPLEX_FMS_CONJ;
> +
> +  if (!vect_pattern_validate_optab (ifn, *ref_node))
> +    return IFN_LAST;
> +
> +  ops->truncate (0);
> +  ops->create (4);
> +
> +  complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]).first;
> +  if (kind == PERM_EVENODD)
> +    {
> +      ops->quick_push (child);
> +      ops->quick_push (right_op[0]);
> +      ops->quick_push (right_op[1]);
> +      ops->quick_push (left_op[0]);
> +    }
> +  else if (kind == PERM_TOP)
> +    {
> +      ops->quick_push (child);
> +      ops->quick_push (right_op[1]);
> +      ops->quick_push (right_op[0]);
> +      ops->quick_push (left_op[0]);
> +    }
> +  else
> +    {
> +      ops->quick_push (child);
> +      ops->quick_push (right_op[1]);
> +      ops->quick_push (right_op[0]);
> +      ops->quick_push (left_op[1]);
> +    }
> +
> +  return ifn;
> +}
> +
> +/* Attempt to recognize a complex mul pattern.  */
> +
> +vect_pattern*
> +complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
> +				slp_tree *node)
> +{
> +  auto_vec<slp_tree> ops;
> +  complex_operation_t op
> +    = vect_detect_pair_op (*node, true, &ops);
> +  internal_fn ifn
> +    = complex_fms_pattern::matches (op, perm_cache, node, &ops);
> +  if (ifn == IFN_LAST)
> +    return NULL;
> +
> +  return new complex_fms_pattern (node, &ops, ifn);
> +}
> +
> +/* Perform a replacement of the detected complex mul pattern with the new
> +   instruction sequences.  */
> +
> +void
> +complex_fms_pattern::build (vec_info *vinfo)
> +{
> +  auto_vec<slp_tree> nodes;
> +
> +  /* First re-arrange the children.  */
> +  nodes.create (3);
> +
> +  nodes.quick_push (this->m_ops[0]);
> +  nodes.quick_push (this->m_ops[1]);
> +  nodes.quick_push (
> +    vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node));
> +  SLP_TREE_REF_COUNT (this->m_ops[0])++;
> +  SLP_TREE_REF_COUNT (this->m_ops[1])++;
> +
> +  slp_tree node;
> +  unsigned i;
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
> +    vect_free_slp_tree (node);
> +
> +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> +  SLP_TREE_CHILDREN (*this->m_node).safe_splice (nodes);

please elide the nodes vector.

Otherwise OK.
Richard.

> +
> +  complex_pattern::build (vinfo);
> +}
> +
>  /*******************************************************************************
>   * Pattern matching definitions
>   ******************************************************************************/
> 
> 
>
Tamar Christina Jan. 8, 2021, 10:02 a.m. UTC | #2
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Friday, January 8, 2021 9:49 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>
> Subject: Re: [PATCH 7/8 v9]middle-end slp: support complex FMS and
> complex FMS conjugate
> 
> On Mon, 28 Dec 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This adds support for FMS and FMS conjugated to the slp pattern matcher.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> 
> Interestingly this patch looks different from the FMA one(!?).  I would have
> expected to have the same pattern for FMA and FMS in the end.

No, because the mid-end canonicalization of the tree for FMA and FMS is different.
Because FMS has two TWO_OPERANDS nodes the order of the tree is swapped.

There's no real reason for it (as far as I can tell) but that results in a reverse tree.
However the operations are not sufficiently different that I can detect the MUL part.

I have a note for next year's rewrite to fix this during slp build so they can be shared.

> 
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* internal-fn.def (COMPLEX_FMS, COMPLEX_FMS_CONJ): New.
> > 	* optabs.def (cmls_optab, cmls_conj_optab): New.
> > 	* doc/md.texi: Document them.
> > 	* tree-vect-slp-patterns.c (class complex_fms_pattern,
> > 	complex_fms_pattern::matches, complex_fms_pattern::recognize,
> > 	complex_fms_pattern::build): New.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> 6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7..3f5a42df285b3ee162edc9ec66
> 1f
> > 25c0eec5e4fa 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6247,6 +6247,51 @@ The operation is only supported for vector
> modes @var{m}.
> >
> >  This pattern is not allowed to @code{FAIL}.
> >
> > +@cindex @code{cmls@var{m}4} instruction pattern @item
> > +@samp{cmls@var{m}4} Perform a vector multiply and subtract that is
> > +semantically the same as a multiply and subtract of complex numbers.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] -= a[i] * b[i];
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> > +@cindex @code{cmls_conj@var{m}4} instruction pattern @item
> > +@samp{cmls_conj@var{m}4} Perform a vector multiply by conjugate and
> > +subtract that is semantically the same as a multiply and subtract of
> > +complex numbers where the second multiply arguments is conjugated.
> > +
> > +@smallexample
> > +  complex TYPE c[N];
> > +  complex TYPE a[N];
> > +  complex TYPE b[N];
> > +  for (int i = 0; i < N; i += 1)
> > +    @{
> > +      c[i] -= a[i] * conj (b[i]);
> > +    @}
> > +@end smallexample
> > +
> > +In GCC lane ordering the real part of the number must be in the even
> > +lanes with the imaginary part in the odd lanes.
> > +
> > +The operation is only supported for vector modes @var{m}.
> > +
> > +This pattern is not allowed to @code{FAIL}.
> > +
> >  @cindex @code{cmul@var{m}4} instruction pattern  @item
> > @samp{cmul@var{m}4}  Perform a vector multiply that is semantically
> > the same as multiply of diff --git a/gcc/internal-fn.def
> > b/gcc/internal-fn.def index
> >
> 305450e026d4b94ab62ceb9ca719ec5570ff43eb..c8161509d9497afe58f32bde1
> 2d8
> > e6bd7b876a3c 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -290,6 +290,8 @@ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp,
> > binary)  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> > DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
> > DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST,
> cmla_conj,
> > ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS, ECF_CONST, cmls, ternary)
> > +DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS_CONJ, ECF_CONST,
> cmls_conj,
> > +ternary)
> >
> >  /* Unary integer ops.  */
> >  DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb,
> unary)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def index
> >
> 8e2758d685ed85e02df10dac571eb40d45a294ed..320bb5f3dce31867d312bbb
> b6a4c
> > 6e31c534254e 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -296,6 +296,8 @@ OPTAB_D (cmul_optab, "cmul$a3")  OPTAB_D
> > (cmul_conj_optab, "cmul_conj$a3")  OPTAB_D (cmla_optab, "cmla$a4")
> > OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
> > +OPTAB_D (cmls_optab, "cmls$a4")
> > +OPTAB_D (cmls_conj_optab, "cmls_conj$a4")
> >  OPTAB_D (cos_optab, "cos$a2")
> >  OPTAB_D (cosh_optab, "cosh$a2")
> >  OPTAB_D (exp10_optab, "exp10$a2")
> > diff --git a/gcc/tree-vect-slp-patterns.c
> > b/gcc/tree-vect-slp-patterns.c index
> >
> 3625a80c08e3d70fd362fc52e17e65b3b2c7da83..ab6587f0b8522ec5f916f74e7e
> 74
> > 01b1f7a35bbb 100644
> > --- a/gcc/tree-vect-slp-patterns.c
> > +++ b/gcc/tree-vect-slp-patterns.c
> > @@ -1254,6 +1254,181 @@ complex_fma_pattern::build (vec_info *vinfo)
> >    complex_pattern::build (vinfo);
> >  }
> >
> >
> +/*********************************************************
> ***********
> > +***********
> > + * complex_fms_pattern class
> > +
> >
> +*********************************************************
> ************
> > +*********/
> > +
> > +class complex_fms_pattern : public complex_pattern {
> > +  protected:
> > +    complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops,
> internal_fn ifn)
> > +      : complex_pattern (node, m_ops, ifn)
> > +    {
> > +      this->m_num_args = 3;
> > +    }
> > +
> > +  public:
> > +    void build (vec_info *);
> > +    static internal_fn
> > +    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> slp_tree *,
> > +	     vec<slp_tree> *);
> > +
> > +    static vect_pattern*
> > +    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> > +
> > +    static vect_pattern*
> > +    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
> > +    {
> > +      return new complex_fms_pattern (node, m_ops, ifn);
> > +    }
> > +};
> > +
> > +
> > +/* Pattern matcher for trying to match complex multiply and accumulate
> > +   and multiply and subtract patterns in SLP tree.
> > +   If the operation matches then IFN is set to the operation it matched and
> > +   the arguments to the two replacement statements are put in m_ops.
> > +
> > +   If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
> > +
> > +   This function matches the patterns shaped as:
> > +
> > +   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
> > +   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
> > +
> > +   c[i] = c[i] - ax;
> > +   c[i+1] = c[i+1] + bx;
> > +
> > +   If a match occurred then TRUE is returned, else FALSE.  The initial match
> is
> > +   expected to be in OP1 and the initial match operands in args0.  */
> > +
> > +internal_fn
> > +complex_fms_pattern::matches (complex_operation_t op,
> > +			      slp_tree_to_load_perm_map_t *perm_cache,
> > +			      slp_tree * ref_node, vec<slp_tree> *ops) {
> > +  internal_fn ifn = IFN_LAST;
> > +
> > +  /* Find the two components.  We match Complex MUL first which
> reduces the
> > +     amount of work this pattern has to do.  After that we just match the
> > +     head node and we're done.:
> > +
> > +     * FMS: - +.  */
> > +  slp_tree child = NULL;
> > +
> > +  /* We need to ignore the two_operands nodes that may also match,
> > +     for that we can check if they have any scalar statements and also
> > +     check that it's not a permute node as we're looking for a normal
> > +     PLUS_EXPR operation.  */
> > +  if (op != PLUS_MINUS)
> > +    return IFN_LAST;
> > +
> > +  child = SLP_TREE_CHILDREN ((*ops)[1])[1];  if (vect_detect_pair_op
> > + (child) != MINUS_PLUS)
> > +    return IFN_LAST;
> > +
> > +  /* First two nodes must be a multiply.  */  auto_vec<slp_tree>
> > + muls;  if (vect_match_call_complex_mla (child, 0) != MULT_MULT
> > +      || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
> > +    return IFN_LAST;
> > +
> > +  /* Now operand2+4 may lead to another expression.  */
> > + auto_vec<slp_tree> left_op, right_op;  left_op.safe_splice
> > + (SLP_TREE_CHILDREN (muls[0]));  right_op.safe_splice
> > + (SLP_TREE_CHILDREN (muls[1]));
> > +
> > +  bool is_neg = vect_normalize_conj_loc (left_op);
> > +
> > +  child = SLP_TREE_CHILDREN ((*ops)[1])[0];
> > +  bool conj_first_operand;
> > +  if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
> > +				     &conj_first_operand, true))
> > +    return IFN_LAST;
> > +
> > +  if (!is_neg)
> > +    ifn = IFN_COMPLEX_FMS;
> > +  else if (is_neg)
> > +    ifn = IFN_COMPLEX_FMS_CONJ;
> > +
> > +  if (!vect_pattern_validate_optab (ifn, *ref_node))
> > +    return IFN_LAST;
> > +
> > +  ops->truncate (0);
> > +  ops->create (4);
> > +
> > +  complex_perm_kinds_t kind = linear_loads_p (perm_cache,
> > + right_op[0]).first;  if (kind == PERM_EVENODD)
> > +    {
> > +      ops->quick_push (child);
> > +      ops->quick_push (right_op[0]);
> > +      ops->quick_push (right_op[1]);
> > +      ops->quick_push (left_op[0]);
> > +    }
> > +  else if (kind == PERM_TOP)
> > +    {
> > +      ops->quick_push (child);
> > +      ops->quick_push (right_op[1]);
> > +      ops->quick_push (right_op[0]);
> > +      ops->quick_push (left_op[0]);
> > +    }
> > +  else
> > +    {
> > +      ops->quick_push (child);
> > +      ops->quick_push (right_op[1]);
> > +      ops->quick_push (right_op[0]);
> > +      ops->quick_push (left_op[1]);
> > +    }
> > +
> > +  return ifn;
> > +}
> > +
> > +/* Attempt to recognize a complex mul pattern.  */
> > +
> > +vect_pattern*
> > +complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t
> *perm_cache,
> > +				slp_tree *node)
> > +{
> > +  auto_vec<slp_tree> ops;
> > +  complex_operation_t op
> > +    = vect_detect_pair_op (*node, true, &ops);
> > +  internal_fn ifn
> > +    = complex_fms_pattern::matches (op, perm_cache, node, &ops);
> > +  if (ifn == IFN_LAST)
> > +    return NULL;
> > +
> > +  return new complex_fms_pattern (node, &ops, ifn); }
> > +
> > +/* Perform a replacement of the detected complex mul pattern with the
> new
> > +   instruction sequences.  */
> > +
> > +void
> > +complex_fms_pattern::build (vec_info *vinfo) {
> > +  auto_vec<slp_tree> nodes;
> > +
> > +  /* First re-arrange the children.  */  nodes.create (3);
> > +
> > +  nodes.quick_push (this->m_ops[0]);
> > +  nodes.quick_push (this->m_ops[1]);
> > +  nodes.quick_push (
> > +    vect_build_combine_node (this->m_ops[2], this->m_ops[3],
> > + *this->m_node));  SLP_TREE_REF_COUNT (this->m_ops[0])++;
> > + SLP_TREE_REF_COUNT (this->m_ops[1])++;
> > +
> > +  slp_tree node;
> > +  unsigned i;
> > +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
> > +    vect_free_slp_tree (node);
> > +
> > +  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
> SLP_TREE_CHILDREN
> > + (*this->m_node).safe_splice (nodes);
> 
> please elide the nodes vector.
> 
> Otherwise OK.
> Richard.
> 
> > +
> > +  complex_pattern::build (vinfo);
> > +}
> > +
> >
> /**********************************************************
> *********************
> >   * Pattern matching definitions
> >
> >
> **********************************************************
> ************
> > ********/
> >
> >
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
diff mbox series

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6d5a98c4946d3ff4c2b8abea5c29caa6863fd3f7..3f5a42df285b3ee162edc9ec661f25c0eec5e4fa 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6247,6 +6247,51 @@  The operation is only supported for vector modes @var{m}.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{cmls@var{m}4} instruction pattern
+@item @samp{cmls@var{m}4}
+Perform a vector multiply and subtract that is semantically the same as
+a multiply and subtract of complex numbers.
+
+@smallexample
+  complex TYPE c[N];
+  complex TYPE a[N];
+  complex TYPE b[N];
+  for (int i = 0; i < N; i += 1)
+    @{
+      c[i] -= a[i] * b[i];
+    @}
+@end smallexample
+
+In GCC lane ordering the real part of the number must be in the even lanes with
+the imaginary part in the odd lanes.
+
+The operation is only supported for vector modes @var{m}.
+
+This pattern is not allowed to @code{FAIL}.
+
+@cindex @code{cmls_conj@var{m}4} instruction pattern
+@item @samp{cmls_conj@var{m}4}
+Perform a vector multiply by conjugate and subtract that is semantically
+the same as a multiply and subtract of complex numbers where the second
+multiply argument is conjugated.
+
+@smallexample
+  complex TYPE c[N];
+  complex TYPE a[N];
+  complex TYPE b[N];
+  for (int i = 0; i < N; i += 1)
+    @{
+      c[i] -= a[i] * conj (b[i]);
+    @}
+@end smallexample
+
+In GCC lane ordering the real part of the number must be in the even lanes with
+the imaginary part in the odd lanes.
+
+The operation is only supported for vector modes @var{m}.
+
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{cmul@var{m}4} instruction pattern
 @item @samp{cmul@var{m}4}
 Perform a vector multiply that is semantically the same as multiply of
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 305450e026d4b94ab62ceb9ca719ec5570ff43eb..c8161509d9497afe58f32bde12d8e6bd7b876a3c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -290,6 +290,8 @@  DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
 DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA, ECF_CONST, cmla, ternary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_FMA_CONJ, ECF_CONST, cmla_conj, ternary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS, ECF_CONST, cmls, ternary)
+DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS_CONJ, ECF_CONST, cmls_conj, ternary)
 
 /* Unary integer ops.  */
 DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 8e2758d685ed85e02df10dac571eb40d45a294ed..320bb5f3dce31867d312bbbb6a4c6e31c534254e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -296,6 +296,8 @@  OPTAB_D (cmul_optab, "cmul$a3")
 OPTAB_D (cmul_conj_optab, "cmul_conj$a3")
 OPTAB_D (cmla_optab, "cmla$a4")
 OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
+OPTAB_D (cmls_optab, "cmls$a4")
+OPTAB_D (cmls_conj_optab, "cmls_conj$a4")
 OPTAB_D (cos_optab, "cos$a2")
 OPTAB_D (cosh_optab, "cosh$a2")
 OPTAB_D (exp10_optab, "exp10$a2")
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
index 3625a80c08e3d70fd362fc52e17e65b3b2c7da83..ab6587f0b8522ec5f916f74e7e7401b1f7a35bbb 100644
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -1254,6 +1254,181 @@  complex_fma_pattern::build (vec_info *vinfo)
   complex_pattern::build (vinfo);
 }
 
+/*******************************************************************************
+ * complex_fms_pattern class (IFN_COMPLEX_FMS and IFN_COMPLEX_FMS_CONJ)
+ ******************************************************************************/
+
+class complex_fms_pattern : public complex_pattern
+{
+  protected:
+    complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+      : complex_pattern (node, m_ops, ifn)
+    {
+      this->m_num_args = 3;  /* Both FMS IFNs are declared ternary.  */
+    }
+
+  public:
+    void build (vec_info *);
+    static internal_fn
+    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+	     vec<slp_tree> *);
+    /* Returns a new pattern on a match and NULL otherwise.  */
+    static vect_pattern*
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+    /* Factory wrapper around the protected constructor.  */
+    static vect_pattern*
+    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+    {
+      return new complex_fms_pattern (node, m_ops, ifn);
+    }
+};
+
+
+/* Pattern matcher for trying to match complex multiply and subtract
+   patterns in SLP tree.  OP is the pair operation detected on *REF_NODE and
+   OPS holds its operands (as set up by complex_fms_pattern::recognize).
+
+   If the operation matches then the matched IFN (IFN_COMPLEX_FMS or
+   IFN_COMPLEX_FMS_CONJ) is returned and OPS is replaced by the four operands
+   of the replacement.
+
+   If no match is found then IFN_LAST is returned and OPS is unchanged.
+
+   This function matches the patterns shaped as:
+
+   double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
+   double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
+
+   c[i] = c[i] - ax;
+   c[i+1] = c[i+1] + bx;  */
+
+internal_fn
+complex_fms_pattern::matches (complex_operation_t op,
+			      slp_tree_to_load_perm_map_t *perm_cache,
+			      slp_tree * ref_node, vec<slp_tree> *ops)
+{
+  internal_fn ifn = IFN_LAST;
+
+  /* Find the two components.  We match Complex MUL first which reduces the
+     amount of work this pattern has to do.  After that we just match the
+     head node and we're done:
+
+     * FMS: - +.  */
+  slp_tree child = NULL;
+
+  /* The root must have been detected as a PLUS_MINUS pair operation by the
+     caller; any other shape is rejected here.  The second operand's second
+     child must in turn be a MINUS_PLUS pair, which is checked right
+     below.  */
+  if (op != PLUS_MINUS)
+    return IFN_LAST;
+
+  child = SLP_TREE_CHILDREN ((*ops)[1])[1];
+  if (vect_detect_pair_op (child) != MINUS_PLUS)
+    return IFN_LAST;
+
+  /* First two nodes must be a multiply.  */
+  auto_vec<slp_tree> muls;
+  if (vect_match_call_complex_mla (child, 0) != MULT_MULT
+      || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
+    return IFN_LAST;
+
+  /* Now operand2+4 may lead to another expression.  */
+  auto_vec<slp_tree> left_op, right_op;
+  left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
+  right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
+
+  bool is_neg = vect_normalize_conj_loc (left_op);
+  /* The first child of that operand becomes operand 0 of the result.  */
+  child = SLP_TREE_CHILDREN ((*ops)[1])[0];
+  bool conj_first_operand;
+  if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
+				     &conj_first_operand, true))
+    return IFN_LAST;
+  /* IS_NEG (from vect_normalize_conj_loc) selects the conjugate form.  */
+  if (!is_neg)
+    ifn = IFN_COMPLEX_FMS;
+  else if (is_neg)
+    ifn = IFN_COMPLEX_FMS_CONJ;
+
+  if (!vect_pattern_validate_optab (ifn, *ref_node))
+    return IFN_LAST;
+
+  ops->truncate (0);
+  ops->create (4);
+  /* Operand order depends on the load permute kind of RIGHT_OP[0].  */
+  complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]).first;
+  if (kind == PERM_EVENODD)
+    {
+      ops->quick_push (child);
+      ops->quick_push (right_op[0]);
+      ops->quick_push (right_op[1]);
+      ops->quick_push (left_op[0]);
+    }
+  else if (kind == PERM_TOP)
+    {
+      ops->quick_push (child);
+      ops->quick_push (right_op[1]);
+      ops->quick_push (right_op[0]);
+      ops->quick_push (left_op[0]);
+    }
+  else
+    {
+      ops->quick_push (child);
+      ops->quick_push (right_op[1]);
+      ops->quick_push (right_op[0]);
+      ops->quick_push (left_op[1]);
+    }
+
+  return ifn;
+}
+
+/* Attempt to recognize a complex FMS pattern; returns NULL on no match.  */
+
+vect_pattern*
+complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+				slp_tree *node)
+{
+  auto_vec<slp_tree> ops;
+  complex_operation_t op
+    = vect_detect_pair_op (*node, true, &ops);
+  internal_fn ifn
+    = complex_fms_pattern::matches (op, perm_cache, node, &ops);
+  if (ifn == IFN_LAST)
+    return NULL;
+
+  return new complex_fms_pattern (node, &ops, ifn);
+}
+
+/* Perform a replacement of the detected complex FMS pattern: rebuild the
+   children of the matched node from m_ops, then defer to the base class.  */
+
+void
+complex_fms_pattern::build (vec_info *vinfo)
+{
+  auto_vec<slp_tree> nodes;
+
+  /* New children: m_ops[0], m_ops[1] and m_ops[2]/m_ops[3] combined.  */
+  nodes.create (3);
+
+  nodes.quick_push (this->m_ops[0]);
+  nodes.quick_push (this->m_ops[1]);
+  nodes.quick_push (
+    vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node));
+  SLP_TREE_REF_COUNT (this->m_ops[0])++;
+  SLP_TREE_REF_COUNT (this->m_ops[1])++;
+  /* Release the old children before installing the new set.  */
+  slp_tree node;
+  unsigned i;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
+    vect_free_slp_tree (node);
+
+  SLP_TREE_CHILDREN (*this->m_node).truncate (0);
+  SLP_TREE_CHILDREN (*this->m_node).safe_splice (nodes);
+
+  complex_pattern::build (vinfo);
+}
+
 /*******************************************************************************
  * Pattern matching definitions
  ******************************************************************************/