diff mbox series

[v2,10/11] Native complex ops: Add a fast complex multiplication pattern

Message ID 20230912100713.1074-11-snoiry@kalrayinc.com
State New
Headers show
Series Native complex operations | expand

Commit Message

Sylvain Noiry Sept. 12, 2023, 10:07 a.m. UTC
Summary:
Add a new fast_mul_optab to define a pattern corresponding to
the fast path of an IEEE-compliant complex multiplication. This lets the
backend programmer change the fast path without having to manually handle
the IEEE checks.

gcc/ChangeLog:

        * internal-fn.def: Add a FAST_MULT internal fn.
        * optabs.def: Add fast_mul_optab.
        * tree-complex.cc (expand_complex_multiplication_components):
        Adapt complex multiplication expansion to generate the
        FAST_MULT internal fn.
        (expand_complex_multiplication): Likewise.
        (expand_complex_operations_1): Likewise.
---
 gcc/internal-fn.def |  1 +
 gcc/optabs.def      |  1 +
 gcc/tree-complex.cc | 70 +++++++++++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 0ac6cd98a4f..f1046996a48 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -396,6 +396,7 @@  DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
+DEF_INTERNAL_OPTAB_FN (FAST_MULT, ECF_CONST, fast_mul, binary)
 DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary)
 DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_PLUS,
 				ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index d146cac5eec..a90b6ee6440 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -344,6 +344,7 @@  OPTAB_D (cmla_optab, "cmla$a4")
 OPTAB_D (cmla_conj_optab, "cmla_conj$a4")
 OPTAB_D (cmls_optab, "cmls$a4")
 OPTAB_D (cmls_conj_optab, "cmls_conj$a4")
+OPTAB_D (fast_mul_optab, "fast_mul$a3")
 OPTAB_D (cos_optab, "cos$a2")
 OPTAB_D (cosh_optab, "cosh$a2")
 OPTAB_D (exp10_optab, "exp10$a2")
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index d814e407af6..16759f1f3ba 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -1138,25 +1138,36 @@  expand_complex_libcall (gimple_stmt_iterator *gsi, tree type, tree ar, tree ai,
 
 static void
 expand_complex_multiplication_components (gimple_seq *stmts, location_t loc,
-					  tree type, tree ar, tree ai,
-					  tree br, tree bi,
-					  tree *rr, tree *ri)
+					  tree type, tree ac, tree ar,
+					  tree ai, tree bc, tree br, tree bi,
+					  tree *rr, tree *ri,
+					  bool fast_mult)
 {
-  tree t1, t2, t3, t4;
+  tree inner_type = TREE_TYPE (type);
+  if (!fast_mult)
+    {
+      tree t1, t2, t3, t4;
 
-  t1 = gimple_build (stmts, loc, MULT_EXPR, type, ar, br);
-  t2 = gimple_build (stmts, loc, MULT_EXPR, type, ai, bi);
-  t3 = gimple_build (stmts, loc, MULT_EXPR, type, ar, bi);
+      t1 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ar, br);
+      t2 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ai, bi);
+      t3 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ar, bi);
 
-  /* Avoid expanding redundant multiplication for the common
-     case of squaring a complex number.  */
-  if (ar == br && ai == bi)
-    t4 = t3;
-  else
-    t4 = gimple_build (stmts, loc, MULT_EXPR, type, ai, br);
+      /* Avoid expanding redundant multiplication for the common
+	 case of squaring a complex number.  */
+      if (ar == br && ai == bi)
+	t4 = t3;
+      else
+	t4 = gimple_build (stmts, loc, MULT_EXPR, inner_type, ai, br);
 
-  *rr = gimple_build (stmts, loc, MINUS_EXPR, type, t1, t2);
-  *ri = gimple_build (stmts, loc, PLUS_EXPR, type, t3, t4);
+      *rr = gimple_build (stmts, loc, MINUS_EXPR, inner_type, t1, t2);
+      *ri = gimple_build (stmts, loc, PLUS_EXPR, inner_type, t3, t4);
+    }
+  else
+    {
+      tree rc = gimple_build (stmts, loc, CFN_FAST_MULT, type, ac, bc);
+      *rr = gimple_build (stmts, loc, REALPART_EXPR, inner_type, rc);
+      *ri = gimple_build (stmts, loc, IMAGPART_EXPR, inner_type, rc);
+    }
 }
 
 /* Expand complex multiplication to scalars:
@@ -1165,13 +1176,18 @@  expand_complex_multiplication_components (gimple_seq *stmts, location_t loc,
 
 static void
 expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
-			       tree ar, tree ai, tree br, tree bi,
+			       tree ac, tree ar, tree ai,
+			       tree bc, tree br, tree bi,
 			       complex_lattice_t al, complex_lattice_t bl)
 {
   tree rr, ri;
   tree inner_type = TREE_TYPE (type);
   location_t loc = gimple_location (gsi_stmt (*gsi));
   gimple_seq stmts = NULL;
+  bool fast_mult = direct_internal_fn_supported_p (IFN_FAST_MULT, type,
+						   bb_optimization_type
+						   (gimple_bb
+						    (gsi_stmt (*gsi))));
 
   if (al < bl)
     {
@@ -1232,9 +1248,10 @@  expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
 	    {
 	      /* If we are not worrying about NaNs expand to
 		 (ar*br - ai*bi) + i(ar*bi + br*ai) directly.  */
-	      expand_complex_multiplication_components (&stmts, loc, inner_type,
-							ar, ai, br, bi,
-							&rr, &ri);
+	      expand_complex_multiplication_components (&stmts, loc, type,
+							ac, ar, ai, bc, br,
+							bi, &rr, &ri,
+							fast_mult);
 	      break;
 	    }
 
@@ -1245,8 +1262,9 @@  expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
 
 	  tree tmpr, tmpi;
 	  expand_complex_multiplication_components (&stmts, loc,
-						    inner_type, ar, ai,
-						    br, bi, &tmpr, &tmpi);
+						    type, ac, ar, ai,
+						    bc, br, bi, &tmpr, &tmpi,
+						    fast_mult);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	  stmts = NULL;
 
@@ -1297,10 +1315,11 @@  expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
 	}
       else
 	/* If we are not worrying about NaNs expand to
-	  (ar*br - ai*bi) + i(ar*bi + br*ai) directly.  */
+	   (ar*br - ai*bi) + i(ar*bi + br*ai) directly.  */
 	expand_complex_multiplication_components (&stmts, loc,
-						  inner_type, ar, ai,
-						  br, bi, &rr, &ri);
+						  type, ac, ar, ai,
+						  bc, br, bi, &rr, &ri,
+						  fast_mult);
       break;
 
     default:
@@ -2096,7 +2115,8 @@  expand_complex_operations_1 (gimple_stmt_iterator *gsi)
       break;
 
     case MULT_EXPR:
-      expand_complex_multiplication (gsi, type, ar, ai, br, bi, al, bl);
+      expand_complex_multiplication (gsi, type, ac, ar, ai, bc, br, bi, al,
+				     bl);
       break;
 
     case TRUNC_DIV_EXPR: