From patchwork Fri Oct 22 19:41:49 2010
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson <rth@redhat.com>
X-Patchwork-Id: 68943
Return-Path: 
 <gcc-patches-return-276182-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	by ozlabs.org (Postfix) with SMTP id BB6DCB70D2
	for <incoming@patchwork.ozlabs.org>;
	Sat, 23 Oct 2010 06:41:55 +1100 (EST)
Received: (qmail 17252 invoked by alias); 22 Oct 2010 19:41:54 -0000
Received: (qmail 17238 invoked by uid 22791); 22 Oct 2010 19:41:53 -0000
X-SWARE-Spam-Status: No, hits=-4.8 required=5.0	tests=AWL, BAYES_00,
	RCVD_IN_DNSWL_HI, SARE_HTML_INV_TAG, SPF_HELO_PASS,
	T_RP_MATCHES_RCVD
X-Spam-Check-By: sourceware.org
Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by
	sourceware.org (qpsmtpd/0.43rc1) with ESMTP;
	Fri, 22 Oct 2010 19:41:49 +0000
Received: from int-mx08.intmail.prod.int.phx2.redhat.com
	(int-mx08.intmail.prod.int.phx2.redhat.com [10.5.11.21])	by
	mx1.redhat.com (8.13.8/8.13.8) with ESMTP id
	o9MJfltO015871	(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA
	bits=256 verify=OK)	for <gcc-patches@gcc.gnu.org>;
	Fri, 22 Oct 2010 15:41:48 -0400
Received: from stone.twiddle.home ([10.3.113.17])	by
	int-mx08.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8)
	with ESMTP id o9MJfkGw003925	for <gcc-patches@gcc.gnu.org>;
	Fri, 22 Oct 2010 15:41:46 -0400
Message-ID: <4CC1E8FD.4080803@redhat.com>
Date: Fri, 22 Oct 2010 12:41:49 -0700
From: Richard Henderson <rth@redhat.com>
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US;
	rv:1.9.2.9) Gecko/20100921 Fedora/3.1.4-1.fc13 Thunderbird/3.1.4
MIME-Version: 1.0
To: GCC Patches <gcc-patches@gcc.gnu.org>
Subject: [i386] Transform __builtin_fma during vectorization.
X-IsSubscribed: yes
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org

We already have the vector FMA builtins; we just needed to hook them up
in ix86_builtin_vectorized_function so that the vectorizer can use them.


r~

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5f04fc4..2c8bf86 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
 2010-10-22  Richard Henderson  <rth@redhat.com>
 
+	* config/i386/i386.c (ix86_builtin_vectorized_function): Add
+	cases for __builtin_fma and __builtin_fmaf.
+
+2010-10-22  Richard Henderson  <rth@redhat.com>
+
 	* config/i386/i386.c (ix86_expand_fp_absneg_operator): Produce
 	NEG+USE for vectors as well.
 	* config/i386/i386.md (*absneg<VEC_FLOAT_MODE>2): New pattern
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ee88b06..bf16d08 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -26105,8 +26105,28 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
 	}
       break;
 
+    case BUILT_IN_FMA:
+      if (out_mode == DFmode && in_mode == DFmode)
+	{
+	  if (out_n == 2 && in_n == 2)
+	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+	  if (out_n == 4 && in_n == 4)
+	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
+	}
+      break;
+
+    case BUILT_IN_FMAF:
+      if (out_mode == SFmode && in_mode == SFmode)
+	{
+	  if (out_n == 4 && in_n == 4)
+	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
+	  if (out_n == 8 && in_n == 8)
+	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
+	}
+      break;
+
     default:
-      ;
+      break;
     }
 
   /* Dispatch to a handler for a vectorization library.  */
diff --git a/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c b/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c
new file mode 100644
index 0000000..b84e4a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c
@@ -0,0 +1,97 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mfma -mno-fma4" } */
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]);
+}
+
+void
+vector_fms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]);
+}
+
+void
+vector_fnma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (-vdb[i], vdc[i], vdd[i]);
+}
+
+void
+vector_fnms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (-vdb[i], vdc[i], -vdd[i]);
+}
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);
+}
+
+void
+vector_fmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]);
+}
+
+void
+vector_fnmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], vfd[i]);
+}
+
+void
+vector_fnmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], -vfd[i]);
+}
+
+/* { dg-final { scan-assembler-times "vfmadd...ps" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c b/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c
new file mode 100644
index 0000000..a7e3975
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c
@@ -0,0 +1,97 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mfma4" } */
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]);
+}
+
+void
+vector_fms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]);
+}
+
+void
+vector_fnma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (-vdb[i], vdc[i], vdd[i]);
+}
+
+void
+vector_fnms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (-vdb[i], vdc[i], -vdd[i]);
+}
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);
+}
+
+void
+vector_fmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]);
+}
+
+void
+vector_fnmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], vfd[i]);
+}
+
+void
+vector_fnmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], -vfd[i]);
+}
+
+/* { dg-final { scan-assembler-times "vfmaddps" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddpd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubps" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubpd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmaddps" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmaddpd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsubps" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsubpd" 1 } } */