From patchwork Fri Oct 22 19:41:49 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 68943 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id BB6DCB70D2 for ; Sat, 23 Oct 2010 06:41:55 +1100 (EST) Received: (qmail 17252 invoked by alias); 22 Oct 2010 19:41:54 -0000 Received: (qmail 17238 invoked by uid 22791); 22 Oct 2010 19:41:53 -0000 X-SWARE-Spam-Status: No, hits=-4.8 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_HI, SARE_HTML_INV_TAG, SPF_HELO_PASS, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 22 Oct 2010 19:41:49 +0000 Received: from int-mx08.intmail.prod.int.phx2.redhat.com (int-mx08.intmail.prod.int.phx2.redhat.com [10.5.11.21]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o9MJfltO015871 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Fri, 22 Oct 2010 15:41:48 -0400 Received: from stone.twiddle.home ([10.3.113.17]) by int-mx08.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o9MJfkGw003925 for ; Fri, 22 Oct 2010 15:41:46 -0400 Message-ID: <4CC1E8FD.4080803@redhat.com> Date: Fri, 22 Oct 2010 12:41:49 -0700 From: Richard Henderson User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.9) Gecko/20100921 Fedora/3.1.4-1.fc13 Thunderbird/3.1.4 MIME-Version: 1.0 To: GCC Patches Subject: [i386] Transform __builtin_fma during vectorization. 
X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org We already have the vector builtins; we just needed to hook them up. r~ diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f04fc4..2c8bf86 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2010-10-22 Richard Henderson + * config/i386/i386.c (ix86_builtin_vectorized_function): Add + cases for __builtin_fma and __builtin_fmaf. + +2010-10-22 Richard Henderson + * config/i386/i386.c (ix86_expand_fp_absneg_operator): Produce NEG+USE for vectors as well. * config/i386/i386.md (*absneg2): New pattern diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ee88b06..bf16d08 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -26105,8 +26105,28 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out, } break; + case BUILT_IN_FMA: + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_VFMADDPD]; + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_VFMADDPD256]; + } + break; + + case BUILT_IN_FMAF: + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_VFMADDPS]; + if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_VFMADDPS256]; + } + break; + default: - ; + break; } /* Dispatch to a handler for a vectorization library. 
*/ diff --git a/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c b/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c new file mode 100644 index 0000000..b84e4a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma3-builtin-2.c @@ -0,0 +1,97 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mfma -mno-fma4" } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +double vda[SIZE] __attribute__((__aligned__(32))); +double vdb[SIZE] __attribute__((__aligned__(32))); +double vdc[SIZE] __attribute__((__aligned__(32))); +double vdd[SIZE] __attribute__((__aligned__(32))); + +float vfa[SIZE] __attribute__((__aligned__(32))); +float vfb[SIZE] __attribute__((__aligned__(32))); +float vfc[SIZE] __attribute__((__aligned__(32))); +float vfd[SIZE] __attribute__((__aligned__(32))); + +void +vector_fma (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); +} + +void +vector_fms (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); +} + +void +vector_fnma (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (-vdb[i], vdc[i], vdd[i]); +} + +void +vector_fnms (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (-vdb[i], vdc[i], -vdd[i]); +} + +void +vector_fmaf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); +} + +void +vector_fmsf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); +} + +void +vector_fnmaf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], vfd[i]); +} + +void +vector_fnmsf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], -vfd[i]); +} + +/* { dg-final { scan-assembler-times "vfmadd...ps" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...pd" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub...ps" 1 } } */ +/* { dg-final { 
scan-assembler-times "vfmsub...pd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...pd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...pd" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c b/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c new file mode 100644 index 0000000..a7e3975 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma4-builtin-2.c @@ -0,0 +1,97 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mfma4" } */ + +#ifndef SIZE +#define SIZE 1024 +#endif + +double vda[SIZE] __attribute__((__aligned__(32))); +double vdb[SIZE] __attribute__((__aligned__(32))); +double vdc[SIZE] __attribute__((__aligned__(32))); +double vdd[SIZE] __attribute__((__aligned__(32))); + +float vfa[SIZE] __attribute__((__aligned__(32))); +float vfb[SIZE] __attribute__((__aligned__(32))); +float vfc[SIZE] __attribute__((__aligned__(32))); +float vfd[SIZE] __attribute__((__aligned__(32))); + +void +vector_fma (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); +} + +void +vector_fms (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); +} + +void +vector_fnma (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (-vdb[i], vdc[i], vdd[i]); +} + +void +vector_fnms (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vda[i] = __builtin_fma (-vdb[i], vdc[i], -vdd[i]); +} + +void +vector_fmaf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); +} + +void +vector_fmsf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); +} + +void +vector_fnmaf (void) +{ + int i; + + for (i = 0; i < SIZE; i++) + vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], vfd[i]); +} + +void +vector_fnmsf (void) +{ + int i; + + for (i = 0; i < SIZE; 
i++) + vfa[i] = __builtin_fmaf (-vfb[i], vfc[i], -vfd[i]); +} + +/* { dg-final { scan-assembler-times "vfmaddps" 1 } } */ +/* { dg-final { scan-assembler-times "vfmaddpd" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsubps" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsubpd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmaddps" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmaddpd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsubps" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsubpd" 1 } } */