diff mbox

[i386] fma,fma4 and xop flags

Message ID 20120808113105.21153.11115.sendpatchset@adcelk01.amd.com
State New
Headers show

Commit Message

Gopalasubramanian, Ganesh Aug. 8, 2012, 11:31 a.m. UTC
Hello,

Bdver2 cpu supports both fma and fma4 instructions.
Previous to patch, option "-mno-xop" removes "-mfma4".
Similarly, option "-mno-fma4" removes "-mxop".

So, the patch conditionally disables "-mfma" or "-mfma4".
Enabling "-mxop" is done by also checking "-mfma".

Ok for trunk?

Regards
Ganesh

2012-08-08  Ganesh Gopalasubramanian  <Ganesh.Gopalasubramanian@amd.com>

    * common/config/i386/i386-common.c (ix86_handle_option): Reset 
    fma flag after checking fma4. Reset fma4 flag after checking fma. 
    Set xop flag after checking fma flags.

Comments

Richard Biener Aug. 8, 2012, 11:42 a.m. UTC | #1
On Wed, Aug 8, 2012 at 1:31 PM,  <Ganesh.Gopalasubramanian@amd.com> wrote:
> Hello,
>
> Bdver2 cpu supports both fma and fma4 instructions.
> Previous to patch, option "-mno-xop" removes "-mfma4".
> Similarly, option "-mno-fma4" removes "-mxop".

Eh?  Why's that?  I think we should disentangle -mxop and -mfma4
instead.  Otherwise, what does -mno-fma4 -mxop do?
(it should enable both xop and fma4!)  what should -mfma4 -mno-xop do
(it should disable both xop and fma4!).  All this is just confusing to
the user, even if in AMD documents XOP includes FMA4.

Richard.

> So, the patch conditionally disables "-mfma" or "-mfma4".
> Enabling "-mxop" is done by also checking "-mfma".
>
> Ok for trunk?
>
> Regards
> Ganesh
>
> 2012-08-08  Ganesh Gopalasubramanian  <Ganesh.Gopalasubramanian@amd.com>
>
>     * common/config/i386/i386-common.c (ix86_handle_option): Reset
>     fma flag after checking fma4. Reset fma4 flag after checking fma.
>     Set xop flag after checking fma flags.
>
> Index: gcc/common/config/i386/i386-common.c
> ===================================================================
> --- gcc/common/config/i386/i386-common.c        (revision 189996)
> +++ gcc/common/config/i386/i386-common.c        (working copy)
> @@ -310,8 +310,16 @@
>         }
>        else
>         {
> -         opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
> -         opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
> +          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA4)
> +            {
> +              opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA ;
> +              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA;
> +            }
> +          else
> +            {
> +             opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
> +             opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
> +            }
>         }
>        return true;
>
> @@ -359,16 +367,32 @@
>         }
>        else
>         {
> -         opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
> -         opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
> +          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA)
> +            {
> +             opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4 ;
> +              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4;
> +            }
> +          else
> +            {
> +             opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
> +             opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
> +           }
>         }
>        return true;
>
>     case OPT_mxop:
>        if (value)
>         {
> -         opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
> -         opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
> +          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA)
> +            {
> +              opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP ;
> +              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP;
> +            }
> +            else
> +            {
> +             opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
> +             opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
> +           }
>         }
>        else
>         {
>
Gopalasubramanian, Ganesh Aug. 9, 2012, 5:55 a.m. UTC | #2
> Otherwise, what does -mno-fma4 -mxop do?
> (it should enable both xop and fma4!)  what should -mfma4 -mno-xop do
> (it should disable both xop and fma4!).

Yes! that's what GCC does now.
Some flags are coupled (atleast for now).
For ex, -mno-sse4.2 -mavx enables both sse4.2 and avx 
whereas -mavx -mno-sse4.2 disables both.

Setting of the following are clubbed.
1) 3DNow sets MMX
2) SSE2 sets SSE
3) SSE3 sets SSE2
4) SSE4_1 sets SSE3
5) SSE4_2 sets SSE4_1
6) FMA sets AVX
7) AVX2 sets AVX
8) SSE4_A sets SSE3
9) FMA4 set SSE4_A and AVX
10) XOP sets FMA4
11) AES sets SSE2
12) PCLMUL sets SSE2
13) ABM sets POPCNT

Resetting is done in reversely (MMX resets 3DNOW).

IMO, if we have different cpuid flags, enabling\disabling 
the compiler flags depends on these cpuid flags directly. 
Adding subsets to them or tangling them together may give 
wrong results.

Please let me know your opinion.

Regards
Ganesh

-----Original Message-----
From: Richard Guenther [mailto:richard.guenther@gmail.com] 
Sent: Wednesday, August 08, 2012 5:12 PM
To: Gopalasubramanian, Ganesh
Cc: gcc-patches@gcc.gnu.org; ubizjak@gmail.com
Subject: Re: [PATCH,i386] fma,fma4 and xop flags

On Wed, Aug 8, 2012 at 1:31 PM,  <Ganesh.Gopalasubramanian@amd.com> wrote:
> Hello,
>
> Bdver2 cpu supports both fma and fma4 instructions.
> Previous to patch, option "-mno-xop" removes "-mfma4".
> Similarly, option "-mno-fma4" removes "-mxop".

Eh?  Why's that?  I think we should disentangle -mxop and -mfma4
instead.  Otherwise, what does -mno-fma4 -mxop do?
(it should enable both xop and fma4!)  what should -mfma4 -mno-xop do
(it should disable both xop and fma4!).  All this is just confusing to
the user, even if in AMD documents XOP includes FMA4.

Richard.
Richard Biener Aug. 9, 2012, 8:26 a.m. UTC | #3
On Thu, Aug 9, 2012 at 7:55 AM, Gopalasubramanian, Ganesh
<Ganesh.Gopalasubramanian@amd.com> wrote:
>> Otherwise, what does -mno-fma4 -mxop do?
>> (it should enable both xop and fma4!)  what should -mfma4 -mno-xop do
>> (it should disable both xop and fma4!).
>
> Yes! that's what GCC does now.
> Some flags are coupled (atleast for now).
> For ex, -mno-sse4.2 -mavx enables both sse4.2 and avx
> whereas -mavx -mno-sse4.2 disables both.
>
> Setting of the following are clubbed.
> 1) 3DNow sets MMX
> 2) SSE2 sets SSE
> 3) SSE3 sets SSE2
> 4) SSE4_1 sets SSE3
> 5) SSE4_2 sets SSE4_1
> 6) FMA sets AVX
> 7) AVX2 sets AVX
> 8) SSE4_A sets SSE3
> 9) FMA4 set SSE4_A and AVX
> 10) XOP sets FMA4
> 11) AES sets SSE2
> 12) PCLMUL sets SSE2
> 13) ABM sets POPCNT
>
> Resetting is done in reversely (MMX resets 3DNOW).
>
> IMO, if we have different cpuid flags, enabling\disabling
> the compiler flags depends on these cpuid flags directly.
> Adding subsets to them or tangling them together may give
> wrong results.

Uh, ok ... it's messier than I anticipated ;)

> Please let me know your opinion.

Well, your patch looks reasonable then.  I'll defer to x86 maintainers for
approval though.

Thanks,
Richard.

> Regards
> Ganesh
>
> -----Original Message-----
> From: Richard Guenther [mailto:richard.guenther@gmail.com]
> Sent: Wednesday, August 08, 2012 5:12 PM
> To: Gopalasubramanian, Ganesh
> Cc: gcc-patches@gcc.gnu.org; ubizjak@gmail.com
> Subject: Re: [PATCH,i386] fma,fma4 and xop flags
>
> On Wed, Aug 8, 2012 at 1:31 PM,  <Ganesh.Gopalasubramanian@amd.com> wrote:
>> Hello,
>>
>> Bdver2 cpu supports both fma and fma4 instructions.
>> Previous to patch, option "-mno-xop" removes "-mfma4".
>> Similarly, option "-mno-fma4" removes "-mxop".
>
> Eh?  Why's that?  I think we should disentangle -mxop and -mfma4
> instead.  Otherwise, what does -mno-fma4 -mxop do?
> (it should enable both xop and fma4!)  what should -mfma4 -mno-xop do
> (it should disable both xop and fma4!).  All this is just confusing to
> the user, even if in AMD documents XOP includes FMA4.
>
> Richard.
>
Uros Bizjak Aug. 9, 2012, 7:51 p.m. UTC | #4
On Wed, Aug 8, 2012 at 1:31 PM,  <Ganesh.Gopalasubramanian@amd.com> wrote:

> Bdver2 cpu supports both fma and fma4 instructions.
> Previous to patch, option "-mno-xop" removes "-mfma4".
> Similarly, option "-mno-fma4" removes "-mxop".

It looks to me that there is some misunderstanding. AFAICS:

-mxop implies -mfma4, but reverse is not true. Please see

#define OPTION_MASK_ISA_FMA4_SET \
  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
   | OPTION_MASK_ISA_AVX_SET)
#define OPTION_MASK_ISA_XOP_SET \
  (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)

So, -mxop sets -mfma4, etc ..., but -mfma4 does NOT enable -mxop.

OTOH,

#define OPTION_MASK_ISA_FMA4_UNSET \
  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP

-mno-fma4 implies -mno-xop, but again reverse is not true. Thus,
-mno-xop does NOT imply -mno-fma4.

> So, the patch conditionally disables "-mfma" or "-mfma4".
> Enabling "-mxop" is done by also checking "-mfma".

Please note that conditional handling of ISA flags belongs to
ix86_option_override_internal. However, if someone set -mfma4 together
with -mfma on the command line, we should NOT disable selected ISA
behind user's back, in the same way as we don't disable anything with
"-march=i386 -msse4". With -march=bdver2, we already marked that only
fma is supported, and if user selected "-march=bdver2 -mfma4" on the
command line, we shouldn't disable anything.

Uros.
Gopalasubramanian, Ganesh Aug. 10, 2012, 5:27 a.m. UTC | #5
> -mxop implies -mfma4, but reverse is not true.

I think this handling went in for bdver1.
But, with bdver2, we have both fma and fma4.
So for bdver2, -mxop should not be enabling one of them.

> if someone set -mfma4 together
> with -mfma on the command line, we should NOT disable selected ISA
> behind user's back

If both -mfma4 and -mfma are enabled, GCC outputs fma4 instructions.
This, I think is because fma4 instruction patterns are read before fma 
instruction patterns from the ".md" files.
So, enabling both -mfma4 and -mfma is not good for bdver2.

Moreover, if user tries to use, -mfma -mno-fma4 -mxop, the order in which these 
options are used becomes crucial. -mxop enables -mfma4 and by instruction 
patterns fma4 instructions gets listed in the assembly file.

For the below test,

double a,b,c,d;
int fn(){
        a = b + c * d ;
        return a;
}

#1) Using options "-O2 -mno-fma4 -mfma -mxop" outputs fma4. 
(vfmaddsd        b(%rip), %xmm2, %xmm1, %xmm0)
#2) Using options "-O2 -mfma -mno-fma4 -mxop" outputs fma4.
(vfmaddsd        b(%rip), %xmm2, %xmm1, %xmm0)
#3) Using options "-mxop -mno-fma4 -mfma" outpts fma.
(vfmadd132sd     d(%rip), %xmm1, %xmm0)

As we see the order in which the options are used becomes crucial.
This is confusing.

I haven't really tested other implied options. But, I suspect
similar phenomenon in those cases too. 

IMO, we can directly go by the CPUID flags and enable the flags.
This will be a one to one mapping and leave the user with lot more liberty.
Please let me know your opinion.

Regards
Ganesh

-----Original Message-----
From: Uros Bizjak [mailto:ubizjak@gmail.com] 
Sent: Friday, August 10, 2012 1:21 AM
To: Gopalasubramanian, Ganesh
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH,i386] fma,fma4 and xop flags

On Wed, Aug 8, 2012 at 1:31 PM,  <Ganesh.Gopalasubramanian@amd.com> wrote:

> Bdver2 cpu supports both fma and fma4 instructions.
> Previous to patch, option "-mno-xop" removes "-mfma4".
> Similarly, option "-mno-fma4" removes "-mxop".

It looks to me that there is some misunderstanding. AFAICS:

-mxop implies -mfma4, but reverse is not true. Please see

#define OPTION_MASK_ISA_FMA4_SET \
  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
   | OPTION_MASK_ISA_AVX_SET)
#define OPTION_MASK_ISA_XOP_SET \
  (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)

So, -mxop sets -mfma4, etc ..., but -mfma4 does NOT enable -mxop.

OTOH,

#define OPTION_MASK_ISA_FMA4_UNSET \
  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP

-mno-fma4 implies -mno-xop, but again reverse is not true. Thus,
-mno-xop does NOT imply -mno-fma4.

> So, the patch conditionally disables "-mfma" or "-mfma4".
> Enabling "-mxop" is done by also checking "-mfma".

Please note that conditional handling of ISA flags belongs to
ix86_option_override_internal. However, if someone set -mfma4 together
with -mfma on the command line, we should NOT disable selected ISA
behind user's back, in the same way as we don't disable anything with
"-march=i386 -msse4". With -march=bdver2, we already marked that only
fma is supported, and if user selected "-march=bdver2 -mfma4" on the
command line, we shouldn't disable anything.

Uros.
diff mbox

Patch

Index: gcc/common/config/i386/i386-common.c
===================================================================
--- gcc/common/config/i386/i386-common.c	(revision 189996)
+++ gcc/common/config/i386/i386-common.c	(working copy)
@@ -310,8 +310,16 @@ 
 	}
       else
 	{
-	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
-	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA4)
+            {
+              opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA ;
+              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA;
+            }
+          else
+            {
+	      opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
+	      opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+            }
 	}
       return true;
 
@@ -359,16 +367,32 @@ 
 	}
       else
 	{
-	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
-	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
+          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA)
+            {
+	      opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4 ;
+              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4;
+            }
+          else
+            {
+	      opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
+	      opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
+	    }
 	}
       return true;
 
    case OPT_mxop:
       if (value)
 	{
-	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
-	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
+          if (opts->x_ix86_isa_flags & OPTION_MASK_ISA_FMA)
+            {
+              opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP ;
+              opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP;
+            }
+            else
+            {
+	      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
+	      opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
+	    }
 	}
       else
 	{