Patchwork [i386] scalar ops that preserve the high part of a vector

login
register
mail settings
Submitter Marc Glisse
Date Dec. 7, 2012, 2:02 p.m.
Message ID <alpine.DEB.2.02.1212071451430.9922@stedding.saclay.inria.fr>
Download mbox | patch
Permalink /patch/204493/
State New
Headers show

Comments

Marc Glisse - Dec. 7, 2012, 2:02 p.m.
On Fri, 7 Dec 2012, Michael Zolotukhin wrote:

> By the way, if we decide to have one pattern for V4SF instructions and
> another for V2DF, we could try to use recently introduced define_subst
> here. It won't reduce number of actual patterns (I mean number of
> patterns after iterators and subst expanding), but it could help to
> make sse.md more compact.

Here is a version of the patch with define_subst. This helps make sse.md 
more compact indeed (well, the define_subst takes space, but it will 
already be there for mult, div, etc). One side effect is that in the 
expanded .md file, we have both variants of the V2DF operation (I switched 
the builtins to use the _vconcat version).

(not tested beyond "make dumpmd" and a quick look at that dump)

Patch

Index: testsuite/gcc.target/i386/pr54855-2.c

===================================================================
--- testsuite/gcc.target/i386/pr54855-2.c	(revision 0)

+++ testsuite/gcc.target/i386/pr54855-2.c	(revision 0)

@@ -0,0 +1,18 @@ 

+/* { dg-do compile } */

+/* { dg-options "-O -msse" } */

+

+typedef float vec __attribute__((vector_size(16)));

+

+vec f (vec x)

+{

+  x[0] += 2;

+  return x;

+}

+

+vec g (vec x)

+{

+  x[0] -= 1;

+  return x;

+}

+

+/* { dg-final { scan-assembler-not "mov" } } */


Property changes on: testsuite/gcc.target/i386/pr54855-2.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: testsuite/gcc.target/i386/pr54855-1.c

===================================================================
--- testsuite/gcc.target/i386/pr54855-1.c	(revision 0)

+++ testsuite/gcc.target/i386/pr54855-1.c	(revision 0)

@@ -0,0 +1,18 @@ 

+/* { dg-do compile } */

+/* { dg-options "-O -msse2" } */

+

+typedef double vec __attribute__((vector_size(16)));

+

+vec f (vec x)

+{

+  x[0] += 2;

+  return x;

+}

+

+vec g (vec x)

+{

+  x[0] -= 1;

+  return x;

+}

+

+/* { dg-final { scan-assembler-not "mov" } } */


Property changes on: testsuite/gcc.target/i386/pr54855-1.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: config/i386/i386.c

===================================================================
--- config/i386/i386.c	(revision 194301)

+++ config/i386/i386.c	(working copy)

@@ -27070,22 +27070,22 @@  static const struct builtin_description

   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_FLOAT },

+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_FLOAT },

   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
@@ -27174,22 +27174,22 @@  static const struct builtin_description

   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3_vconcat,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DOUBLE },

+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3_vconcat,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DOUBLE },

   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
@@ -30801,34 +30801,36 @@  ix86_expand_args_builtin (const struct b

     case V4HI_FTYPE_V8QI_V8QI:
     case V4HI_FTYPE_V2SI_V2SI:
     case V4DF_FTYPE_V4DF_V4DF:
     case V4DF_FTYPE_V4DF_V4DI:
     case V4SF_FTYPE_V4SF_V4SF:
     case V4SF_FTYPE_V4SF_V4SI:
     case V4SF_FTYPE_V4SF_V2SI:
     case V4SF_FTYPE_V4SF_V2DF:
     case V4SF_FTYPE_V4SF_DI:
     case V4SF_FTYPE_V4SF_SI:
+    case V4SF_FTYPE_V4SF_FLOAT:

     case V2DI_FTYPE_V2DI_V2DI:
     case V2DI_FTYPE_V16QI_V16QI:
     case V2DI_FTYPE_V4SI_V4SI:
     case V2UDI_FTYPE_V4USI_V4USI:
     case V2DI_FTYPE_V2DI_V16QI:
     case V2DI_FTYPE_V2DF_V2DF:
     case V2SI_FTYPE_V2SI_V2SI:
     case V2SI_FTYPE_V4HI_V4HI:
     case V2SI_FTYPE_V2SF_V2SF:
     case V2DF_FTYPE_V2DF_V2DF:
     case V2DF_FTYPE_V2DF_V4SF:
     case V2DF_FTYPE_V2DF_V2DI:
     case V2DF_FTYPE_V2DF_DI:
     case V2DF_FTYPE_V2DF_SI:
+    case V2DF_FTYPE_V2DF_DOUBLE:

     case V2SF_FTYPE_V2SF_V2SF:
     case V1DI_FTYPE_V1DI_V1DI:
     case V1DI_FTYPE_V8QI_V8QI:
     case V1DI_FTYPE_V2SI_V2SI:
     case V32QI_FTYPE_V16HI_V16HI:
     case V16HI_FTYPE_V8SI_V8SI:
     case V32QI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V16HI_V16HI:
     case V8SI_FTYPE_V4DF_V4DF:
Index: config/i386/xmmintrin.h

===================================================================
--- config/i386/xmmintrin.h	(revision 194301)

+++ config/i386/xmmintrin.h	(working copy)

@@ -92,27 +92,27 @@  _mm_setzero_ps (void)

   return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
 }
 
 /* Perform the respective operation on the lower SPFP (single-precision
    floating-point) values of A and B; the upper three SPFP values are
    passed through from A.  */
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);

+  return (__m128) __builtin_ia32_addss ((__v4sf)__A, __B[0]);

 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);

+  return (__m128) __builtin_ia32_subss ((__v4sf)__A, __B[0]);

 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_ss (__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_div_ss (__m128 __A, __m128 __B)
Index: config/i386/emmintrin.h

===================================================================
--- config/i386/emmintrin.h	(revision 194301)

+++ config/i386/emmintrin.h	(working copy)

@@ -226,33 +226,33 @@  _mm_cvtsi128_si64x (__m128i __A)

 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);

+  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, __B[0]);

 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);

+  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, __B[0]);

 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_sd (__m128d __A, __m128d __B)
Index: config/i386/sse.md

===================================================================
--- config/i386/sse.md	(revision 194301)

+++ config/i386/sse.md	(working copy)

@@ -404,20 +404,37 @@ 

 
 ;; Mix-n-match
 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
 
 ;; Mapping of immediate bits for blend instructions
 (define_mode_attr blendbits
   [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
 
 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
 
+;; Substitutions

+

+(define_subst "replace_vec_merge_with_vec_concat"

+  [(set (match_operand:V2DF 0 "" "")

+	(vec_merge:V2DF

+	  (vec_duplicate:V2DF (match_operand:DF 2 "" ""))

+	  (match_operand:V2DF 1 "" "")

+	  (const_int 1)))]

+  "TARGET_SSE2"

+  [(set (match_dup 0)

+	(vec_concat:V2DF

+	  (match_dup 2)

+	  (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))))])

+

+(define_subst_attr "vec_merge_or_concat" "replace_vec_merge_with_vec_concat"

+		   "" "_vconcat")

+

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Move patterns
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; All of these patterns are enabled for SSE1 as well as SSE2.
 ;; This is essential for maintaining stable calling conventions.
 
 (define_expand "mov<mode>"
@@ -855,26 +872,29 @@ 

 	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
    v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<plusminus_insn><mode>3"

+(define_insn "<sse>_vm<plusminus_insn><mode>3<vec_merge_or_concat>"

   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
 	(vec_merge:VF_128
-	  (plusminus:VF_128

-	    (match_operand:VF_128 1 "register_operand" "0,x")

-	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))

+	  (vec_duplicate:VF_128

+	    (plusminus:<ssescalarmode>

+	      (vec_select:<ssescalarmode>

+		(match_operand:VF_128 1 "register_operand" "0,x")

+		(parallel [(const_int 0)]))

+	      (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,xm")))

 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
    v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
Index: config/i386/i386-builtin-types.def

===================================================================
--- config/i386/i386-builtin-types.def	(revision 194301)

+++ config/i386/i386-builtin-types.def	(working copy)

@@ -263,20 +263,21 @@  DEF_FUNCTION_TYPE (UINT64, UINT64, UINT6

 DEF_FUNCTION_TYPE (UINT8, UINT8, INT)
 DEF_FUNCTION_TYPE (V16QI, V16QI, SI)
 DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI)
 DEF_FUNCTION_TYPE (V16QI, V8HI, V8HI)
 DEF_FUNCTION_TYPE (V1DI, V1DI, SI)
 DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI)
 DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI)
 DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI)
 DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, DOUBLE)

 DEF_FUNCTION_TYPE (V2DF, V2DF, INT)
 DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE)
 DEF_FUNCTION_TYPE (V2DF, V2DF, SI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF)
 DEF_FUNCTION_TYPE (V2DF, V4DF, INT)
 DEF_FUNCTION_TYPE (V2DI, V16QI, V16QI)
 DEF_FUNCTION_TYPE (V2DI, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V2DI, V2DI, INT)
@@ -296,20 +297,21 @@  DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)

 DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
 DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
 DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI)
 DEF_FUNCTION_TYPE (V4HI, V2SI, V2SI)
 DEF_FUNCTION_TYPE (V4HI, V4HI, INT)
 DEF_FUNCTION_TYPE (V4HI, V4HI, SI)
 DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI)
 DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI)
 DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, DI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT)

 DEF_FUNCTION_TYPE (V4SF, V4SF, INT)
 DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V2SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V8SF, INT)
 DEF_FUNCTION_TYPE (V4SI, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V4SI, V4SF, V4SF)
Index: doc/extend.texi

===================================================================
--- doc/extend.texi	(revision 194301)

+++ doc/extend.texi	(working copy)

@@ -9843,22 +9843,22 @@  int __builtin_ia32_comige (v4sf, v4sf)

 int __builtin_ia32_ucomieq (v4sf, v4sf)
 int __builtin_ia32_ucomineq (v4sf, v4sf)
 int __builtin_ia32_ucomilt (v4sf, v4sf)
 int __builtin_ia32_ucomile (v4sf, v4sf)
 int __builtin_ia32_ucomigt (v4sf, v4sf)
 int __builtin_ia32_ucomige (v4sf, v4sf)
 v4sf __builtin_ia32_addps (v4sf, v4sf)
 v4sf __builtin_ia32_subps (v4sf, v4sf)
 v4sf __builtin_ia32_mulps (v4sf, v4sf)
 v4sf __builtin_ia32_divps (v4sf, v4sf)
-v4sf __builtin_ia32_addss (v4sf, v4sf)

-v4sf __builtin_ia32_subss (v4sf, v4sf)

+v4sf __builtin_ia32_addss (v4sf, float)

+v4sf __builtin_ia32_subss (v4sf, float)

 v4sf __builtin_ia32_mulss (v4sf, v4sf)
 v4sf __builtin_ia32_divss (v4sf, v4sf)
 v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
 v4si __builtin_ia32_cmpltps (v4sf, v4sf)
 v4si __builtin_ia32_cmpleps (v4sf, v4sf)
 v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
 v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
 v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
 v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
 v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
@@ -9964,22 +9964,22 @@  v2df __builtin_ia32_cmpunordsd (v2df, v2

 v2df __builtin_ia32_cmpneqsd (v2df, v2df)
 v2df __builtin_ia32_cmpnltsd (v2df, v2df)
 v2df __builtin_ia32_cmpnlesd (v2df, v2df)
 v2df __builtin_ia32_cmpordsd (v2df, v2df)
 v2di __builtin_ia32_paddq (v2di, v2di)
 v2di __builtin_ia32_psubq (v2di, v2di)
 v2df __builtin_ia32_addpd (v2df, v2df)
 v2df __builtin_ia32_subpd (v2df, v2df)
 v2df __builtin_ia32_mulpd (v2df, v2df)
 v2df __builtin_ia32_divpd (v2df, v2df)
-v2df __builtin_ia32_addsd (v2df, v2df)

-v2df __builtin_ia32_subsd (v2df, v2df)

+v2df __builtin_ia32_addsd (v2df, double)

+v2df __builtin_ia32_subsd (v2df, double)

 v2df __builtin_ia32_mulsd (v2df, v2df)
 v2df __builtin_ia32_divsd (v2df, v2df)
 v2df __builtin_ia32_minpd (v2df, v2df)
 v2df __builtin_ia32_maxpd (v2df, v2df)
 v2df __builtin_ia32_minsd (v2df, v2df)
 v2df __builtin_ia32_maxsd (v2df, v2df)
 v2df __builtin_ia32_andpd (v2df, v2df)
 v2df __builtin_ia32_andnpd (v2df, v2df)
 v2df __builtin_ia32_orpd (v2df, v2df)
 v2df __builtin_ia32_xorpd (v2df, v2df)