===================================================================
@@ -1470,8 +1470,8 @@
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
- (__v2df) __B);
+ return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
+ (__v2df) __A);
}
extern __inline __m128
@@ -1478,8 +1478,8 @@
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
- (__v4sf) __B);
+ return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
+ (__v4sf) __A);
}
extern __inline __m512d
--cut here--
The vec_merge RSQRT and RCP patterns are unops of type "sse". To correctly
determine the "memory" attribute, "sse" types look at operand 1 only,
which is why the pattern is defined in this way.
There is a similar problem with the vec_merge rcp28 and rsqrt28 patterns.
Operands 1 and 2 are swapped in the mnemonic, since only the last
operand may be a memory operand:
===================================================================
@@ -12825,7 +12825,7 @@
(match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512ER"
- "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0,
%1, %2<round_saeonly_op3>}"
+ "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0,
%2, %1<round_saeonly_op3>}"
[(set_attr "length_immediate" "1")