diff mbox

[i386,13/8,AVX-512] Fix argument order for perm and recp intrinsics.

Message ID 20140218100651.GA4382@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Feb. 18, 2014, 10:06 a.m. UTC
Hello Uroš,
On 17 Feb 13:41, Uros Bizjak wrote:
> On Mon, Feb 17, 2014 at 1:26 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> 
> >> >> Please don't change srcp pattern, it should be defined similar to
> >> >> vrcpss (aka sse_vmrcpv4sf). You need to switch operand order
> >> >> elsewhere.
> >> >
> >> > No, you are correct. Operands should be swapped as in your patch.
> >>
> >> Eh, sorry that after some more thinking, I have to again revert this decision.
> >>
> >> The srcp pattern should remain as is, and you should swap operands in
> >> avx512fintrin.h instead:
> >
> > In the bottom there's updated patch.
> >
> > Added "sse" type. mem operand made second.
> > Built-ins & tests fixed.
> >
> > Testing in progress.
> >
> > Is it ok for mainline if pass?
> 
> No, you got operand order wrong.
> 
> To correctly calculate "memory" attribute, all "sse" type insns expect
> the operands in the way sse_vmrcpv4sf2 is defined. You should keep
> nonimmedate operand as operand_1 and switch operands in builtins and
> insn mnemonics to fulfill required operand order *in the pattern*.
Patch updated. It is in the bottom.
gcc/
	* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
	(_mm_rcp28_round_ss): Ditto.
	(_mm_rsqrt28_round_sd): Ditto.
	(_mm_rsqrt28_round_ss): Ditto.
	* config/i386/avx512erintrin.h (_mm_rcp14_round_sd): Ditto.
	(_mm_rcp14_round_ss): Ditto.
	(_mm_rsqrt14_round_sd): Ditto.
	(_mm_rsqrt14_round_ss): Ditto.
	* config/i386/sse.md (rsqrt14<mode>): Make memory first operand.
	(avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
	attribute to sse.
	(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
	Ditto.
	(avx512er_vmrcp28<mode><round_saeonly_name>): Make memory first
	operand, set type attribute.
	(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
	Set type attribute.
	(avx512er_vmrsqrt28<mode><round_saeonly_name>): Make memory first
	operand, Set type attribute.

gcc/testsuite/
	* gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
	* gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
	* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
	* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
	* gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.

--
Thanks, K

Comments

Uros Bizjak Feb. 18, 2014, 10:34 a.m. UTC | #1
On Tue, Feb 18, 2014 at 11:06 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

>> >> >> Please don't change srcp pattern, it should be defined similar to
>> >> >> vrcpss (aka sse_vmrcpv4sf). You need to switch operand order
>> >> >> elsewhere.
>> >> >
>> >> > No, you are correct. Operands should be swapped as in your patch.
>> >>
>> >> Eh, sorry that after some more thinking, I have to again revert this decision.
>> >>
>> >> The srcp pattern should remain as is, and you should swap operands in
>> >> avx512fintrin.h instead:
>> >
>> > In the bottom there's updated patch.
>> >
>> > Added "sse" type. mem operand made second.
>> > Built-ins & tests fixed.
>> >
>> > Testing in progress.
>> >
>> > Is it ok for mainline if pass?
>>
>> No, you got operand order wrong.
>>
>> To correctly calculate "memory" attribute, all "sse" type insns expect
>> the operands in the way sse_vmrcpv4sf2 is defined. You should keep
>> nonimmedate operand as operand_1 and switch operands in builtins and
>> insn mnemonics to fulfill required operand order *in the pattern*.
> Patch updated. It is in the bottom.
> gcc/
>         * config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
>         (_mm_rcp28_round_ss): Ditto.
>         (_mm_rsqrt28_round_sd): Ditto.
>         (_mm_rsqrt28_round_ss): Ditto.
>         * config/i386/avx512erintrin.h (_mm_rcp14_round_sd): Ditto.
>         (_mm_rcp14_round_ss): Ditto.
>         (_mm_rsqrt14_round_sd): Ditto.
>         (_mm_rsqrt14_round_ss): Ditto.
>         * config/i386/sse.md (rsqrt14<mode>): Make memory first operand.

"Put nonimmediate operand as the first input operand." (and in similar
way below).

>         (avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
>         attribute to sse.
>         (<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
>         Ditto.
>         (avx512er_vmrcp28<mode><round_saeonly_name>): Make memory first
>         operand, set type attribute.
>         (<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
>         Set type attribute.
>         (avx512er_vmrsqrt28<mode><round_saeonly_name>): Make memory first
>         operand, Set type attribute.
>
> gcc/testsuite/
>         * gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
>         * gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
>         * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
>         * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
>         * gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
>         * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.

OK with a slight adjustement to vrcp14 patter below.

> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1551,13 +1551,13 @@
>    [(set (match_operand:VF_128 0 "register_operand" "=v")
>         (vec_merge:VF_128
>           (unspec:VF_128
> -           [(match_operand:VF_128 1 "register_operand" "v")
> -            (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
> +           [(match_operand:VF_128 2 "register_operand" "v")
> +            (match_operand:VF_128 1 "nonimmediate_operand" "vm")]
>             UNSPEC_RSQRT14)
>           (match_dup 1)
>           (const_int 1)))]
>    "TARGET_AVX512F"
> -  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> +  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"

This pattern should probably read the same as other vmrsqrt patterns
(e.g. sse_vmrsqrtv4sf2 and avx512er_vmrsqrt28...):

       (vec_merge:VF_128
         (unspec:VF_128
           [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
           UNSPEC_RSQRT14)
         (match_operand:VF_128 2 "register_operand" "v")
         (const_int 1)))]
  "TARGET_AVX512F"
  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"

OK with the change above.

Thanks,
Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index 6fe05bc..f6870a5 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -163,8 +163,8 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
 {
-  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __A,
-						 (__v2df) __B,
+  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
+						 (__v2df) __A,
 						 __R);
 }
 
@@ -172,8 +172,8 @@  extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
 {
-  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __A,
-						(__v4sf) __B,
+  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
+						(__v4sf) __A,
 						__R);
 }
 
@@ -237,8 +237,8 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
 {
-  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __A,
-						   (__v2df) __B,
+  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
+						   (__v2df) __A,
 						   __R);
 }
 
@@ -246,8 +246,8 @@  extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
 {
-  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __A,
-						  (__v4sf) __B,
+  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
+						  (__v4sf) __A,
 						  __R);
 }
 
@@ -375,16 +375,16 @@  _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
     _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rcp28_sd(A, B)	\
-    __builtin_ia32_rcp28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rcp28_ss(A, B)	\
-    __builtin_ia32_rcp28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rsqrt28_sd(A, B)	\
-    __builtin_ia32_rsqrt28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rsqrt28_ss(A, B)	\
-    __builtin_ia32_rsqrt28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #ifdef __DISABLE_AVX512ER__
 #undef __DISABLE_AVX512ER__
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index d53a40d..f9b04d3 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -1470,16 +1470,16 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp14_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
-					   (__v2df) __B);
+  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
+					   (__v2df) __A);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp14_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
-					  (__v4sf) __B);
+  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
+					  (__v4sf) __A);
 }
 
 extern __inline __m512d
@@ -1544,16 +1544,16 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
-					     (__v2df) __B);
+  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
+					     (__v2df) __A);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
-					    (__v4sf) __B);
+  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
+					    (__v4sf) __A);
 }
 
 #ifdef __OPTIMIZE__
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5595767..392bcf5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1551,13 +1551,13 @@ 
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "register_operand" "v")
-	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+	    [(match_operand:VF_128 2 "register_operand" "v")
+	     (match_operand:VF_128 1 "nonimmediate_operand" "vm")]
 	    UNSPEC_RSQRT14)
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
@@ -12804,6 +12804,7 @@ 
   "TARGET_AVX512ER"
   "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
@@ -12814,6 +12815,7 @@ 
   "TARGET_AVX512ER"
   "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
@@ -12825,9 +12827,10 @@ 
 	  (match_operand:VF_128 2 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512ER"
-  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
+  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
@@ -12838,6 +12841,7 @@ 
   "TARGET_AVX512ER"
   "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
@@ -12849,8 +12853,9 @@ 
 	  (match_operand:VF_128 2 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512ER"
-  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
+  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
+   (set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
index d30f088..889f990 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
@@ -10,19 +10,20 @@ 
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 204179.345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
index 499a977..3280879 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
@@ -10,19 +10,20 @@ 
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179345.006 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
index 1537a59..bd217e8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
@@ -10,19 +10,20 @@ 
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 45 - 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
index f88422e..f7bfff5 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
@@ -10,19 +10,20 @@ 
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179221345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
index 0c9211a..f944600 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -8,8 +8,8 @@ 
 static void
 compute_vrcp14sd (double *s1, double *s2, double *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
 }
 
 static void
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
index 3344dad..7aca591 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -8,10 +8,10 @@ 
 static void
 compute_vrcp14ss (float *s1, float *s2, float *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
-  r[2] = s2[2];
-  r[3] = s2[3];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
+  r[2] = s1[2];
+  r[3] = s1[3];
 }
 
 static void