diff mbox

[testsuite] Fix sse4_1-round* inline asm statements

Message ID CAFULd4ZARDX8V=fb=XkkPjJN0JYcMJciYoQNUUuQwN-TR3bzLQ@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Dec. 8, 2015, 6:10 p.m. UTC
Hello!

> this patch fixes the asm statements in the gcc.target/i386/sse4_1-round* test cases.
>
> They do lots of things that are just absolutely forbidden, like clobber registers
> that are not mentioned in the clobber list, and create a hidden data flow.
>
> The test cases work just by chance, and You can see the asm statements
> ripped completely apart by the loop optimizer if you try to do the assembler
> part in a loop:
>
>   for (i = 0; i < 10; i++) {
>   __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
>
>   __asm__ ("fstcw %0" : "=m" (*&saved_cw));
>   new_cw = saved_cw & clr_mask;
>   new_cw |= type;
>   __asm__ ("fldcw %0" : : "m" (*&new_cw));
>
>   __asm__ ("frndint\n"
>            "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
>   __asm__ ("fldcw %0" : : "m" (*&saved_cw));
>   }
>   return ret;
>
> So this patch avoids the hidden data flow, and
> adds "st" to the clobber list.
>
> Boot-strapped and reg-tested on x86_64-pc-linux-gnu
> OK for trunk?

Uh, no.

x87 is a strange beast, and even your patch is wrong. The assembly
should be written in this way:

  __asm__ ("fnstcw %0" : "=m" (saved_cw));

  new_cw = saved_cw & clr_mask;
  new_cw |= type;

  __asm__ ("fldcw %2\n\t"
       "frndint\n\t"
       "fldcw %3" : "=t" (ret)
              : "0" (f), "m" (new_cw), "m" (saved_cw));

I'm testing the attached patch.

Uros.
diff mbox

Patch

Index: gcc.target/i386/sse4_1-roundsd-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundsd-4.c	(revision 231413)
+++ gcc.target/i386/sse4_1-roundsd-4.c	(working copy)
@@ -36,7 +36,7 @@  init_round (double *src)
 static double
 do_round (double f, int type)
 {
-  short saved_cw, new_cw, clr_mask;
+  unsigned short saved_cw, new_cw, clr_mask;
   double ret;
 
   if ((type & 4))
@@ -50,16 +50,15 @@  do_round (double f, int type)
       clr_mask = ~0x0C3F;
     }
 
-  __asm__ ("fldl %0" : : "m" (*&f));
+  __asm__ ("fstcw %0" : "=m" (saved_cw));
 
-  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
   new_cw = saved_cw & clr_mask;
   new_cw |= type;
-  __asm__ ("fldcw %0" : : "m" (*&new_cw));
 
-  __asm__ ("frndint\n"
-	   "fstpl %0\n" : "=m" (*&ret));
-  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+  __asm__ ("fldcw %2\n\t"
+	   "frndint\n\t"
+	   "fldcw %3" : "=t" (ret)
+		      : "0" (f), "m" (new_cw), "m" (saved_cw));
   return ret;
 }
 
Index: gcc.target/i386/sse4_1-roundss-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundss-4.c	(revision 231413)
+++ gcc.target/i386/sse4_1-roundss-4.c	(working copy)
@@ -36,7 +36,7 @@  init_round (float *src)
 static float
 do_round (float f, int type)
 {
-  short saved_cw, new_cw, clr_mask;
+  unsigned short saved_cw, new_cw, clr_mask;
   float ret;
 
   if ((type & 4))
@@ -50,16 +50,15 @@  do_round (float f, int type)
       clr_mask = ~0x0C3F;
     }
 
-  __asm__ ("flds %0" : : "m" (*&f));
+  __asm__ ("fstcw %0" : "=m" (saved_cw));
 
-  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
   new_cw = saved_cw & clr_mask;
   new_cw |= type;
-  __asm__ ("fldcw %0" : : "m" (*&new_cw));
 
-  __asm__ ("frndint\n"
-	   "fstps %0\n" : "=m" (*&ret));
-  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+  __asm__ ("fldcw %2\n\t"
+	   "frndint\n\t"
+	   "fldcw %3" : "=t" (ret)
+		      : "0" (f), "m" (new_cw), "m" (saved_cw));
   return ret;
 }