diff mbox

nvptx: do not use alternative spelling of unsigned comparisons

Message ID alpine.LNX.2.20.1602011943090.18496@monopod.intra.ispras.ru
State New
Headers show

Commit Message

Alexander Monakov Feb. 1, 2016, 4:55 p.m. UTC
Hello!

The following patch fixes subtle breakage on NVPTX when unsigned comparisons
would be sometimes mistranslated by PTX JIT.  The new test demonstrates that
by using the %nctaid.x (number of blocks) register, but a comparison against
a value in constant memory can also trigger that (I could easily reproduce
that with NVCC, but not with GCC yet).  OK for trunk?

When emitting unsigned comparisons, nvptx backend uses alternative spellings
lo/ls/hs/hi for relational operators, instead of the usual lt/le/ge/gt,
respectively.  There's no nominal difference, but in practice ptxas and PTX
JIT can mistranslate the former in some circumstances, e.g. when the first
comparison operand is a load from constant memory (perhaps implicitely like
%nctaid.x).

This was reported to NVIDIA, but the workaround on GCC side is to simply
use the same common spellings as for signed comparisons, so let's do that.

Alexander

gcc/ChangeLog:
	* config/nvptx/nvptx.c (nvptx_print_operand): Treat LEU, GEU, LTU, GTU
        like LE, GE, LT, GT when emitting relational operator.

gcc/testsuite/ChangeLog:
	* gcc.target/nvptx/unsigned-cmp.c: New test.
---

Comments

Nathan Sidwell Feb. 1, 2016, 6:25 p.m. UTC | #1
On 02/01/16 11:55, Alexander Monakov wrote:
> Hello!
>
> The following patch fixes subtle breakage on NVPTX when unsigned comparisons
> would be sometimes mistranslated by PTX JIT.  The new test demonstrates that
> by using the %nctaid.x (number of blocks) register, but a comparison against
> a value in constant memory can also trigger that (I could easily reproduce
> that with NVCC, but not with GCC yet).  OK for trunk?
>
> When emitting unsigned comparisons, nvptx backend uses alternative spellings
> lo/ls/hs/hi for relational operators, instead of the usual lt/le/ge/gt,
> respectively.  There's no nominal difference, but in practice ptxas and PTX
> JIT can mistranslate the former in some circumstances, e.g. when the first
> comparison operand is a load from constant memory (perhaps implicitely like
> %nctaid.x).

Ok.

nathan
diff mbox

Patch

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 27c797b..efd0f8e 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2161,28 +2161,20 @@  nvptx_print_operand (FILE *file, rtx x, int code)
 	    fputs (".ne", file);
 	  break;
 	case LE:
+	case LEU:
 	  fputs (".le", file);
 	  break;
 	case GE:
+	case GEU:
 	  fputs (".ge", file);
 	  break;
 	case LT:
+	case LTU:
 	  fputs (".lt", file);
 	  break;
 	case GT:
-	  fputs (".gt", file);
-	  break;
-	case LEU:
-	  fputs (".ls", file);
-	  break;
-	case GEU:
-	  fputs (".hs", file);
-	  break;
-	case LTU:
-	  fputs (".lo", file);
-	  break;
 	case GTU:
-	  fputs (".hi", file);
+	  fputs (".gt", file);
 	  break;
 	case LTGT:
 	  fputs (".ne", file);
diff --git a/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c b/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c
new file mode 100644
index 0000000..a0cf5c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c
@@ -0,0 +1,50 @@ 
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+/* nvptx backend used to emit lo/ls/hs/hi suffixes on unsigned comparison
+   insns instead of the more common lt/le/ge/gt, but ptxas and PTX JIT
+   miscompile 'ls' and 'hi' under some circumstances, such as when the first
+   source operand expands to a constant memory load, as demonstrated below.
+   Reported as NVIDIA bug ID 1725195 (tracker is not public).  */
+
+/* Define this to observe PTX translation breakage.  */
+//#define EMIT_BROKEN_ASM 1
+
+/* Or define this to get expected codegen.  */
+//#define EMIT_WORKING_ASM 1
+
+static __attribute__((noinline,noclone)) int ls(unsigned a)
+{
+    unsigned v;
+    /* %nctaid.x is always 1 in gcc testing.  */
+    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
+#if defined(EMIT_BROKEN_ASM)
+    asm ("set.u32.ls.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#elif defined(EMIT_WORKING_ASM)
+    asm ("set.u32.le.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#else
+    a = v <= a ? -1 : 0;
+#endif
+    return a;
+}
+static __attribute__((noinline,noclone)) int hi(unsigned a)
+{
+    unsigned v;
+    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
+#if defined(EMIT_BROKEN_ASM)
+    asm ("set.u32.hi.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#elif defined(EMIT_WORKING_ASM)
+    asm ("set.u32.gt.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#else
+    a = v > a ? -1 : 0;
+#endif
+    return a;
+}
+int main()
+{
+    int i;
+    for (i=0; i<3; i++)
+        if (ls(i) != -(1 <= i) || hi(i) != -(1 > i))
+            __builtin_abort();
+    return 0;
+}