diff mbox series

i386: Improve code generation of smin(x,0) with -m32.

Message ID 001501d66ef1$4b230bd0$e1692370$@nextmovesoftware.com
State New
Headers show
Series i386: Improve code generation of smin(x,0) with -m32. | expand

Commit Message

Roger Sayle Aug. 10, 2020, 8:36 a.m. UTC
To make amends for the recent (temporary) testsuite failure
of my new gcc.target/i386/minmax-9.c when compiled with -m32,
this patch improves the -m32 code we generate for the examples
in that test case.

Currently, smax(x,0) generates the very cool implementation:

smax0:  movl    4(%esp), %eax
        cltd
        notl    %edx
        andl    %edx, %eax
        ret

But mysteriously, the related smin(x,0) uses a longer setcc-based
sequence produced by the generic conditional move expansion:

smin0:  movl    4(%esp), %eax
        xorl    %edx, %edx
        testl   %eax, %eax
        setg    %dl
        subl    $1, %edx
        andl    %edx, %eax
        ret

With this patch, we now generate the much nicer:

smin0:  movl    4(%esp), %eax
        cltd
        andl    %edx, %eax
        ret

The trick is to expand smin(x,0) as "x < 0 ? x : 0" instead
of the current "x <= 0 ? x : 0", as the former can take
advantage of sign_bit_mask operations.

The benefits can also be seen in QImode, where previously we generated:

smin0:  movl    4(%esp), %edx
        testb   %dl, %dl
        setg    %al
        subl    $1, %eax
        andl    %edx, %eax
        ret

now becomes:

smin0:  movl    4(%esp), %edx
        movl    %edx, %eax
        sarb    $7, %al
        andl    %edx, %eax
        ret


This patch has been tested on x86_64-pc-linux-gnu with a
"make bootstrap" and "make -k check" with no new failures.
Please let me know if I need to tweak the dejagnu directives;
no other i386.exp test appears to explicitly specify -m32, but as
a compile test, it's not unreasonable to check things on x86_64.
OK for mainline?


2020-08-10  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/i386-expand.c (ix86_expand_int_movcc): Expand
	signed MIN_EXPR against zero as "x < 0 ? x : 0" instead of
	"x <= 0 ? x : 0" to enable sign_bit_compare_p optimizations.

gcc/testsuite/ChangeLog
	* gcc.target/i386/minmax-12.c: New test.

Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

diff --git a/gcc/testsuite/gcc.target/i386/minmax-12.c b/gcc/testsuite/gcc.target/i386/minmax-12.c
new file mode 100644
index 0000000..bcb3af0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/minmax-12.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-m32 -O2 -march=i386 -mtune=generic" } */
+
+#define min(a,b) (((a) < (b))? (a) : (b))
+
+int foo(int x)
+{
+  return min(x,0);
+}
+
+signed char bar(signed char x)
+{
+  return min(x,0);
+}
+
+/* { dg-final { scan-assembler "cltd" } } */
+/* { dg-final { scan-assembler "sarb" } } */

Comments

Uros Bizjak Aug. 10, 2020, 8:50 a.m. UTC | #1
On Mon, Aug 10, 2020 at 10:36 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> To make amends for the recent (temporary) testsuite failure
> of my new gcc.target/i386/minmax-9.c when compiled with -m32,
> this patch improves the -m32 code we generate for the examples
> in that test case.
>
> Currently, smax(x,0) generates the very cool implementation:
>
> smax0:  movl    4(%esp), %eax
>         cltd
>         notl    %edx
>         andl    %edx, %eax
>         ret
>
> But mysteriously, the related smin(x,0) uses a longer conditional
> move sequence:
>
> smin0:  movl    4(%esp), %eax
>         xorl    %edx, %edx
>         testl   %eax, %eax
>         setg    %dl
>         subl    $1, %edx
>         andl    %edx, %eax
>         ret
>
> With this patch, we now generate the much nicer:
>
> smin0:  movl    4(%esp), %eax
>         cltd
>         andl    %edx, %eax
>         ret
>
> The trick is to expand smin(x,0) as "x < 0 ? x : 0" instead
> of the current "x <= 0 ? x : 0", as the former can take
> advantage of sign_bit_mask operations.
>
> The benefits can also be seen in QImode, where previously:
> smin0:  movl    4(%esp), %edx
>         testb   %dl, %dl
>         setg    %al
>         subl    $1, %eax
>         andl    %edx, %eax
>         ret
>
> now becomes:
>
> smin0:  movl    4(%esp), %edx
>         movl    %edx, %eax
>         sarb    $7, %al
>         andl    %edx, %eax
>         ret
>
>
> This patch has been tested on x86_64-pc-linux-gnu with a
> "make bootstrap" and "make -k check" with no new failures.
> Please let me know if I need to tweak the dejagnu directives;
> no other i386.exp test appears to explicitly specify -m32, but as
> a compile test, it's not unreasonable to check things on x86_64.

We don't use -m32 compile directives in the testsuite; please just use the
"{ dg-do compile { target ia32 } }" dg directive instead.

BTW: Multilibs can be tested with:

make -j X check RUNTESTFLAGS="--target-board=unix\{,-m32}"

> OK for mainline?

OK with the above testcase fix.

Thanks,
Uros.

>
>
> 2020-08-10  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-expand.c (ix86_expand_int_movcc): Expand
>         signed MIN_EXPR against zero as "x < 0 ? x : 0" instead of
>         "x <= 0 ? x : 0" to enable sign_bit_compare_p optimizations.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/minmax-12.c: New test.
>
> Thanks in advance,
> Roger
> --
> Roger Sayle
> NextMove Software
> Cambridge, UK
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index e194214..46fbab4 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3305,7 +3305,17 @@  ix86_expand_int_movcc (rtx operands[])
 	{
 	  var = operands[2];
 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
-	    operands[2] = constm1_rtx, op = and_optab;
+	    {
+	      /* For smin (x, 0), expand as "x < 0 ? x : 0" instead of
+		 "x <= 0 ? x : 0" to enable sign_bit_compare_p.  */
+	      if (code == LE && op1 == const0_rtx && rtx_equal_p (op0, var))
+		operands[1] = simplify_gen_relational (LT, VOIDmode,
+						       GET_MODE (op0),
+						       op0, const0_rtx);
+
+	      operands[2] = constm1_rtx;
+	      op = and_optab;
+	    }
 	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
 	    operands[2] = const0_rtx, op = ior_optab;
 	  else