Patchwork [i386] : Use indirect functions some more

login
register
mail settings
Submitter Uros Bizjak
Date March 18, 2011, 8:40 p.m.
Message ID <AANLkTiko4NUrkb4DAYNfz06B_Ys7GRkVarFGojOa0=6K@mail.gmail.com>
Download mbox | patch
Permalink /patch/87577/
State New
Headers show

Comments

Uros Bizjak - March 18, 2011, 8:40 p.m.
Hello!

Just a trivial cleanup, no functional changes.

2011-03-18  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (float<SSEMODEI24:mode><X87MODEF:mode>2):
	Rewrite using indirect functions.
	(lwp_slwpcb): Ditto.
	(avx_vextractf128<mode>): Ditto.
	(avx_vinsertf128<mode>): Ditto.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.
Richard Henderson - March 18, 2011, 9:08 p.m.
On 03/18/2011 01:40 PM, Uros Bizjak wrote:
>        if (<X87MODEF:MODE>mode == SFmode)
> -	insn = gen_truncxfsf2 (operands[0], reg);
> +	insn = gen_truncxfsf2;
>        else if (<X87MODEF:MODE>mode == DFmode)
> -	insn = gen_truncxfdf2 (operands[0], reg);
> +	insn = gen_truncxfdf2;
>        else
>  	gcc_unreachable ();

Why is this a good thing?  Surely the direct calls are much
better predicted by the CPU...

I can certainly understand sinking the call to emit_insn, as
in the second hunk; that ought to save code size.  Though the
compiler really ought to be able to figure that out itself.


r~
Uros Bizjak - March 19, 2011, 5:26 p.m.
On Fri, Mar 18, 2011 at 10:08 PM, Richard Henderson <rth@redhat.com> wrote:
> On 03/18/2011 01:40 PM, Uros Bizjak wrote:
>>        if (<X87MODEF:MODE>mode == SFmode)
>> -     insn = gen_truncxfsf2 (operands[0], reg);
>> +     insn = gen_truncxfsf2;
>>        else if (<X87MODEF:MODE>mode == DFmode)
>> -     insn = gen_truncxfdf2 (operands[0], reg);
>> +     insn = gen_truncxfdf2;
>>        else
>>       gcc_unreachable ();
>
> Why is this a good thing?  Surely the direct calls are much
> better predicted by the CPU...

I was hoping that a cmov would be generated in this particular case for
64-bit targets. I'm not aware of any other differences between direct
and indirect calls...

OTOH, the call arguments do not need to be set up twice when an indirect
call is used. Please note that these are not CSE'd, as shown by comparing
the two examples below on 32-bit i386:

--cut here--
#include <stdio.h>
#include <stdlib.h>

int foo (int, int);
int bar (int, int);

/* Direct-call variant: each branch performs its own call, so the
   argument stores are duplicated per branch (compare the two
   "movl ..., (%esp)" sequences in the "test:" assembly below).  */
void test (int a, int b, int c)
{
  int x;

  if (c == 1)
    x = foo (a, b);
  else if (c == 2)
    x = bar (a, b);
  else
    abort ();  /* c other than 1 or 2 is not supported.  */

  printf ("%i\n", x);
}


/* Indirect-call variant: the branches only select a function pointer,
   so the argument set-up and the call are emitted once on the shared
   path (see the single store sequence at .L8 in the "test_:" assembly
   below).  */
void test_ (int a, int b, int c)
{
  int (*x)(int, int);  /* pointer to the function chosen by c */

  if (c == 1)
    x = foo;
  else if (c == 2)
    x = bar;
  else
    abort ();  /* c other than 1 or 2 is not supported.  */

  printf ("%i\n", x (a, b));
}
--cut here--

test:
	subl	$28, %esp
	movl	40(%esp), %eax
	movl	32(%esp), %edx
	movl	36(%esp), %ecx
	cmpl	$1, %eax
	je	.L6
	cmpl	$2, %eax
	jne	.L4
	movl	%ecx, 4(%esp)
	movl	%edx, (%esp)
	call	bar
.L3:
	movl	%eax, 36(%esp)
	movl	$.LC0, 32(%esp)
	addl	$28, %esp
	jmp	printf
	.p2align 4,,7
	.p2align 3
.L6:
	movl	%ecx, 4(%esp)
	movl	%edx, (%esp)
	call	foo
	jmp	.L3
.L4:
	call	abort

test_:
	subl	$28, %esp
	movl	40(%esp), %eax
	movl	32(%esp), %edx
	movl	36(%esp), %ecx
	cmpl	$1, %eax
	je	.L9
	cmpl	$2, %eax
	jne	.L11
	movl	$bar, %eax
.L8:
	movl	%ecx, 4(%esp)
	movl	%edx, (%esp)
	call	*%eax
	movl	$.LC0, 32(%esp)
	movl	%eax, 36(%esp)
	addl	$28, %esp
	jmp	printf
	.p2align 4,,7
	.p2align 3
.L9:
	.cfi_restore_state
	movl	$foo, %eax
	jmp	.L8
.L11:
	call	abort

> I can certainly understand sinking the call to emit_insn, as
> in the second hunk; that ought to save code size.  Though the
> compiler really ought to be able to figure that out itself.

Currently, it does not.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 171163)
+++ i386.md	(working copy)
@@ -4959,18 +4959,18 @@ 
       && !X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode))
     {
       rtx reg = gen_reg_rtx (XFmode);
-      rtx insn;
+      rtx (*insn)(rtx, rtx);
 
       emit_insn (gen_float<SSEMODEI24:mode>xf2 (reg, operands[1]));
 
       if (<X87MODEF:MODE>mode == SFmode)
-	insn = gen_truncxfsf2 (operands[0], reg);
+	insn = gen_truncxfsf2;
       else if (<X87MODEF:MODE>mode == DFmode)
-	insn = gen_truncxfdf2 (operands[0], reg);
+	insn = gen_truncxfdf2;
       else
 	gcc_unreachable ();
 
-      emit_insn (insn);
+      emit_insn (insn (operands[0], reg));
       DONE;
     }
 })
@@ -18216,10 +18216,13 @@ 
 	(unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
   "TARGET_LWP"
 {
-  if (TARGET_64BIT)
-    emit_insn (gen_lwp_slwpcbdi (operands[0]));
-  else
-    emit_insn (gen_lwp_slwpcbsi (operands[0]));
+  rtx (*insn)(rtx);
+
+  insn = (TARGET_64BIT
+	  ? gen_lwp_slwpcbdi
+	  : gen_lwp_slwpcbsi);
+
+  emit_insn (insn (operands[0]));
   DONE;
 })
 
Index: sse.md
===================================================================
--- sse.md	(revision 171163)
+++ sse.md	(working copy)
@@ -4122,17 +4122,21 @@ 
    (match_operand:SI 2 "const_0_to_1_operand" "")]
   "TARGET_AVX"
 {
+  rtx (*insn)(rtx, rtx);
+
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+      insn = gen_vec_extract_lo_<mode>;
       break;
     case 1:
-      emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+      insn = gen_vec_extract_hi_<mode>;
       break;
     default:
       gcc_unreachable ();
     }
+
+  emit_insn (insn (operands[0], operands[1]));
   DONE;
 })
 
@@ -11776,19 +11780,21 @@ 
    (match_operand:SI 3 "const_0_to_1_operand" "")]
   "TARGET_AVX"
 {
+  rtx (*insn)(rtx, rtx, rtx);
+
   switch (INTVAL (operands[3]))
     {
     case 0:
-      emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
-					operands[2]));
+      insn = gen_vec_set_lo_<mode>;
       break;
     case 1:
-      emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
-					operands[2]));
+      insn = gen_vec_set_hi_<mode>;
       break;
     default:
       gcc_unreachable ();
     }
+
+  emit_insn (insn (operands[0], operands[1], operands[2]));
   DONE;
 })