Patchwork RFC: Add TARGET_EXPAND_COMPOUND_OPERATION

login
register
mail settings
Submitter H.J. Lu
Date June 25, 2010, 7:50 p.m.
Message ID <20100625195028.GA27734@intel.com>
Download mbox | patch
Permalink /patch/56979/
State New
Headers show

Comments

H.J. Lu - June 25, 2010, 7:50 p.m.
Hi,

The x86 backend has a special optimization for accessing

(zero_extract:SI (reg:M N) (const_int 8) (const_int 8))

However, the combiner never exposes this to the x86 backend.  I added
a TARGET_EXPAND_COMPOUND_OPERATION hook to allow the x86 backend to
optimize it.  For

---
/* Struct of four unsigned char fields, passed to foo by value.  In the
   assembly listings that follow, the argument arrives in %edi and the c2
   field is the byte read through %ah.  */
typedef struct
{
  unsigned char c1;
  unsigned char c2;
  unsigned char c3;
  unsigned char c4;
} foo_t;

/* Return 1 if the c2 field of X is greater than 4, otherwise 0.  */
int
foo (foo_t x)
{
   return x.c2 > 4;
}
---

it generates:

	movl	%edi, %eax
	cmpb	$4, %ah
	seta	%al
	movzbl	%al, %eax
	ret

instead of

	movl	%edi, %eax
	movzbl	%ah, %edi
	xorl	%eax, %eax
	cmpb	$4, %dil
	seta	%al
	ret

Any comments?

Thanks.


H.J.
---

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index 1bee2c7..34f4f76 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -396,7 +396,6 @@  static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
 static rtx simplify_if_then_else (rtx);
 static rtx simplify_set (rtx);
 static rtx simplify_logical (rtx);
-static rtx expand_compound_operation (rtx);
 static const_rtx expand_field_assignment (const_rtx);
 static rtx make_extraction (enum machine_mode, rtx, HOST_WIDE_INT,
 			    rtx, unsigned HOST_WIDE_INT, int, int, int);
@@ -5085,7 +5084,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
       break;
 
     case NEG:
-      temp = expand_compound_operation (XEXP (x, 0));
+      temp = targetm.expand_compound_operation (XEXP (x, 0));
 
       /* For C equal to the width of MODE minus 1, (neg (ashiftrt X C)) can be
 	 replaced by (lshiftrt X C).  This will convert
@@ -5322,7 +5321,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 	      && mode == GET_MODE (op0)
 	      && nonzero_bits (op0, mode) == 1)
 	    return gen_lowpart (mode,
-				expand_compound_operation (op0));
+				targetm.expand_compound_operation (op0));
 
 	  else if (STORE_FLAG_VALUE == 1
 		   && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
@@ -5331,7 +5330,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && (num_sign_bit_copies (op0, mode)
 		       == GET_MODE_BITSIZE (mode)))
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return simplify_gen_unary (NEG, mode,
 					 gen_lowpart (mode, op0),
 					 mode);
@@ -5343,7 +5342,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && mode == GET_MODE (op0)
 		   && nonzero_bits (op0, mode) == 1)
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return simplify_gen_binary (XOR, mode,
 					  gen_lowpart (mode, op0),
 					  const1_rtx);
@@ -5356,7 +5355,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && (num_sign_bit_copies (op0, mode)
 		       == GET_MODE_BITSIZE (mode)))
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return plus_constant (gen_lowpart (mode, op0), 1);
 	    }
 
@@ -5368,7 +5367,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 	      && (num_sign_bit_copies (op0, mode)
 		  == GET_MODE_BITSIZE (mode)))
 	    return gen_lowpart (mode,
-				expand_compound_operation (op0));
+				targetm.expand_compound_operation (op0));
 
 	  else if (STORE_FLAG_VALUE == -1
 		   && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
@@ -5376,7 +5375,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && mode == GET_MODE (op0)
 		   && nonzero_bits (op0, mode) == 1)
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return simplify_gen_unary (NEG, mode,
 					 gen_lowpart (mode, op0),
 					 mode);
@@ -5389,7 +5388,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && (num_sign_bit_copies (op0, mode)
 		       == GET_MODE_BITSIZE (mode)))
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return simplify_gen_unary (NOT, mode,
 					 gen_lowpart (mode, op0),
 					 mode);
@@ -5402,7 +5401,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 		   && mode == GET_MODE (op0)
 		   && nonzero_bits (op0, mode) == 1)
 	    {
-	      op0 = expand_compound_operation (op0);
+	      op0 = targetm.expand_compound_operation (op0);
 	      return plus_constant (gen_lowpart (mode, op0), -1);
 	    }
 
@@ -5420,7 +5419,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
 	      && (i = exact_log2 (nonzero_bits (op0, mode))) >= 0)
 	    {
 	      x = simplify_shift_const (NULL_RTX, ASHIFT, mode,
-					expand_compound_operation (op0),
+					targetm.expand_compound_operation (op0),
 					GET_MODE_BITSIZE (mode) - 1 - i);
 	      if (GET_CODE (x) == AND && XEXP (x, 1) == const_true_rtx)
 		return XEXP (x, 0);
@@ -5450,7 +5449,7 @@  combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
       if (in_dest)
 	return x;
 
-      return expand_compound_operation (x);
+      return targetm.expand_compound_operation (x);
 
     case SET:
       return simplify_set (x);
@@ -6248,7 +6247,7 @@  simplify_logical (rtx x)
    consisting of shifts and ANDs into the equivalent compound expression.
    It is the inverse of this function, loosely speaking.  */
 
-static rtx
+rtx
 expand_compound_operation (rtx x)
 {
   unsigned HOST_WIDE_INT pos = 0, len;
@@ -6347,7 +6346,7 @@  expand_compound_operation (rtx x)
 	       == 0)))
     {
       rtx temp = gen_rtx_ZERO_EXTEND (GET_MODE (x), XEXP (x, 0));
-      rtx temp2 = expand_compound_operation (temp);
+      rtx temp2 = targetm.expand_compound_operation (temp);
 
       /* Make sure this is a profitable operation.  */
       if (rtx_cost (x, SET, optimize_this_for_speed_p)
@@ -7609,7 +7608,7 @@  force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
     case ZERO_EXTEND:
     case ZERO_EXTRACT:
     case SIGN_EXTRACT:
-      x = expand_compound_operation (x);
+      x = targetm.expand_compound_operation (x);
       if (GET_CODE (x) != code)
 	return force_to_mode (x, mode, mask, next_select);
       break;
@@ -8564,8 +8563,8 @@  make_field_assignment (rtx x)
   if (GET_CODE (src) != IOR && GET_CODE (src) != XOR)
     return x;
 
-  rhs = expand_compound_operation (XEXP (src, 0));
-  lhs = expand_compound_operation (XEXP (src, 1));
+  rhs = targetm.expand_compound_operation (XEXP (src, 0));
+  lhs = targetm.expand_compound_operation (XEXP (src, 1));
 
   if (GET_CODE (rhs) == AND
       && CONST_INT_P (XEXP (rhs, 1))
@@ -8650,8 +8649,8 @@  apply_distributive_law (rtx x)
   if (OBJECT_P (lhs) || OBJECT_P (rhs))
     return x;
 
-  lhs = expand_compound_operation (lhs);
-  rhs = expand_compound_operation (rhs);
+  lhs = targetm.expand_compound_operation (lhs);
+  rhs = targetm.expand_compound_operation (rhs);
   inner_code = GET_CODE (lhs);
   if (inner_code != GET_CODE (rhs))
     return x;
@@ -9434,7 +9433,7 @@  simplify_shift_const_1 (enum rtx_code code, enum machine_mode result_mode,
 	case ZERO_EXTEND:
 	case SIGN_EXTRACT:
 	case ZERO_EXTRACT:
-	  new_rtx = expand_compound_operation (varop);
+	  new_rtx = targetm.expand_compound_operation (varop);
 	  if (new_rtx != varop)
 	    {
 	      varop = new_rtx;
@@ -10716,7 +10715,7 @@  simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
 	  /* ... fall through ...  */
 
 	case SIGN_EXTRACT:
-	  tem = expand_compound_operation (op0);
+	  tem = targetm.expand_compound_operation (op0);
 	  if (tem != op0)
 	    {
 	      op0 = tem;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8957fe2..181fa06 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30673,6 +30673,33 @@  ix86_enum_va_list (int idx, const char **pname, tree *ptree)
   return 0;
 }
 
+
+/* Implement TARGET_EXPAND_COMPOUND_OPERATION.  Return X unchanged when it is
+   the SImode ZERO_EXTRACT matched below; otherwise expand it as usual.  */
+
+static rtx
+ix86_expand_compound_operation (rtx x)
+{
+  /* Don't convert:
+
+     (zero_extract:SI (reg:M N) (const_int 8) (const_int 8))
+
+     since we have special patterns to access upper 8-bit registers.  */
+
+  if (GET_CODE (x) == ZERO_EXTRACT
+      && GET_MODE (x) == SImode
+      && GET_CODE (XEXP (x, 0)) != CLOBBER
+      && GET_MODE (XEXP (x, 0)) != VOIDmode
+      && SCALAR_INT_MODE_P (GET_MODE (XEXP (x, 0)))
+      && CONST_INT_P (XEXP (x, 1))
+      && CONST_INT_P (XEXP (x, 2))
+      && INTVAL (XEXP (x, 1)) == 8
+      && INTVAL (XEXP (x, 2)) == 8)
+    return x;
+
+  return expand_compound_operation (x);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_RETURN_IN_MEMORY
 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -30943,6 +30970,9 @@  ix86_enum_va_list (int idx, const char **pname, tree *ptree)
 #undef TARGET_ASM_CODE_END
 #define TARGET_ASM_CODE_END ix86_code_end
 
+#undef TARGET_EXPAND_COMPOUND_OPERATION
+#define TARGET_EXPAND_COMPOUND_OPERATION ix86_expand_compound_operation
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 87329e0..f92b68a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -1474,6 +1474,11 @@  This hook allows the backend to perform additional instantiations on rtl
 that are not actually in any insns yet, but will be later.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_EXPAND_COMPOUND_OPERATION (rtx)
+This hook allows the backend to optimize the combiner's conversion of
+ZERO_EXTRACT, SIGN_EXTRACT, ZERO_EXTEND and SIGN_EXTEND into basic operations.
+@end deftypefn
+
 @deftypefn {Target Hook} {const char *} TARGET_MANGLE_TYPE (const_tree @var{type})
 If your target defines any fundamental types, or any types your target
 uses should be mangled differently from the default, define this hook
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6be88d1..053e3de 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2233,6 +2233,7 @@  extern bool validate_subreg (enum machine_mode, enum machine_mode,
 			     const_rtx, unsigned int);
 
 /* In combine.c  */
+extern rtx expand_compound_operation (rtx);
 extern unsigned int extended_count (const_rtx, enum machine_mode, int);
 extern rtx remove_death (unsigned int, rtx);
 extern void dump_combine_stats (FILE *);
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 1aaf38c..0c0b4fc 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -488,6 +488,9 @@ 
 /* In tree-ssa-math-opts.c  */
 #define TARGET_BUILTIN_RECIPROCAL default_builtin_reciprocal
 
+/* In combine.c.  */
+#define TARGET_EXPAND_COMPOUND_OPERATION expand_compound_operation
+
 /* In varasm.c.  */
 #ifndef TARGET_SECTION_TYPE_FLAGS
 #define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
@@ -1070,6 +1073,7 @@ 
   TARGET_SECONDARY_RELOAD,			\
   TARGET_EXPAND_TO_RTL_HOOK,			\
   TARGET_INSTANTIATE_DECLS,			\
+  TARGET_EXPAND_COMPOUND_OPERATION,		\
   TARGET_HARD_REGNO_SCRATCH_OK,			\
   TARGET_CASE_VALUES_THRESHOLD,			\
   TARGET_FRAME_POINTER_REQUIRED,		\
diff --git a/gcc/target.h b/gcc/target.h
index 2f181eb..0ff890a 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -1108,6 +1108,10 @@  struct gcc_target
      but will be later.  */
   void (* instantiate_decls) (void);
 
+  /* Used by combiner to convert ZERO_EXTRACT, SIGN_EXTRACT, ZERO_EXTEND
+     and SIGN_EXTEND into basic operations.  */
+  rtx (* expand_compound_operation) (rtx);
+
   /* Return true if is OK to use a hard register REGNO as scratch register
      in peephole2.  */
   bool (* hard_regno_scratch_ok) (unsigned int regno);