diff mbox

[ARM] Loosen MODES_TIEABLE_P

Message ID g44o0lop18.fsf@richards-thinkpad.stglab.manchester.uk.ibm.com
State New
Headers show

Commit Message

Richard Sandiford Sept. 9, 2011, 12:53 p.m. UTC
ARM's MODES_TIEABLE_P only allows modes of the same class to be tied:

    #define MODES_TIEABLE_P(MODE1, MODE2)  \
      (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))

But for NEON, we'd like structure modes to be tied to their vector
elements.  In particular, a vector subreg of a structure register
should be considered "cheap", rather than as something that is likely
to need an intermediate.

The current definition made sense before my patch to redefine
CANNOT_CHANGE_MODE_CLASS:

    http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01631.html

so I think this is really a missing piece from that patch.  I haven't
measured any direct benefit from this patch alone, because no pass
propagates the sources of subreg moves, even if the modes are tieable.
But with a patch to do that too, I see significant improvements for
several vectorised loops.

Tested on arm-linux-gnueabi.  OK to install?

Richard


gcc/
	* config/arm/arm-protos.h (arm_modes_tieable_p): Declare.
	* config/arm/arm.h (MODES_TIEABLE_P): Use it.
	* config/arm/arm.c (arm_modes_tieable_p): New function.  Allow
	NEON vector and structure modes to be tied.
diff mbox

Patch

Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h	2011-09-08 10:31:01.455663735 +0100
+++ gcc/config/arm/arm-protos.h	2011-09-09 13:46:32.797939324 +0100
@@ -46,6 +46,7 @@  extern void arm_output_fn_unwind (FILE *
 extern bool arm_vector_mode_supported_p (enum machine_mode);
 extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	2011-09-08 10:31:01.455663735 +0100
+++ gcc/config/arm/arm.h	2011-09-09 13:46:32.865939164 +0100
@@ -962,12 +962,7 @@  #define HARD_REGNO_NREGS(REGNO, MODE)  	
 #define HARD_REGNO_MODE_OK(REGNO, MODE)					\
   arm_hard_regno_mode_ok ((REGNO), (MODE))
 
-/* Value is 1 if it is a good idea to tie two pseudo registers
-   when one has mode MODE1 and one has mode MODE2.
-   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
-   for any hard reg, then this must be 0 for correct output.  */
-#define MODES_TIEABLE_P(MODE1, MODE2)  \
-  (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+#define MODES_TIEABLE_P(MODE1, MODE2) arm_modes_tieable_p (MODE1, MODE2)
 
 #define VALID_IWMMXT_REG_MODE(MODE) \
  (arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	2011-09-08 10:31:01.455663735 +0100
+++ gcc/config/arm/arm.c	2011-09-09 13:46:32.842939218 +0100
@@ -18236,6 +18236,29 @@  arm_hard_regno_mode_ok (unsigned int reg
 	  && regno <= LAST_FPA_REGNUM);
 }
 
+/* Implement MODES_TIEABLE_P.  */
+
+bool
+arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
+    return true;
+
+  /* We specifically want to allow elements of "structure" modes to
+     be tieable to the structure.  This more general condition allows
+     other rarer situations too.  */
+  if (TARGET_NEON
+      && (VALID_NEON_DREG_MODE (mode1)
+	  || VALID_NEON_QREG_MODE (mode1)
+	  || VALID_NEON_STRUCT_MODE (mode1))
+      && (VALID_NEON_DREG_MODE (mode2)
+	  || VALID_NEON_QREG_MODE (mode2)
+	  || VALID_NEON_STRUCT_MODE (mode2)))
+    return true;
+
+  return false;
+}
+
 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
    not used in arm mode.  */