Patchwork [AARCH64] : Invent new regclass - FP low regs.

login
register
mail settings
Submitter Tejas Belagod
Date June 19, 2012, 2:03 p.m.
Message ID <4FE086A3.5010209@arm.com>
Download mbox | patch
Permalink /patch/165732/
State New
Headers show

Comments

Tejas Belagod - June 19, 2012, 2:03 p.m.
Hi,

The attached patch adds a new register class, FP_LO_REGS, covering V0 - V15.
It is needed for the lane variants of some AdvSIMD instructions, which can only
take V0 - V15 as their indexed register when operating on half-word elements.

Regression tests are happy. OK?

Thanks,
Tejas Belagod.
ARM.

Changelog:

2012-06-19  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh_lane<mode>,
	aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal,
	aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
	aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
	aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal,
	aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
	aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
	aarch64_sqdmull_lane<mode>_internal, aarch64_sqdmull_lane<mode>,
	aarch64_sqdmull_laneq<mode>, aarch64_sqdmull2_lane<mode>_internal,
	aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Change the
	constraint of the indexed operand to use <vwx> instead of w.
	* config/aarch64/aarch64.c (aarch64_hard_regno_nregs): Add case for
	FP_LO_REGS class.
	(aarch64_regno_regclass): Return FP_LO_REGS if the register is in V0 - V15.
	(aarch64_secondary_reload): Change condition to check for both FP reg
	classes.
	(aarch64_class_max_nregs): Add case for FP_LO_REGS.
	* config/aarch64/aarch64.h (reg_class): New register class FP_LO_REGS.
	(REG_CLASS_NAMES): Likewise.
	(REG_CLASS_CONTENTS): Likewise.
	(FP_LO_REGNUM_P): New.
	* config/aarch64/aarch64.md (V15_REGNUM): New.
	* config/aarch64/constraints.md (x): New register constraint.
	* config/aarch64/iterators.md (vwx): New.
Marcus Shawcroft - June 19, 2012, 3:09 p.m.
On 19/06/12 15:03, Tejas Belagod wrote:
>
> Hi,
>
> The attached patch invents a new register class V0 - V15 that is needed for some
> lane variants of AdvSIMD instructions that can only take V0 - V15 as their
> indexed register when working on half-word type.
>
> Regression tests are happy. OK?

OK
/Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ceefee..43017df 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1897,7 +1897,7 @@ 
         (unspec:VSDQ_HSI
 	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
            (vec_select:<VEL>
-             (match_operand:<VCON> 2 "register_operand" "w")
+             (match_operand:<VCON> 2 "register_operand" "<vwx>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
 	 VQDMULH))]
   "TARGET_SIMD"
@@ -1940,7 +1940,7 @@ 
 	      (sign_extend:<VWIDE>
 		(vec_duplicate:VD_HSI
 		  (vec_select:<VEL>
-		    (match_operand:<VCON> 3 "register_operand" "w")
+		    (match_operand:<VCON> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               ))
 	    (const_int 1))))]
@@ -1960,7 +1960,7 @@ 
 		(match_operand:SD_HSI 2 "register_operand" "w"))
 	      (sign_extend:<VWIDE>
 		(vec_select:<VEL>
-		  (match_operand:<VCON> 3 "register_operand" "w")
+		  (match_operand:<VCON> 3 "register_operand" "<vwx>")
 		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               )
 	    (const_int 1))))]
@@ -1974,7 +1974,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -1989,7 +1989,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2004,7 +2004,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2019,7 +2019,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2114,7 +2114,7 @@ 
 		(sign_extend:<VWIDE>
                   (vec_duplicate:<VHALF>
 		    (vec_select:<VEL>
-		      (match_operand:<VCON> 3 "register_operand" "w")
+		      (match_operand:<VCON> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 		    ))))
 	      (const_int 1))))]
@@ -2128,7 +2128,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2144,7 +2144,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2160,7 +2160,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2176,7 +2176,7 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2264,7 +2264,7 @@ 
 	       (sign_extend:<VWIDE>
                  (vec_duplicate:VD_HSI
                    (vec_select:<VEL>
-		     (match_operand:<VCON> 2 "register_operand" "w")
+		     (match_operand:<VCON> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
 	     (const_int 1)))]
@@ -2282,7 +2282,7 @@ 
 		 (match_operand:SD_HSI 1 "register_operand" "w"))
 	       (sign_extend:<VWIDE>
                  (vec_select:<VEL>
-		   (match_operand:<VCON> 2 "register_operand" "w")
+		   (match_operand:<VCON> 2 "register_operand" "<vwx>")
 		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
 	       ))
 	     (const_int 1)))]
@@ -2295,7 +2295,7 @@ 
 (define_expand "aarch64_sqdmull_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VSD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2308,7 +2308,7 @@ 
 (define_expand "aarch64_sqdmull_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2386,7 +2386,7 @@ 
 	       (sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
                    (vec_select:<VEL>
-		     (match_operand:<VCON> 2 "register_operand" "w")
+		     (match_operand:<VCON> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
 	     (const_int 1)))]
@@ -2399,7 +2399,7 @@ 
 (define_expand "aarch64_sqdmull2_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2414,7 +2414,7 @@ 
 (define_expand "aarch64_sqdmull2_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3e4b48e..b877df3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -241,6 +241,7 @@  aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
   switch (aarch64_regno_regclass (regno))
     {
     case FP_REGS:
+    case FP_LO_REGS:
       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
     default:
       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
@@ -3457,7 +3458,7 @@  aarch64_regno_regclass (unsigned regno)
     return CORE_REGS;
 
   if (FP_REGNUM_P (regno))
-    return FP_REGS;
+    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
 
   return NO_REGS;
 }
@@ -3590,10 +3591,9 @@  aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 
   /* Without the TARGET_SIMD instructions we cannot move a Q register
      to a Q register directly.  We need a scratch.  */
-  if (rclass == FP_REGS && REG_P (x)
-      && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
-      && FP_REGNUM_P (REGNO (x))
-      && !TARGET_SIMD)
+  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
+      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+      && reg_class_subset_p (rclass, FP_REGS))
     {
       if (mode == TFmode)
         sri->icode = CODE_FOR_aarch64_reload_movtf;
@@ -3609,7 +3609,8 @@  aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
     return FP_REGS;
 
-  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
+  if ((mode == TImode || mode == TFmode) && CONSTANT_P(x)
+      && reg_class_subset_p (rclass, FP_REGS))
       return CORE_REGS;
 
   return NO_REGS;
@@ -3748,6 +3749,7 @@  aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
     case GENERAL_REGS:
     case ALL_REGS:
     case FP_REGS:
+    case FP_LO_REGS:
       return (GET_MODE_SIZE (mode) + 7) / 8;
 
     case STACK_REG:
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2faded7..56e2df5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -361,6 +361,10 @@  extern unsigned long aarch64_tune_flags;
 
 #define FP_REGNUM_P(REGNO)			\
   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
+
+#define FP_LO_REGNUM_P(REGNO)            \
+  (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM))
+
 
 /* Register and constant classes.  */
 
@@ -371,6 +375,7 @@  enum reg_class
   GENERAL_REGS,
   STACK_REG,
   POINTER_REGS,
+  FP_LO_REGS,
   FP_REGS,
   ALL_REGS,
   LIM_REG_CLASSES		/* Last */
@@ -385,6 +390,7 @@  enum reg_class
   "GENERAL_REGS",				\
   "STACK_REG",					\
   "POINTER_REGS",				\
+  "FP_LO_REGS",					\
   "FP_REGS",					\
   "ALL_REGS"					\
 }
@@ -396,6 +402,7 @@  enum reg_class
   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\
   { 0xffffffff, 0x00000000, 0x00000003 },	/* POINTER_REGS */	\
+  { 0x00000000, 0x0000ffff, 0x00000000 },       /* FP_LO_REGS  */	\
   { 0x00000000, 0xffffffff, 0x00000000 },       /* FP_REGS  */		\
   { 0xffffffff, 0xffffffff, 0x00000007 }	/* ALL_REGS */		\
 }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a666ed9..7b2a899 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -57,6 +57,7 @@ 
     (LR_REGNUM		30)
     (SP_REGNUM		31)
     (V0_REGNUM		32)
+    (V15_REGNUM		47)
     (V31_REGNUM		63)
     (SFP_REGNUM		64)
     (AP_REGNUM		65)
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 91eba09..da50a47 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -24,6 +24,9 @@ 
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
+(define_register_constraint "x" "FP_LO_REGS"
+  "Floating point and SIMD vector registers V0 - V15.")
+
 (define_constraint "I"
  "A constant that can be used with an ADD operation."
  (and (match_code "const_int")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 6dc3b2f..fc7fc50 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -326,6 +326,10 @@ 
 				(V2SF "V2SI") (V4SF  "V4SI")
 				(DI   "DI")   (V2DI  "V2DI")])
 
+;; Vm for lane instructions is restricted to FP_LO_REGS.
+(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
+		       (V2SI "w") (V4SI "w") (SI "w")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------