Patchwork [powerpc] Rework#2 VSX scalar floating point support, patch #3

login
register
mail settings
Submitter Michael Meissner
Date Sept. 24, 2013, 8:33 p.m.
Message ID <20130924203310.GA25337@ibm-tiger.the-meissners.org>
Download mbox | patch
Permalink /patch/277609/
State New
Headers show

Comments

Michael Meissner - Sept. 24, 2013, 8:33 p.m.
This patch adds the initial support for putting DI, DF, and SF values in the
upper registers (traditional Altivec registers) using the -mupper-regs-df and
-mupper-regs-sf switches.  Those switches will not be enabled by default until
the rest of the changes are made.  This patch passes the bootstrap test and
make check test.  I tested all of the targets I tested previously (power4-8,
G4/G5, SPE, cell, e5500/e5600, and paired floating point), and all machines
generate the same code.  Is it ok to install this patch?

[gcc]
2013-09-24  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow
	DFmode, DImode, and SFmode in the upper VSX registers based on the
	-mupper-regs-{df,sf} flags.  Fix wu constraint to be ALTIVEC_REGS
	if -mpower8-vector.  Combine -mvsx-timode handling with the rest
	of the VSX register handling.

	* config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX registers.
	(f32_sv): Likewise.
	(zero_extendsidi2_lfiwzx): Add support for loading into the
	Altivec registers with -mpower8-vector.  Use wu/wv constraints to
	only do VSX memory operations on Altivec registers.
	(extendsidi2_lfiwax): Likewise.
	(extendsfdf2_fpr): Likewise.
	(mov<mode>_hardfloat, SF/SD modes): Likewise.
	(mov<mode>_hardfloat32, DF/DD modes): Likewise.
	(mov<mode>_hardfloat64, DF/DD modes): Likewise.
	(movdi_internal64): Likewise.

[gcc/testsuite]
2013-09-24  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf
	and -mupper-regs-df.
David Edelsohn - Sept. 26, 2013, 3:09 a.m.
On Tue, Sep 24, 2013 at 4:33 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:
> This patch adds the initial support for putting DI, DF, and SF values in the
> upper registers (traditional Altivec registers) using the -mupper-regs-df and
> -mupper-regs-sf switches.  Those switches will not be enabled by default until
> the rest of the changes are made.  This patch passes the bootstrap test and
> make check test.  I tested all of the targets I tested previously (power4-8,
> G4/G5, SPE, cell, e5500/e5600, and paired floating point), and all machines
> generate the same code.  Is it ok to install this patch?
>
> [gcc]
> 2013-09-24  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow
>         DFmode, DImode, and SFmode in the upper VSX registers based on the
>         -mupper-regs-{df,sf} flags.  Fix wu constraint to be ALTIVEC_REGS
>         if -mpower8-vector.  Combine -mvsx-timode handling with the rest
>         of the VSX register handling.
>
>         * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX registers.
>         (f32_sv): Likewise.
>         (zero_extendsidi2_lfiwzx): Add support for loading into the
>         Altivec registers with -mpower8-vector.  Use wu/wv constraints to
>         only do VSX memory operations on Altivec registers.
>         (extendsidi2_lfiwax): Likewise.
>         (extendsfdf2_fpr): Likewise.
>         (mov<mode>_hardfloat, SF/SD modes): Likewise.
>         (mov<mode>_hardfloat32, DF/DD modes): Likewise.
>         (mov<mode>_hardfloat64, DF/DD modes): Likewise.
>         (movdi_internal64): Likewise.
>
> [gcc/testsuite]
> 2013-09-24  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf
>         and -mupper-regs-df.

Okay.

Thanks, David

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 202855)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1626,19 +1626,28 @@  rs6000_hard_regno_mode_ok (int regno, en
 
   /* VSX registers that overlap the FPR registers are larger than for non-VSX
      implementations.  Don't allow an item to be split between a FP register
-     and an Altivec register.  */
-  if (VECTOR_MEM_VSX_P (mode))
+     and an Altivec register.  Allow TImode in all VSX registers if the user
+     asked for it.  */
+  if (TARGET_VSX && VSX_REGNO_P (regno)
+      && (VECTOR_MEM_VSX_P (mode)
+	  || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
+	  || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
+	  || (TARGET_VSX_TIMODE && mode == TImode)))
     {
       if (FP_REGNO_P (regno))
 	return FP_REGNO_P (last_regno);
 
       if (ALTIVEC_REGNO_P (regno))
-	return ALTIVEC_REGNO_P (last_regno);
-    }
+	{
+	  if (mode == SFmode && !TARGET_UPPER_REGS_SF)
+	    return 0;
 
-  /* Allow TImode in all VSX registers if the user asked for it.  */
-  if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno))
-    return 1;
+	  if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
+	    return 0;
+
+	  return ALTIVEC_REGNO_P (last_regno);
+	}
+    }
 
   /* The GPRs can hold any mode, but values bigger than one register
      cannot go past R31.  */
@@ -2413,7 +2422,7 @@  rs6000_init_hard_regno_mode_ok (bool glo
 
   if (TARGET_P8_VECTOR)
     {
-      rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
+      rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
       rs6000_constraints[RS6000_CONSTRAINT_wy]
 	= rs6000_constraints[RS6000_CONSTRAINT_ww]
 	= (TARGET_UPPER_REGS_SF) ? VSX_REGS : FLOAT_REGS;
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 202846)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -314,13 +314,13 @@  (define_mode_attr real_value_to_target [
 (define_mode_attr f32_lr [(SF "f")		 (SD "wz")])
 (define_mode_attr f32_lm [(SF "m")		 (SD "Z")])
 (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
-(define_mode_attr f32_lv [(SF "lxsspx %0,%y1")	 (SD "lxsiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1")	 (SD "lxsiwzx %x0,%y1")])
 
 ; Definitions for store from 32-bit fpr register
 (define_mode_attr f32_sr [(SF "f")		  (SD "wx")])
 (define_mode_attr f32_sm [(SF "m")		  (SD "Z")])
 (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
-(define_mode_attr f32_sv [(SF "stxsspx %1,%y0")	  (SD "stxsiwzx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0")  (SD "stxsiwzx %x1,%y0")])
 
 ; Definitions for 32-bit fpr direct move
 (define_mode_attr f32_dm [(SF "wn") (SD "wm")])
@@ -541,7 +541,7 @@  (define_split
   "")
 
 (define_insn "*zero_extendsidi2_lfiwzx"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm")
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wu")
 	(zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
   "TARGET_POWERPC64 && TARGET_LFIWZX"
   "@
@@ -711,7 +711,7 @@  (define_expand "extendsidi2"
   "")
 
 (define_insn "*extendsidi2_lfiwax"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm")
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu")
 	(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
   "TARGET_POWERPC64 && TARGET_LFIWAX"
   "@
@@ -5066,13 +5066,16 @@  (define_expand "extendsfdf2"
   "")
 
 (define_insn_and_split "*extendsfdf2_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d")
-	(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,wy,?wy,wv")
+	(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wz,Z")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "@
    #
    fmr %0,%1
-   lfs%U1%X1 %0,%1"
+   lfs%U1%X1 %0,%1
+   #
+   xxlor %x0,%x1,%x1
+   lxsspx %x0,%y1"
   "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
   [(const_int 0)]
 {
@@ -5088,7 +5091,16 @@  (define_insn_and_split "*extendsfdf2_fpr
 	 (if_then_else
 	   (match_test "update_address_mem (operands[1], VOIDmode)")
 	   (const_string "fpload_u")
-	   (const_string "fpload")))])])
+	   (const_string "fpload")))
+       (const_string "fp")
+       (const_string "vecsimple")
+       (if_then_else
+	(match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+	(const_string "fpload_ux")
+	(if_then_else
+	 (match_test "update_address_mem (operands[1], VOIDmode)")
+	 (const_string "fpload_u")
+	 (const_string "fpload")))])])
 
 (define_expand "truncdfsf2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "")
@@ -9290,8 +9302,8 @@  (define_split
 }")
 
 (define_insn "mov<mode>_hardfloat"
-  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
-	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))]
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -9492,8 +9504,8 @@  (define_split
 ;; reloading.
 
 (define_insn "*mov<mode>_hardfloat32"
-  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r")
-	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))]
+  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r")
+	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT 
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -9502,11 +9514,8 @@  (define_insn "*mov<mode>_hardfloat32"
    lfd%U1%X1 %0,%1
    fmr %0,%1
    lxsd%U1x %x0,%y1
-   lxsd%U1x %x0,%y1
-   stxsd%U0x %x1,%y0
    stxsd%U0x %x1,%y0
    xxlor %x0,%x1,%x1
-   xxlor %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    #
    #
@@ -9535,27 +9544,18 @@  (define_insn "*mov<mode>_hardfloat32"
 	 (const_string "fpload_ux")
 	 (const_string "fpload"))
        (if_then_else
-	 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
-	 (const_string "fpload_ux")
-	 (const_string "fpload"))
-       (if_then_else
-	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
-	 (const_string "fpstore_ux")
-	 (const_string "fpstore"))
-       (if_then_else
 	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
 	 (const_string "fpstore_ux")
 	 (const_string "fpstore"))
        (const_string "vecsimple")
        (const_string "vecsimple")
-       (const_string "vecsimple")
        (const_string "store")
        (const_string "load")
        (const_string "two")
        (const_string "fp")
        (const_string "fp")
        (const_string "*")])
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")])
+   (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8,12,16")])
 
 (define_insn "*mov<mode>_softfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
@@ -9572,8 +9572,8 @@  (define_insn "*mov<mode>_softfloat32"
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*mov<mode>_hardfloat64"
-  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
-	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
+  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -9582,11 +9582,8 @@  (define_insn "*mov<mode>_hardfloat64"
    lfd%U1%X1 %0,%1
    fmr %0,%1
    lxsd%U1x %x0,%y1
-   lxsd%U1x %x0,%y1
-   stxsd%U0x %x1,%y0
    stxsd%U0x %x1,%y0
    xxlor %x0,%x1,%x1
-   xxlor %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    std%U0%X0 %1,%0
    ld%U1%X1 %0,%1
@@ -9622,20 +9619,11 @@  (define_insn "*mov<mode>_hardfloat64"
 	 (const_string "fpload_ux")
 	 (const_string "fpload"))
        (if_then_else
-	 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
-	 (const_string "fpload_ux")
-	 (const_string "fpload"))
-       (if_then_else
-	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
-	 (const_string "fpstore_ux")
-	 (const_string "fpstore"))
-       (if_then_else
 	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
 	 (const_string "fpstore_ux")
 	 (const_string "fpstore"))
        (const_string "vecsimple")
        (const_string "vecsimple")
-       (const_string "vecsimple")
        (if_then_else
 	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
 	 (const_string "store_ux")
@@ -9661,7 +9649,7 @@  (define_insn "*mov<mode>_hardfloat64"
        (const_string "mffgpr")
        (const_string "mftgpr")
        (const_string "mffgpr")])
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
 
 (define_insn "*mov<mode>_softfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
@@ -10324,8 +10312,8 @@  (define_split
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
 (define_insn "*movdi_internal64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
-	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wv,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
+	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wv,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
   "TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
Index: gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c	(revision 0)
@@ -0,0 +1,42 @@ 
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */
+
+float load_sf (float *p)
+{
+  float f = *p;
+  __asm__ ("# reg %x0" : "+v" (f));
+  return f;
+}
+
+double load_df (double *p)
+{
+  double d = *p;
+  __asm__ ("# reg %x0" : "+v" (d));
+  return d;
+}
+
+double load_dfsf (float *p)
+{
+  double d = (double) *p;
+  __asm__ ("# reg %x0" : "+v" (d));
+  return d;
+}
+
+void store_sf (float *p, float f)
+{
+  __asm__ ("# reg %x0" : "+v" (f));
+  *p = f;
+}
+
+void store_df (double *p, double d)
+{
+  __asm__ ("# reg %x0" : "+v" (d));
+  *p = d;
+}
+
+/* { dg-final { scan-assembler-times "lxsspx"  2 } } */
+/* { dg-final { scan-assembler-times "lxsdx"   1 } } */
+/* { dg-final { scan-assembler-times "stxsspx" 1 } } */
+/* { dg-final { scan-assembler-times "stxsdx"  1 } } */