diff mbox

, Add power9 support to GCC, patch #10 (SFmode/DFmode d-form addressing)

Message ID 20151111002642.GA5334@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner Nov. 11, 2015, 12:26 a.m. UTC
Arghh, forgot the the patch once again.

[gcc]
2015-11-10  Michael Meissner  <meissner@linux.vnet.ibm.com>

	<patch #10>
	* config/rs6000/constraints.md (wb constraint): New constraint for
	ISA 3.0 d-form scalar addressing.

	* config/rs6000/rs6000.c (mode_supports_vmx_dform): Add support
	for ISA 3.0 D-form addressing to load SFmode/DFmode scalars into
	Altivec registers.  Add wb constraint for Altivec registers with
	D-form addressing.  If we have ISA 3.0 d-form support, undo
	secondary reload support for using FPR registers if we want to do
	D-form addressing.
	(rs6000_debug_reg_global): Likewise.
	(rs6000_setup_reg_addr_masks): Likewise.
	(rs6000_init_hard_regno_mode_ok): Likewise.
	(rs6000_secondary_reload): Likewise.
	(rs6000_preferred_reload_class): Likewise.
	(rs6000_secondary_reload_class): Likewise.

	* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wb
	constraint.

	* config/rs6000/rs6000.md (f32_lr2 mode attribute): Add support
	for ISA 3.0 SFmode/DFmode d-form addressing to Altivec registers.
	(f32_lm2): Likewise.
	(f32_li2): Likewise.
	(f32_sr2): Likewise.
	(f32_sm2): Likewise.
	(f32_si2): Likewise.
	(f64_p9): Likewise.
	(extendsfdf2_fpr): Likewise.
	(mov<mode>_hardfloat): Likewise.
	(mov<mode>_hardfloat32): Likewise.
	(mov<mode>_hardfloat64): Likewise.

	* doc/md.texi (RS/6000 constraints): Document wb constraint.
	Fixup we constraint documentation.

[gcc/testsuite]
2015-11-10  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/dform-1.c: New test.
	* gcc.target/powerpc/dform-2.c: Likewise.
diff mbox

Patch

Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md	(revision 230078)
+++ gcc/config/rs6000/constraints.md	(working copy)
@@ -56,7 +56,8 @@  (define_register_constraint "z" "CA_REGS
 (define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
   "Any VSX register if the -mvsx option was used or NO_REGS.")
 
-;; wb is not currently used
+(define_register_constraint "wb" "rs6000_constraints[RS6000_CONSTRAINT_wb]"
+  "Altivec register if the -mpower9-dform option was used or NO_REGS.")
 
 ;; NOTE: For compatibility, "wc" is reserved to represent individual CR bits.
 ;; It is currently used for that purpose in LLVM.
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 230078)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -408,6 +408,13 @@  mode_supports_pre_modify_p (machine_mode
 	  != 0);
 }
 
+/* Return true if we have D-form addressing in altivec registers.  */
+static inline bool
+mode_supports_vmx_dform (machine_mode mode)
+{
+  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
+}
+
 
 /* Target cpu costs.  */
 
@@ -2258,7 +2265,9 @@  rs6000_debug_reg_global (void)
 	   "f  reg_class = %s\n"
 	   "v  reg_class = %s\n"
 	   "wa reg_class = %s\n"
+	   "wb reg_class = %s\n"
 	   "wd reg_class = %s\n"
+	   "we reg_class = %s\n"
 	   "wf reg_class = %s\n"
 	   "wg reg_class = %s\n"
 	   "wh reg_class = %s\n"
@@ -2283,7 +2292,9 @@  rs6000_debug_reg_global (void)
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
@@ -2664,9 +2675,15 @@  rs6000_setup_reg_addr_masks (void)
 	    }
 
 	  /* GPR and FPR registers can do REG+OFFSET addressing, except
-	     possibly for SDmode.  */
+	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form
+	     addressing for scalars to altivec registers.  */
 	  if ((addr_mask != 0) && !indexed_only_p
-	      && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
+	      && msize <= 8
+	      && (rc == RELOAD_REG_GPR
+		  || rc == RELOAD_REG_FPR
+		  || (rc == RELOAD_REG_VMX
+		      && TARGET_P9_DFORM
+		      && (m2 == DFmode || m2 == SFmode))))
 	    addr_mask |= RELOAD_REG_OFFSET;
 
 	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
@@ -2990,6 +3007,10 @@  rs6000_init_hard_regno_mode_ok (bool glo
 	rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS;	/* TFmode  */
     }
 
+  /* Support for new D-form instructions.  */
+  if (TARGET_P9_DFORM)
+    rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
+
   /* Support for new direct moves.  */
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
@@ -18324,8 +18345,10 @@  rs6000_secondary_reload (bool in_p,
 
   /* If this is a scalar floating point value and we want to load it into the
      traditional Altivec registers, do it via a move via a traditional floating
-     point register.  Also make sure that non-zero constants use a FPR.  */
+     point register, unless we have D-form addressing.  Also make sure that
+     non-zero constants use a FPR.  */
   if (!done_p && reg_addr[mode].scalar_in_vmx_p
+      && !mode_supports_vmx_dform (mode)
       && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
       && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
     {
@@ -18889,10 +18912,14 @@  rs6000_preferred_reload_class (rtx x, en
 	  return NO_REGS;
 	}
 
-      /* If this is a scalar floating point value, prefer the traditional
-	 floating point registers so that we can use D-form (register+offset)
-	 addressing.  */
-      if (GET_MODE_SIZE (mode) < 16)
+      /* D-form addressing can easily reload the value.  */
+      if (mode_supports_vmx_dform (mode))
+	return rclass;
+
+      /* If this is a scalar floating point value and we don't have D-form
+	 addressing, prefer the traditional floating point registers so that we
+	 can use D-form (register+offset) addressing.  */
+      if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
 	return FLOAT_REGS;
 
       /* Prefer the Altivec registers if Altivec is handling the vector
@@ -19041,6 +19068,7 @@  rs6000_secondary_reload_class (enum reg_
      instead of reloading the secondary memory address for Altivec moves.  */
   if (TARGET_VSX
       && GET_MODE_SIZE (mode) < 16
+      && !mode_supports_vmx_dform (mode)
       && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
            && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
           || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 230079)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -1523,6 +1523,7 @@  enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_f,		/* fpr registers for single values */
   RS6000_CONSTRAINT_v,		/* Altivec registers */
   RS6000_CONSTRAINT_wa,		/* Any VSX register */
+  RS6000_CONSTRAINT_wb,		/* Altivec register if ISA 3.0 vector. */
   RS6000_CONSTRAINT_wd,		/* VSX register for V2DF */
   RS6000_CONSTRAINT_we,		/* VSX register if ISA 3.0 vector. */
   RS6000_CONSTRAINT_wf,		/* VSX register for V4SF */
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 230079)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -429,16 +429,22 @@  (define_mode_attr real_value_to_target [
 					(DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
 
 ; Definitions for load to 32-bit fpr register
-(define_mode_attr f32_lr [(SF "f")		 (SD "wz")])
-(define_mode_attr f32_lm [(SF "m")		 (SD "Z")])
-(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
-(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1")	 (SD "lxsiwzx %x0,%y1")])
+(define_mode_attr f32_lr  [(SF "f")		  (SD "wz")])
+(define_mode_attr f32_lr2 [(SF "wb")		  (SD "wn")])
+(define_mode_attr f32_lm  [(SF "m")		  (SD "Z")])
+(define_mode_attr f32_lm2 [(SF "o")		  (SD "wn")])
+(define_mode_attr f32_li  [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_li2 [(SF "lxssp %0,%1")     (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv  [(SF "lxsspx %x0,%y1")  (SD "lxsiwzx %x0,%y1")])
 
 ; Definitions for store from 32-bit fpr register
-(define_mode_attr f32_sr [(SF "f")		  (SD "wx")])
-(define_mode_attr f32_sm [(SF "m")		  (SD "Z")])
-(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
-(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0")  (SD "stxsiwzx %x1,%y0")])
+(define_mode_attr f32_sr  [(SF "f")		   (SD "wx")])
+(define_mode_attr f32_sr2 [(SF "wb")		   (SD "wn")])
+(define_mode_attr f32_sm  [(SF "m")		   (SD "Z")])
+(define_mode_attr f32_sm2 [(SF "o")		   (SD "wn")])
+(define_mode_attr f32_si  [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_si2 [(SF "stxssp %1,%0")     (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv  [(SF "stxsspx %x1,%y0")  (SD "stxsiwzx %x1,%y0")])
 
 ; Definitions for 32-bit fpr direct move
 ; At present, the decimal modes are not allowed in the traditional altivec
@@ -460,6 +466,9 @@  (define_mode_attr f64_dm  [(DF "wk") (DD
 ; Definitions for 64-bit use of altivec registers
 (define_mode_attr f64_av  [(DF "wv") (DD "wn")])
 
+; Definitions for 64-bit access to ISA 3.0 (power9) vector
+(define_mode_attr f64_p9  [(DF "wb") (DD "wn")])
+
 ; These modes do not fit in integer registers in 32-bit mode.
 ; but on e500v2, the gpr are 64 bit registers
 (define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -4455,8 +4464,8 @@  (define_expand "extendsfdf2"
   "")
 
 (define_insn_and_split "*extendsfdf2_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wu")
-	(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wu,wb")
+	(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z,o")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "@
    #
@@ -4464,14 +4473,15 @@  (define_insn_and_split "*extendsfdf2_fpr
    lfs%U1%X1 %0,%1
    #
    xscpsgndp %x0,%x1,%x1
-   lxsspx %x0,%y1"
+   lxsspx %x0,%y1
+   lxssp %0,%1"
   "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
   [(const_int 0)]
 {
   emit_note (NOTE_INSN_DELETED);
   DONE;
 }
-  [(set_attr "type" "fp,fp,fpload,fp,fp,fpload")])
+  [(set_attr "type" "fp,fp,fpload,fp,fp,fpload,fpload")])
 
 (define_expand "truncdfsf2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "")
@@ -6435,8 +6445,8 @@  (define_split
 }")
 
 (define_insn "mov<mode>_hardfloat"
-  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,<f32_vsx>,<f32_vsx>,!r,<f32_lr>,<f32_sm>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h")
-	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r,h,0"))]
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,<f32_vsx>,<f32_vsx>,!r,<f32_lr>,<f32_lr2>,<f32_sm>,<f32_sm2>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h")
+	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,j,<f32_lm>,<f32_lm2>,<f32_sr>,<f32_sr2>,Z,<f32_av>,r,<f32_dm>,r,h,0"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -6449,7 +6459,9 @@  (define_insn "mov<mode>_hardfloat"
    xxlxor %x0,%x0,%x0
    li %0,0
    <f32_li>
+   <f32_li2>
    <f32_si>
+   <f32_si2>
    <f32_lv>
    <f32_sv>
    mtvsrwz %x0,%1
@@ -6457,7 +6469,7 @@  (define_insn "mov<mode>_hardfloat"
    mt%0 %1
    mf%1 %0
    nop"
-  [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
+  [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpload,fpstore,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
    (set_attr "length" "4")])
 
 (define_insn "*mov<mode>_softfloat"
@@ -6566,14 +6578,15 @@  (define_split
 ;; into a floating point register when it is needed for a floating point
 ;; operation.  Prefer traditional floating point registers over VSX registers,
 ;; since the D-form version of the memory instructions does not need a GPR for
-;; reloading.
+;; reloading.  ISA 3.0 (power9) adds D-form addressing for scalars to Altivec
+;; registers.
 
 ;; If we have FPR registers, rs6000_emit_move has moved all constants to memory,
 ;; except for 0.0 which can be created on VSX with an xor instruction.
 
 (define_insn "*mov<mode>_hardfloat32"
-  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_vsx>,<f64_vsx>,!r,Y,r,!r")
-	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,<f64_vsx>,j,j,r,Y,r"))]
+  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_p9>,o,<f64_vsx>,<f64_vsx>,!r,Y,r,!r")
+	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,o,<f64_p9>,<f64_vsx>,j,j,r,Y,r"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT 
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -6583,14 +6596,16 @@  (define_insn "*mov<mode>_hardfloat32"
    fmr %0,%1
    lxsd%U1x %x0,%y1
    stxsd%U0x %x1,%y0
+   lxsd %0,%1
+   stxsd %1,%0
    xxlor %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    #
    #
    #
    #"
-  [(set_attr "type" "fpstore,fpload,fp,fpload,fpstore,vecsimple,vecsimple,two,store,load,two")
-   (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8")])
+  [(set_attr "type" "fpstore,fpload,fp,fpload,fpstore,fpload,fpstore,vecsimple,vecsimple,two,store,load,two")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,8,8,8,8")])
 
 (define_insn "*mov<mode>_softfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
@@ -6608,8 +6623,8 @@  (define_insn "*mov<mode>_softfloat32"
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*mov<mode>_hardfloat64"
-  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_vsx>,<f64_vsx>,!r,Y,r,!r,*c*l,!r,*h,r,wg,r,<f64_dm>")
-	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,<f64_vsx>,j,j,r,Y,r,r,h,0,wg,r,<f64_dm>,r"))]
+  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_p9>,o,<f64_av>,Z,<f64_vsx>,<f64_vsx>,!r,Y,r,!r,*c*l,!r,*h,r,wg,r,<f64_dm>")
+	(match_operand:FMOVE64 1 "input_operand" "d,m,d,o,<f64_p9>,Z,<f64_av>,<f64_vsx>,j,j,r,Y,r,r,h,0,wg,r,<f64_dm>,r"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -6617,6 +6632,8 @@  (define_insn "*mov<mode>_hardfloat64"
    stfd%U0%X0 %1,%0
    lfd%U1%X1 %0,%1
    fmr %0,%1
+   lxsd %0,%1
+   stxsd %1,%0
    lxsd%U1x %x0,%y1
    stxsd%U0x %x1,%y0
    xxlor %x0,%x1,%x1
@@ -6632,7 +6649,7 @@  (define_insn "*mov<mode>_hardfloat64"
    mffgpr %0,%1
    mfvsrd %0,%x1
    mtvsrd %x0,%1"
-  [(set_attr "type" "fpstore,fpload,fp,fpload,fpstore,vecsimple,vecsimple,integer,store,load,*,mtjmpr,mfjmpr,*,mftgpr,mffgpr,mftgpr,mffgpr")
+  [(set_attr "type" "fpstore,fpload,fp,fpload,fpstore,fpload,fpstore,vecsimple,vecsimple,integer,store,load,*,mtjmpr,mfjmpr,*,mftgpr,mffgpr,mftgpr,mffgpr")
    (set_attr "length" "4")])
 
 (define_insn "*mov<mode>_softfloat64"
Index: gcc/doc/md.texi
===================================================================
--- gcc/doc/md.texi	(revision 230078)
+++ gcc/doc/md.texi	(working copy)
@@ -3137,11 +3137,15 @@  asm ("xsaddqp %x0,%x1,%x2" : "=v" (v1) :
 
 is incorrect.
 
+@item wb
+Altivec register if @option{-mpower9-dform} is used or NO_REGS.
+
 @item wd
 VSX vector register to hold vector double data or NO_REGS.
 
 @item we
-VSX register if the -mpower9-vector -m64 options were used or NO_REGS.
+VSX register if the @option{-mpower9-vector} and @option{-m64} options
+were used or NO_REGS.
 
 @item wf
 VSX vector register to hold vector float data or NO_REGS.
Index: gcc/testsuite/gcc.target/powerpc/dform-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/dform-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/dform-1.c	(revision 0)
@@ -0,0 +1,207 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -mpower9-dform -O2" } */
+
+#ifndef TYPE
+#define TYPE double
+#endif
+
+#ifndef TYPE_IN
+#define TYPE_IN TYPE
+#endif
+
+#ifndef TYPE_OUT
+#define TYPE_OUT TYPE
+#endif
+
+#ifndef ITYPE
+#define ITYPE long
+#endif
+
+#ifdef DO_CALL
+extern ITYPE get_bits (ITYPE);
+
+#else
+#define get_bits(X) (X)
+#endif
+
+void test (ITYPE *bits, ITYPE n, TYPE one, TYPE_IN *p, TYPE_OUT *q)
+{
+  TYPE x_00 = p[ 0];
+  TYPE x_01 = p[ 1];
+  TYPE x_02 = p[ 2];
+  TYPE x_03 = p[ 3];
+  TYPE x_04 = p[ 4];
+  TYPE x_05 = p[ 5];
+  TYPE x_06 = p[ 6];
+  TYPE x_07 = p[ 7];
+  TYPE x_08 = p[ 8];
+  TYPE x_09 = p[ 9];
+
+  TYPE x_10 = p[10];
+  TYPE x_11 = p[11];
+  TYPE x_12 = p[12];
+  TYPE x_13 = p[13];
+  TYPE x_14 = p[14];
+  TYPE x_15 = p[15];
+  TYPE x_16 = p[16];
+  TYPE x_17 = p[17];
+  TYPE x_18 = p[18];
+  TYPE x_19 = p[19];
+
+  TYPE x_20 = p[20];
+  TYPE x_21 = p[21];
+  TYPE x_22 = p[22];
+  TYPE x_23 = p[23];
+  TYPE x_24 = p[24];
+  TYPE x_25 = p[25];
+  TYPE x_26 = p[26];
+  TYPE x_27 = p[27];
+  TYPE x_28 = p[28];
+  TYPE x_29 = p[29];
+
+  TYPE x_30 = p[30];
+  TYPE x_31 = p[31];
+  TYPE x_32 = p[32];
+  TYPE x_33 = p[33];
+  TYPE x_34 = p[34];
+  TYPE x_35 = p[35];
+  TYPE x_36 = p[36];
+  TYPE x_37 = p[37];
+  TYPE x_38 = p[38];
+  TYPE x_39 = p[39];
+
+  TYPE x_40 = p[40];
+  TYPE x_41 = p[41];
+  TYPE x_42 = p[42];
+  TYPE x_43 = p[43];
+  TYPE x_44 = p[44];
+  TYPE x_45 = p[45];
+  TYPE x_46 = p[46];
+  TYPE x_47 = p[47];
+  TYPE x_48 = p[48];
+  TYPE x_49 = p[49];
+
+  ITYPE i;
+
+  for (i = 0; i < n; i++)
+    {
+      ITYPE bit = get_bits (bits[i]);
+
+      if ((bit & ((ITYPE)1) << 	0) != 0) x_00 += one;
+      if ((bit & ((ITYPE)1) << 	1) != 0) x_01 += one;
+      if ((bit & ((ITYPE)1) << 	2) != 0) x_02 += one;
+      if ((bit & ((ITYPE)1) << 	3) != 0) x_03 += one;
+      if ((bit & ((ITYPE)1) << 	4) != 0) x_04 += one;
+      if ((bit & ((ITYPE)1) << 	5) != 0) x_05 += one;
+      if ((bit & ((ITYPE)1) << 	6) != 0) x_06 += one;
+      if ((bit & ((ITYPE)1) << 	7) != 0) x_07 += one;
+      if ((bit & ((ITYPE)1) << 	8) != 0) x_08 += one;
+      if ((bit & ((ITYPE)1) << 	9) != 0) x_09 += one;
+
+      if ((bit & ((ITYPE)1) << 10) != 0) x_10 += one;
+      if ((bit & ((ITYPE)1) << 11) != 0) x_11 += one;
+      if ((bit & ((ITYPE)1) << 12) != 0) x_12 += one;
+      if ((bit & ((ITYPE)1) << 13) != 0) x_13 += one;
+      if ((bit & ((ITYPE)1) << 14) != 0) x_14 += one;
+      if ((bit & ((ITYPE)1) << 15) != 0) x_15 += one;
+      if ((bit & ((ITYPE)1) << 16) != 0) x_16 += one;
+      if ((bit & ((ITYPE)1) << 17) != 0) x_17 += one;
+      if ((bit & ((ITYPE)1) << 18) != 0) x_18 += one;
+      if ((bit & ((ITYPE)1) << 19) != 0) x_19 += one;
+
+      if ((bit & ((ITYPE)1) << 20) != 0) x_20 += one;
+      if ((bit & ((ITYPE)1) << 21) != 0) x_21 += one;
+      if ((bit & ((ITYPE)1) << 22) != 0) x_22 += one;
+      if ((bit & ((ITYPE)1) << 23) != 0) x_23 += one;
+      if ((bit & ((ITYPE)1) << 24) != 0) x_24 += one;
+      if ((bit & ((ITYPE)1) << 25) != 0) x_25 += one;
+      if ((bit & ((ITYPE)1) << 26) != 0) x_26 += one;
+      if ((bit & ((ITYPE)1) << 27) != 0) x_27 += one;
+      if ((bit & ((ITYPE)1) << 28) != 0) x_28 += one;
+      if ((bit & ((ITYPE)1) << 29) != 0) x_29 += one;
+
+      if ((bit & ((ITYPE)1) << 30) != 0) x_30 += one;
+      if ((bit & ((ITYPE)1) << 31) != 0) x_31 += one;
+      if ((bit & ((ITYPE)1) << 32) != 0) x_32 += one;
+      if ((bit & ((ITYPE)1) << 33) != 0) x_33 += one;
+      if ((bit & ((ITYPE)1) << 34) != 0) x_34 += one;
+      if ((bit & ((ITYPE)1) << 35) != 0) x_35 += one;
+      if ((bit & ((ITYPE)1) << 36) != 0) x_36 += one;
+      if ((bit & ((ITYPE)1) << 37) != 0) x_37 += one;
+      if ((bit & ((ITYPE)1) << 38) != 0) x_38 += one;
+      if ((bit & ((ITYPE)1) << 39) != 0) x_39 += one;
+
+      if ((bit & ((ITYPE)1) << 40) != 0) x_40 += one;
+      if ((bit & ((ITYPE)1) << 41) != 0) x_41 += one;
+      if ((bit & ((ITYPE)1) << 42) != 0) x_42 += one;
+      if ((bit & ((ITYPE)1) << 43) != 0) x_43 += one;
+      if ((bit & ((ITYPE)1) << 44) != 0) x_44 += one;
+      if ((bit & ((ITYPE)1) << 45) != 0) x_45 += one;
+      if ((bit & ((ITYPE)1) << 46) != 0) x_46 += one;
+      if ((bit & ((ITYPE)1) << 47) != 0) x_47 += one;
+      if ((bit & ((ITYPE)1) << 48) != 0) x_48 += one;
+      if ((bit & ((ITYPE)1) << 49) != 0) x_49 += one;
+    }
+
+  q[ 0] = x_00;
+  q[ 1] = x_01;
+  q[ 2] = x_02;
+  q[ 3] = x_03;
+  q[ 4] = x_04;
+  q[ 5] = x_05;
+  q[ 6] = x_06;
+  q[ 7] = x_07;
+  q[ 8] = x_08;
+  q[ 9] = x_09;
+
+  q[10] = x_10;
+  q[11] = x_11;
+  q[12] = x_12;
+  q[13] = x_13;
+  q[14] = x_14;
+  q[15] = x_15;
+  q[16] = x_16;
+  q[17] = x_17;
+  q[18] = x_18;
+  q[19] = x_19;
+
+  q[20] = x_20;
+  q[21] = x_21;
+  q[22] = x_22;
+  q[23] = x_23;
+  q[24] = x_24;
+  q[25] = x_25;
+  q[26] = x_26;
+  q[27] = x_27;
+  q[28] = x_28;
+  q[29] = x_29;
+
+  q[30] = x_30;
+  q[31] = x_31;
+  q[32] = x_32;
+  q[33] = x_33;
+  q[34] = x_34;
+  q[35] = x_35;
+  q[36] = x_36;
+  q[37] = x_37;
+  q[38] = x_38;
+  q[39] = x_39;
+
+  q[40] = x_40;
+  q[41] = x_41;
+  q[42] = x_42;
+  q[43] = x_43;
+  q[44] = x_44;
+  q[45] = x_45;
+  q[46] = x_46;
+  q[47] = x_47;
+  q[48] = x_48;
+  q[49] = x_49;
+}
+
+/* { dg-final { scan-assembler     "lxsd "   } } */
+/* { dg-final { scan-assembler     "stxsd "  } } */
+/* { dg-final { scan-assembler-not "mfvsrd " } } */
+/* { dg-final { scan-assembler-not "mtvsrd " } } */
Index: gcc/testsuite/gcc.target/powerpc/dform-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/dform-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/dform-2.c	(revision 0)
@@ -0,0 +1,209 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -mpower9-dform -O2" } */
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+#ifndef TYPE_IN
+#define TYPE_IN TYPE
+#endif
+
+#ifndef TYPE_OUT
+#define TYPE_OUT TYPE
+#endif
+
+#ifndef ITYPE
+#define ITYPE long
+#endif
+
+#ifdef DO_CALL
+extern ITYPE get_bits (ITYPE);
+
+#else
+#define get_bits(X) (X)
+#endif
+
+void test (ITYPE *bits, ITYPE n, TYPE one, TYPE_IN *p, TYPE_OUT *q)
+{
+  TYPE x_00 = p[ 0];
+  TYPE x_01 = p[ 1];
+  TYPE x_02 = p[ 2];
+  TYPE x_03 = p[ 3];
+  TYPE x_04 = p[ 4];
+  TYPE x_05 = p[ 5];
+  TYPE x_06 = p[ 6];
+  TYPE x_07 = p[ 7];
+  TYPE x_08 = p[ 8];
+  TYPE x_09 = p[ 9];
+
+  TYPE x_10 = p[10];
+  TYPE x_11 = p[11];
+  TYPE x_12 = p[12];
+  TYPE x_13 = p[13];
+  TYPE x_14 = p[14];
+  TYPE x_15 = p[15];
+  TYPE x_16 = p[16];
+  TYPE x_17 = p[17];
+  TYPE x_18 = p[18];
+  TYPE x_19 = p[19];
+
+  TYPE x_20 = p[20];
+  TYPE x_21 = p[21];
+  TYPE x_22 = p[22];
+  TYPE x_23 = p[23];
+  TYPE x_24 = p[24];
+  TYPE x_25 = p[25];
+  TYPE x_26 = p[26];
+  TYPE x_27 = p[27];
+  TYPE x_28 = p[28];
+  TYPE x_29 = p[29];
+
+  TYPE x_30 = p[30];
+  TYPE x_31 = p[31];
+  TYPE x_32 = p[32];
+  TYPE x_33 = p[33];
+  TYPE x_34 = p[34];
+  TYPE x_35 = p[35];
+  TYPE x_36 = p[36];
+  TYPE x_37 = p[37];
+  TYPE x_38 = p[38];
+  TYPE x_39 = p[39];
+
+  TYPE x_40 = p[40];
+  TYPE x_41 = p[41];
+  TYPE x_42 = p[42];
+  TYPE x_43 = p[43];
+  TYPE x_44 = p[44];
+  TYPE x_45 = p[45];
+  TYPE x_46 = p[46];
+  TYPE x_47 = p[47];
+  TYPE x_48 = p[48];
+  TYPE x_49 = p[49];
+
+  ITYPE i;
+
+  for (i = 0; i < n; i++)
+    {
+      ITYPE bit = get_bits (bits[i]);
+
+      if ((bit & ((ITYPE)1) << 	0) != 0) x_00 += one;
+      if ((bit & ((ITYPE)1) << 	1) != 0) x_01 += one;
+      if ((bit & ((ITYPE)1) << 	2) != 0) x_02 += one;
+      if ((bit & ((ITYPE)1) << 	3) != 0) x_03 += one;
+      if ((bit & ((ITYPE)1) << 	4) != 0) x_04 += one;
+      if ((bit & ((ITYPE)1) << 	5) != 0) x_05 += one;
+      if ((bit & ((ITYPE)1) << 	6) != 0) x_06 += one;
+      if ((bit & ((ITYPE)1) << 	7) != 0) x_07 += one;
+      if ((bit & ((ITYPE)1) << 	8) != 0) x_08 += one;
+      if ((bit & ((ITYPE)1) << 	9) != 0) x_09 += one;
+
+      if ((bit & ((ITYPE)1) << 10) != 0) x_10 += one;
+      if ((bit & ((ITYPE)1) << 11) != 0) x_11 += one;
+      if ((bit & ((ITYPE)1) << 12) != 0) x_12 += one;
+      if ((bit & ((ITYPE)1) << 13) != 0) x_13 += one;
+      if ((bit & ((ITYPE)1) << 14) != 0) x_14 += one;
+      if ((bit & ((ITYPE)1) << 15) != 0) x_15 += one;
+      if ((bit & ((ITYPE)1) << 16) != 0) x_16 += one;
+      if ((bit & ((ITYPE)1) << 17) != 0) x_17 += one;
+      if ((bit & ((ITYPE)1) << 18) != 0) x_18 += one;
+      if ((bit & ((ITYPE)1) << 19) != 0) x_19 += one;
+
+      if ((bit & ((ITYPE)1) << 20) != 0) x_20 += one;
+      if ((bit & ((ITYPE)1) << 21) != 0) x_21 += one;
+      if ((bit & ((ITYPE)1) << 22) != 0) x_22 += one;
+      if ((bit & ((ITYPE)1) << 23) != 0) x_23 += one;
+      if ((bit & ((ITYPE)1) << 24) != 0) x_24 += one;
+      if ((bit & ((ITYPE)1) << 25) != 0) x_25 += one;
+      if ((bit & ((ITYPE)1) << 26) != 0) x_26 += one;
+      if ((bit & ((ITYPE)1) << 27) != 0) x_27 += one;
+      if ((bit & ((ITYPE)1) << 28) != 0) x_28 += one;
+      if ((bit & ((ITYPE)1) << 29) != 0) x_29 += one;
+
+      if ((bit & ((ITYPE)1) << 30) != 0) x_30 += one;
+      if ((bit & ((ITYPE)1) << 31) != 0) x_31 += one;
+      if ((bit & ((ITYPE)1) << 32) != 0) x_32 += one;
+      if ((bit & ((ITYPE)1) << 33) != 0) x_33 += one;
+      if ((bit & ((ITYPE)1) << 34) != 0) x_34 += one;
+      if ((bit & ((ITYPE)1) << 35) != 0) x_35 += one;
+      if ((bit & ((ITYPE)1) << 36) != 0) x_36 += one;
+      if ((bit & ((ITYPE)1) << 37) != 0) x_37 += one;
+      if ((bit & ((ITYPE)1) << 38) != 0) x_38 += one;
+      if ((bit & ((ITYPE)1) << 39) != 0) x_39 += one;
+
+      if ((bit & ((ITYPE)1) << 40) != 0) x_40 += one;
+      if ((bit & ((ITYPE)1) << 41) != 0) x_41 += one;
+      if ((bit & ((ITYPE)1) << 42) != 0) x_42 += one;
+      if ((bit & ((ITYPE)1) << 43) != 0) x_43 += one;
+      if ((bit & ((ITYPE)1) << 44) != 0) x_44 += one;
+      if ((bit & ((ITYPE)1) << 45) != 0) x_45 += one;
+      if ((bit & ((ITYPE)1) << 46) != 0) x_46 += one;
+      if ((bit & ((ITYPE)1) << 47) != 0) x_47 += one;
+      if ((bit & ((ITYPE)1) << 48) != 0) x_48 += one;
+      if ((bit & ((ITYPE)1) << 49) != 0) x_49 += one;
+    }
+
+  q[ 0] = x_00;
+  q[ 1] = x_01;
+  q[ 2] = x_02;
+  q[ 3] = x_03;
+  q[ 4] = x_04;
+  q[ 5] = x_05;
+  q[ 6] = x_06;
+  q[ 7] = x_07;
+  q[ 8] = x_08;
+  q[ 9] = x_09;
+
+  q[10] = x_10;
+  q[11] = x_11;
+  q[12] = x_12;
+  q[13] = x_13;
+  q[14] = x_14;
+  q[15] = x_15;
+  q[16] = x_16;
+  q[17] = x_17;
+  q[18] = x_18;
+  q[19] = x_19;
+
+  q[20] = x_20;
+  q[21] = x_21;
+  q[22] = x_22;
+  q[23] = x_23;
+  q[24] = x_24;
+  q[25] = x_25;
+  q[26] = x_26;
+  q[27] = x_27;
+  q[28] = x_28;
+  q[29] = x_29;
+
+  q[30] = x_30;
+  q[31] = x_31;
+  q[32] = x_32;
+  q[33] = x_33;
+  q[34] = x_34;
+  q[35] = x_35;
+  q[36] = x_36;
+  q[37] = x_37;
+  q[38] = x_38;
+  q[39] = x_39;
+
+  q[40] = x_40;
+  q[41] = x_41;
+  q[42] = x_42;
+  q[43] = x_43;
+  q[44] = x_44;
+  q[45] = x_45;
+  q[46] = x_46;
+  q[47] = x_47;
+  q[48] = x_48;
+  q[49] = x_49;
+}
+
+/* { dg-final { scan-assembler     "lxssp "     } } */
+/* { dg-final { scan-assembler     "stxssp "    } } */
+/* { dg-final { scan-assembler-not "mfvsrd "    } } */
+/* { dg-final { scan-assembler-not "mtvsrd "    } } */
+/* { dg-final { scan-assembler-not "xscvdpspn " } } */
+