V5, #7 of 15: Restrict vector extract to not use prefixed memory
diff mbox series

Message ID 20191009203431.GG2063@ibm-toto.the-meissners.org
State New
Headers show
Series
  • V5, #7 of 15: Restrict vector extract to not use prefixed memory
Related show

Commit Message

Michael Meissner Oct. 9, 2019, 8:34 p.m. UTC
This patch fixes an issue Aaron Sawdey with vector extracts.  The vector
address adjust functions have a single base register temporary.  However for
PC-relative addresses where the vector element is not constant, it logically
will need two temporary registers, one to load up the PC-relative address, and
the other to hold the offset of the vector element being extracted.

If a vector has a prefixed address and the vector element being extracted is
not constant, this code will load the vector into a register, and then do the
variable extract from that register, insted of just loading the scalar value
up.

Along with the other patches, I have done bootstraps on a little endian power8
system, and there were no regressions in the test suite.  Can I check this into
the trunk?

2019-10-08  Michael Meissner  <meissner@linux.ibm.com>

	* config/rs6000/rs6000.c (rs6000_adjust_vec_address): Add support
	for optimizing PC-relative addresses with a constant element
	number.  Assert that PC-relative vectors do not have variable
	element numbers.  Add support for prefixed addresses with 34-bit
	offsets.
	* config/rs6000/vsx.md (vsx_extract_<mode>_var, VSX_D iterator):
	Do not allow combining prefixed memory with a variable vector
	extract.
	(vsx_extract_v4sf_var): Do not allow combining prefixed memory
	with a variable vector extract.
	(vsx_extract_<mode>_var, VSX_EXTRACT_I iterator): Do not allow
	combining prefixed memory with a variable vector extract.
	(vsx_extract_<mode>_<VS_scalar>mode_var): Do not allow combining
	prefixed memory with a variable vector extract.

Patch
diff mbox series

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 276719)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -6701,6 +6701,7 @@  rs6000_adjust_vec_address (rtx scalar_re
   rtx element_offset;
   rtx new_addr;
   bool valid_addr_p;
+  bool pcrel_p = pcrel_local_address (addr, Pmode);
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -6738,6 +6739,38 @@  rs6000_adjust_vec_address (rtx scalar_re
   else if (REG_P (addr) || SUBREG_P (addr))
     new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
 
+  /* Optimize PC-relative addresses with a constant offset.  */
+  else if (pcrel_p && CONST_INT_P (element_offset))
+    {
+      rtx addr2 = addr;
+      HOST_WIDE_INT offset = INTVAL (element_offset);
+
+      if (GET_CODE (addr2) == CONST)
+	addr2 = XEXP (addr2, 0);
+
+      if (GET_CODE (addr2) == PLUS)
+	{
+	  offset += INTVAL (XEXP (addr2, 1));
+	  addr2 = XEXP (addr2, 0);
+	}
+
+      gcc_assert (SIGNED_34BIT_OFFSET_P (offset));
+      if (offset)
+	{
+	  addr2 = gen_rtx_PLUS (Pmode, addr2, GEN_INT (offset));
+	  new_addr = gen_rtx_CONST (Pmode, addr2);
+	}
+      else
+	new_addr = addr2;
+    }
+
+  /* With only one temporary base register, we can't support a PC-relative
+     address added to a variable offset.  This is because the PADDI instruction
+     requires RA to be 0 when doing a PC-relative add (i.e. no register to add
+     to).  */
+  else if (pcrel_p)
+    gcc_unreachable ();
+
   /* Optimize D-FORM addresses with constant offset with a constant element, to
      include the element offset in the address directly.  */
   else if (GET_CODE (addr) == PLUS)
@@ -6752,8 +6785,11 @@  rs6000_adjust_vec_address (rtx scalar_re
 	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
 	  rtx offset_rtx = GEN_INT (offset);
 
-	  if (IN_RANGE (offset, -32768, 32767)
-	      && (scalar_size < 8 || (offset & 0x3) == 0))
+	  if (TARGET_PREFIXED_ADDR && SIGNED_34BIT_OFFSET_P (offset))
+	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
+
+	  else if (SIGNED_16BIT_OFFSET_P (offset)
+		   && (scalar_size < 8 || (offset & 0x3) == 0))
 	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
 	  else
 	    {
@@ -6801,11 +6837,11 @@  rs6000_adjust_vec_address (rtx scalar_re
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-  /* If we have a PLUS, we need to see whether the particular register class
-     allows for D-FORM or X-FORM addressing.  */
-  if (GET_CODE (new_addr) == PLUS)
+  /* If we have a PLUS or a PC-relative address without the PLUS, we need to
+     see whether the particular register class allows for D-FORM or X-FORM
+     addressing.  */
+  if (GET_CODE (new_addr) == PLUS || pcrel_p)
     {
-      rtx op1 = XEXP (new_addr, 1);
       addr_mask_type addr_mask;
       unsigned int scalar_regno = reg_or_subregno (scalar_reg);
 
@@ -6822,10 +6858,16 @@  rs6000_adjust_vec_address (rtx scalar_re
       else
 	gcc_unreachable ();
 
-      if (REG_P (op1) || SUBREG_P (op1))
-	valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
-      else
+      if (pcrel_p)
 	valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
+      else
+	{
+	  rtx op1 = XEXP (new_addr, 1);
+	  if (REG_P (op1) || SUBREG_P (op1))
+	    valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
+	  else
+	    valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
+	}
     }
 
   else if (REG_P (new_addr) || SUBREG_P (new_addr))
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 276720)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -3239,9 +3239,10 @@  (define_insn "vsx_vslo_<mode>"
 ;; Variable V2DI/V2DF extract
 (define_insn_and_split "vsx_extract_<mode>_var"
   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,wa,r")
-	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
-			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
-			    UNSPEC_VSX_EXTRACT))
+	(unspec:<VS_scalar>
+	 [(match_operand:VSX_D 1 "reg_or_non_prefixed_memory" "v,em,em")
+	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
+	 UNSPEC_VSX_EXTRACT))
    (clobber (match_scratch:DI 3 "=r,&b,&b"))
    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
@@ -3309,9 +3310,10 @@  (define_insn_and_split "*vsx_extract_v4s
 ;; Variable V4SF extract
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,wa,?r")
-	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
-		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
-		   UNSPEC_VSX_EXTRACT))
+	(unspec:SF
+	 [(match_operand:V4SF 1 "reg_or_non_prefixed_memory" "v,em,em")
+	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
+	 UNSPEC_VSX_EXTRACT))
    (clobber (match_scratch:DI 3 "=r,&b,&b"))
    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
@@ -3672,7 +3674,7 @@  (define_insn_and_split "*vsx_extract_<mo
 (define_insn_and_split "vsx_extract_<mode>_var"
   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
 	(unspec:<VS_scalar>
-	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
+	 [(match_operand:VSX_EXTRACT_I 1 "reg_or_non_prefixed_memory" "v,v,em")
 	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
 	 UNSPEC_VSX_EXTRACT))
    (clobber (match_scratch:DI 3 "=r,r,&b"))
@@ -3692,7 +3694,7 @@  (define_insn_and_split "*vsx_extract_<mo
   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
 	(zero_extend:<VS_scalar>
 	 (unspec:<VSX_EXTRACT_I:VS_scalar>
-	  [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
+	  [(match_operand:VSX_EXTRACT_I 1 "reg_or_non_prefixed_memory" "v,v,em")
 	   (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
 	  UNSPEC_VSX_EXTRACT)))
    (clobber (match_scratch:DI 3 "=r,r,&b"))