[rs6000,v4] enable gimple folding for vec_xl, vec_xst

Message ID 1531242651.26783.9.camel@brimstone.rchland.ibm.com
State New
Headers show
Series
  • [rs6000,v4] enable gimple folding for vec_xl, vec_xst
Related show

Commit Message

Will Schmidt July 10, 2018, 5:10 p.m.
Hi,
Add support for Gimple folding for unaligned vector loads and stores.
    
Regtest completed across variety of systems, P6,P7,P8,P9.
    
[v2] Added the type for the MEM_REF, per feedback.
Testcases for gimple-folding of the same are currently in-tree
as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
    
[v3] Updated the alignment for the MEM_REF to be 4bytes.
Updated/added/removed comments in the code for clarity.
    
[v4] Updated the alignment for the stores to match what was done
for the loads.
    
Sniff tests passed for V4.  Will do full regtest as well, just in case.
OK for trunk assuming successful test results?
    
Thanks
-Will
    
[gcc]
    
2018-07-10 Will Schmidt <will_schmidt@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
	vec_xst variants to the list.
	(rs6000_gimple_fold_builtin): Add support for folding unaligned
	vector loads and stores.

Comments

Segher Boessenkool July 12, 2018, 7:01 p.m. | #1
Hi!

On Tue, Jul 10, 2018 at 12:10:51PM -0500, Will Schmidt wrote:
> Add support for Gimple folding for unaligned vector loads and stores.

This is fine if the experts agree.  Thanks!  And thanks to the reviewers,
too.

One detail:

> 	* config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> 	vec_xst variants to the list.
> 	(rs6000_gimple_fold_builtin): Add support for folding unaligned
> 	vector loads and stores.

> +    case VSX_BUILTIN_STXVW4X_V4SI:
> +    case VSX_BUILTIN_STXVD2X_V2DF:
> +    case VSX_BUILTIN_STXVD2X_V2DI:
> +      {
> +	 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */

Indent here is wrong.


Segher

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8bc4109..229cfac 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15401,10 +15401,16 @@  rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
     case ALTIVEC_BUILTIN_STVX_V8HI:
     case ALTIVEC_BUILTIN_STVX_V4SI:
     case ALTIVEC_BUILTIN_STVX_V4SF:
     case ALTIVEC_BUILTIN_STVX_V2DI:
     case ALTIVEC_BUILTIN_STVX_V2DF:
+    case VSX_BUILTIN_STXVW4X_V16QI:
+    case VSX_BUILTIN_STXVW4X_V8HI:
+    case VSX_BUILTIN_STXVW4X_V4SF:
+    case VSX_BUILTIN_STXVW4X_V4SI:
+    case VSX_BUILTIN_STXVD2X_V2DF:
+    case VSX_BUILTIN_STXVD2X_V2DI:
       return true;
     default:
       return false;
     }
 }
@@ -15910,10 +15916,81 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	gimple_set_location (g, loc);
 	gsi_replace (gsi, g, true);
 	return true;
       }
 
+    /* unaligned Vector loads.  */
+    case VSX_BUILTIN_LXVW4X_V16QI:
+    case VSX_BUILTIN_LXVW4X_V8HI:
+    case VSX_BUILTIN_LXVW4X_V4SF:
+    case VSX_BUILTIN_LXVW4X_V4SI:
+    case VSX_BUILTIN_LXVD2X_V2DF:
+    case VSX_BUILTIN_LXVD2X_V2DI:
+      {
+	 arg0 = gimple_call_arg (stmt, 0);  // offset
+	 arg1 = gimple_call_arg (stmt, 1);  // address
+	 lhs = gimple_call_lhs (stmt);
+	 location_t loc = gimple_location (stmt);
+	 /* Since arg1 may be cast to a different type, just use ptr_type_node
+	    here instead of trying to enforce TBAA on pointer types.  */
+	 tree arg1_type = ptr_type_node;
+	 tree lhs_type = TREE_TYPE (lhs);
+	 /* In GIMPLE the type of the MEM_REF specifies the alignment.  The
+	   required alignment (power) is 4 bytes regardless of data type.  */
+	 tree align_ltype = build_aligned_type (lhs_type, 4);
+	 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	    the tree using the value from arg0.  The resulting type will match
+	    the type of arg1.  */
+	 gimple_seq stmts = NULL;
+	 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+	 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg1_type, arg1, temp_offset);
+	 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	 /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+	    take an offset, but since we've already incorporated the offset
+	    above, here we just pass in a zero.  */
+	 gimple *g;
+	 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
+						build_int_cst (arg1_type, 0)));
+	 gimple_set_location (g, loc);
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
+    /* unaligned Vector stores.  */
+    case VSX_BUILTIN_STXVW4X_V16QI:
+    case VSX_BUILTIN_STXVW4X_V8HI:
+    case VSX_BUILTIN_STXVW4X_V4SF:
+    case VSX_BUILTIN_STXVW4X_V4SI:
+    case VSX_BUILTIN_STXVD2X_V2DF:
+    case VSX_BUILTIN_STXVD2X_V2DI:
+      {
+	 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
+	 arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
+	 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
+	 location_t loc = gimple_location (stmt);
+	 tree arg0_type = TREE_TYPE (arg0);
+	 /* Use ptr_type_node (no TBAA) for the arg2_type.  */
+	 tree arg2_type = ptr_type_node;
+	 /* In GIMPLE the type of the MEM_REF specifies the alignment.  The
+	    required alignment (power) is 4 bytes regardless of data type.  */
+	 tree align_stype = build_aligned_type (arg0_type, 4);
+	 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	    the tree using the value from arg1.  */
+	 gimple_seq stmts = NULL;
+	 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+	 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg2_type, arg2, temp_offset);
+	 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	 gimple *g;
+	 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
+					   build_int_cst (arg2_type, 0)), arg0);
+	 gimple_set_location (g, loc);
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
     /* Vector Fused multiply-add (fma).  */
     case ALTIVEC_BUILTIN_VMADDFP:
     case VSX_BUILTIN_XVMADDDP:
     case ALTIVEC_BUILTIN_VMLADDUHM:
       {