diff mbox

[gomp4] predicate register caching

Message ID 55F176DB.7040004@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Sept. 10, 2015, 12:26 p.m. UTC
I've committed this to gomp4.  Rather than recalculating the 'not lane 0'
predicate on each use, we now calculate it once at the top of the function and
use it throughout.  This appears to be the recommended approach.

nathan
diff mbox

Patch

2015-09-10  Nathan Sidwell  <nathan@codesourcery.com>

	* config/nvptx/nvptx.c (nvptx_init_axis_predicate): New.
	(nvptx_declare_function_name): Initialize axis predicates.
	(nvptx_single): Use or init machine_function axis predicate.
	* config/nvptx/nvptx.h (struct machine_function): Add
	axis_predicate field.

Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 227632)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -603,6 +603,20 @@  nvptx_record_needed_fndecl (tree decl)
     *slot = decl;
 }
 
+/* Emit code to initialize the REGNO predicate register to indicate
+   whether we are not lane zero on the NAME axis.  */
+
+static void
+nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
+{
+  /* NAME doubles as the temporary holding %tid.NAME, declared in braces.  */
+  fprintf (file, "\t{\n");
+  fprintf (file, "\t\t.reg.u32\t%%%s;\n", name);
+  fprintf (file, "\t\tmov.u32\t%%%s, %%tid.%s;\n", name, name);
+  fprintf (file, "\t\tsetp.ne.u32\t%%r%d, %%%s, 0;\n", regno, name);
+  fprintf (file, "\t}\n");
+}
+
 /* Implement ASM_DECLARE_FUNCTION_NAME.  Writes the start of a ptx
    function, including local var decls and copies from the arguments to
    local regs.  */
@@ -727,6 +741,14 @@  nvptx_declare_function_name (FILE *file,
   if (stdarg_p (fntype))
     fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n",
 	     GET_MODE_BITSIZE (Pmode));
+
+  /* Emit axis predicates. */
+  if (cfun->machine->axis_predicate[0])
+    nvptx_init_axis_predicate (file,
+			       REGNO (cfun->machine->axis_predicate[0]), "y");
+  if (cfun->machine->axis_predicate[1])
+    nvptx_init_axis_predicate (file,
+			       REGNO (cfun->machine->axis_predicate[1]), "x");
 }
 
 /* Output a return instruction.  Also copy the return value to its outgoing
@@ -2958,13 +2980,15 @@  nvptx_single (unsigned mask, basic_block
   for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
     if (GOMP_DIM_MASK (mode) & skip_mask)
       {
-	rtx id = gen_reg_rtx (SImode);
-	rtx pred = gen_reg_rtx (BImode);
 	rtx_code_label *label = gen_label_rtx ();
+	rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
 
-	emit_insn_before (gen_oacc_dim_pos (id, GEN_INT (mode)), head);
-	rtx cond = gen_rtx_SET (pred, gen_rtx_NE (BImode, id, const0_rtx));
-	emit_insn_before (cond, head);
+	if (!pred)
+	  {
+	    pred = gen_reg_rtx (BImode);
+	    cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER] = pred;
+	  }
+	
 	rtx br;
 	if (mode == GOMP_DIM_VECTOR)
 	  br = gen_br_true (pred, label);
Index: gcc/config/nvptx/nvptx.h
===================================================================
--- gcc/config/nvptx/nvptx.h	(revision 227632)
+++ gcc/config/nvptx/nvptx.h	(working copy)
@@ -238,6 +238,7 @@  struct GTY(()) machine_function
   HOST_WIDE_INT outgoing_stdarg_size;
   int ret_reg_mode;
   int punning_buffer_size;
+  rtx axis_predicate[2];
 };
 #endif