diff mbox

More improvements to sparc VIS vec_init code generation.

Message ID 201111091741.36943.ebotcazou@adacore.com
State New
Headers show

Commit Message

Eric Botcazou Nov. 9, 2011, 4:41 p.m. UTC
> Eric, the testsuite target tests for vis2 and vis3 capable hardware
> work well in my own testing but if you find some problem with how
> it's done just let me know and I'll try to fix it up.

There are many failures in 64-bit mode with VIS1 because of the use of the high 
part to expand vec_init, both in vector_init_move_words:

    case V2SImode:
      emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
      emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
      return true;

and in sparc_expand_vector_init_vis1:

  if (tmp != target)
    emit_move_insn (target, gen_highpart (mode, tmp));

Taking the high part is valid only if it is at least as large as a word (in the 
middle-end sense).  Otherwise, the compiler stops.  So, in 64-bit mode, this 
breaks for V2SImode in vector_init_move_words and for V2HImode and V4QImode in 
sparc_expand_vector_init_vis1.

I tried to think about some solutions, for example using a paradoxical subreg 
in sparc_expand_vector_init_vis1, but this pessimizes.

> Support for the short floating point loads starts to show up here as
> well, and I intend to flesh these out, support the short float stores,
> and add VIS intrinsic access to them.

There isn't an equivalent for 32-bit, is there?  That is, you can load 8, 16 and 
64 bits in the upper FP regs, but not 32 bits?


While trying to debug the failures, I've made some cosmetic changes left and 
right.  Tested on SPARC/Solaris, applied on the mainline.


2011-11-09  Eric Botcazou  <ebotcazou@adacore.com>

	* config/sparc/sparc.c (output_v8plus_shift): Take INSN parameter first
	and adjust head comment.
	(output_v8plus_mult): Change NAME into OPCODE and adjust throughout.
	(vector_init_bshuffle): Add head comment.
	(vector_init_move_words): Likewise.
	(vector_init_prepare_elts): Likewise.  Take LOCS parameter first.
	(sparc_expand_vector_init): Likewise.  Adjust call to above function.
	(sparc_expand_vector_init_vis2): Likewise.
	(sparc_expand_vector_init_vis1): Likewise.
	(sparc_expand_conditional_move): Likewise.
	(sparc_expand_vcond): Likewise.
	* config/sparc/sparc-protos.h (output_v8plus_shift): Adjust.
	* config/sparc/sparc.md (ashldi3_v8plus): Adjust call to
	output_v8plus_shift.
	(ashrdi3_v8plus): Likewise.
	(lshrdi3_v8plus): Likewise.

Comments

David Miller Nov. 9, 2011, 6:54 p.m. UTC | #1
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Wed, 9 Nov 2011 17:41:36 +0100

> There isn't an equivalent for 32-bit, is there?  That is, you can load 8, 16 and 
> 64 bits in the upper FP regs, but not 32 bits?

Indeed, you need to use normal 32-bit loads and thus the lower 32
float regs.

BTW, I suspect the paradoxical subreg trick will work without
pessimizing as long as you emit a clobber first.

Thanks for looking into the 64-bit failures, and actually if you want
I can work on fixing them myself this afternoon.
Eric Botcazou Nov. 9, 2011, 9:31 p.m. UTC | #2
> Thanks for looking into the 64-bit failures, and actually if you want
> I can work on fixing them myself this afternoon.

Yes, you probably have a better grasp on the code than me.
diff mbox

Patch

Index: config/sparc/sparc.md
===================================================================
--- config/sparc/sparc.md	(revision 181149)
+++ config/sparc/sparc.md	(working copy)
@@ -5649,7 +5649,7 @@  (define_insn "ashldi3_v8plus"
 		   (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
    (clobber (match_scratch:SI 3 "=X,X,&h"))]
   "TARGET_V8PLUS"
-  "* return output_v8plus_shift (operands, insn, \"sllx\");"
+  "* return output_v8plus_shift (insn ,operands, \"sllx\");"
   [(set_attr "type" "multi")
    (set_attr "length" "5,5,6")])
 
@@ -5759,7 +5759,7 @@  (define_insn "ashrdi3_v8plus"
 		     (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
    (clobber (match_scratch:SI 3 "=X,X,&h"))]
   "TARGET_V8PLUS"
-  "* return output_v8plus_shift (operands, insn, \"srax\");"
+  "* return output_v8plus_shift (insn, operands, \"srax\");"
   [(set_attr "type" "multi")
    (set_attr "length" "5,5,6")])
 
@@ -5849,7 +5849,7 @@  (define_insn "lshrdi3_v8plus"
 		     (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
    (clobber (match_scratch:SI 3 "=X,X,&h"))]
   "TARGET_V8PLUS"
-  "* return output_v8plus_shift (operands, insn, \"srlx\");"
+  "* return output_v8plus_shift (insn, operands, \"srlx\");"
   [(set_attr "type" "multi")
    (set_attr "length" "5,5,6")])
 
Index: config/sparc/sparc-protos.h
===================================================================
--- config/sparc/sparc-protos.h	(revision 181149)
+++ config/sparc/sparc-protos.h	(working copy)
@@ -74,7 +74,8 @@  extern const char *output_ubranch (rtx,
 extern const char *output_cbranch (rtx, rtx, int, int, int, rtx);
 extern const char *output_return (rtx);
 extern const char *output_sibcall (rtx, rtx);
-extern const char *output_v8plus_shift (rtx *, rtx, const char *);
+extern const char *output_v8plus_shift (rtx, rtx *, const char *);
+extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern const char *output_v9branch (rtx, rtx, int, int, int, int, rtx);
 extern const char *output_probe_stack_range (rtx, rtx);
 extern bool emit_scc_insn (rtx []);
@@ -104,7 +105,6 @@  extern int v9_regcmp_p (enum rtx_code);
 extern int sparc_check_64 (rtx, rtx);
 extern rtx gen_df_reg (rtx, int);
 extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
-extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern void sparc_expand_vector_init (rtx, rtx);
 extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx);
 extern bool sparc_expand_conditional_move (enum machine_mode, rtx *);
Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c	(revision 181149)
+++ config/sparc/sparc.c	(working copy)
@@ -576,7 +576,8 @@  static bool sparc_print_operand_punct_va
 static void sparc_print_operand (FILE *, rtx, int);
 static void sparc_print_operand_address (FILE *, rtx);
 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
-					   enum machine_mode, secondary_reload_info *);
+					   enum machine_mode,
+					   secondary_reload_info *);
 
 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 /* Table of valid machine attributes.  */
@@ -9120,10 +9121,11 @@  sparc_check_64 (rtx x, rtx insn)
   return 0;
 }
 
-/* Returns assembly code to perform a DImode shift using
-   a 64-bit global or out register on SPARC-V8+.  */
+/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
+   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
+
 const char *
-output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
+output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
 {
   static char asm_code[60];
 
@@ -9148,12 +9150,13 @@  output_v8plus_shift (rtx *operands, rtx
       output_asm_insn ("or\t%L1, %3, %3", operands);
     }
 
-  strcpy(asm_code, opcode);
+  strcpy (asm_code, opcode);
 
   if (which_alternative != 2)
     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
   else
-    return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
+    return
+      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
 }
 
 /* Output rtl to increment the profiler label LABELNO
@@ -11156,7 +11159,7 @@  sparc_conditional_register_usage (void)
     global_regs[SPARC_GSR_REG] = 1;
 }
 
-/* Implement TARGET_PREFERRED_RELOAD_CLASS
+/* Implement TARGET_PREFERRED_RELOAD_CLASS:
 
    - We can't load constants into FP registers.
    - We can't load FP constants into integer registers when soft-float,
@@ -11206,8 +11209,11 @@  sparc_preferred_reload_class (rtx x, reg
   return rclass;
 }
 
+/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
+   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
+
 const char *
-output_v8plus_mult (rtx insn, rtx *operands, const char *name)
+output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
 {
   char mulstr[32];
 
@@ -11222,7 +11228,7 @@  output_v8plus_mult (rtx insn, rtx *opera
       if (which_alternative == 1)
 	{
 	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
-	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", name);
+	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
 	  output_asm_insn (mulstr, operands);
 	  return "srlx\t%L0, 32, %H0";
 	}
@@ -11230,7 +11236,7 @@  output_v8plus_mult (rtx insn, rtx *opera
 	{
 	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
           output_asm_insn ("or\t%L1, %3, %3", operands);
-          sprintf (mulstr, "%s\t%%3, %%2, %%3", name);
+          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
 	  output_asm_insn (mulstr, operands);
 	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
           return "mov\t%3, %L0";
@@ -11241,7 +11247,7 @@  output_v8plus_mult (rtx insn, rtx *opera
       if (which_alternative == 1)
 	{
 	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
-          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", name);
+          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
 	  output_asm_insn (mulstr, operands);
 	  return "srlx\t%L0, 32, %H0";
 	}
@@ -11249,7 +11255,7 @@  output_v8plus_mult (rtx insn, rtx *opera
 	{
 	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
           output_asm_insn ("or\t%L1, %3, %3", operands);
-	  sprintf (mulstr, "%s\t%%3, %%3, %%3", name);
+	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
 	  output_asm_insn (mulstr, operands);
 	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
           return "mov\t%3, %L0";
@@ -11262,7 +11268,7 @@  output_v8plus_mult (rtx insn, rtx *opera
       output_asm_insn ("or\t%L1, %H1, %H1", operands);
       output_asm_insn ("sllx\t%H2, 32, %L1", operands);
       output_asm_insn ("or\t%L2, %L1, %L1", operands);
-      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", name);
+      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
       output_asm_insn (mulstr, operands);
       return "srlx\t%L0, 32, %H0";
     }
@@ -11272,15 +11278,20 @@  output_v8plus_mult (rtx insn, rtx *opera
       output_asm_insn ("sllx\t%H2, 32, %4", operands);
       output_asm_insn ("or\t%L1, %3, %3", operands);
       output_asm_insn ("or\t%L2, %4, %4", operands);
-      sprintf (mulstr, "%s\t%%3, %%4, %%3", name);
+      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
       output_asm_insn (mulstr, operands);
       output_asm_insn ("srlx\t%3, 32, %H0", operands);
       return "mov\t%3, %L0";
     }
 }
 
+/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
+   the N_ELTS values for individual fields contained in LOCS by means of VIS2
+   BSHUFFLE insn.  MODE and INNER_MODE are the modes describing TARGET.  */
+
 static void
-vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode,
+vector_init_bshuffle (rtx target, rtx *locs, int n_elts,
+		      enum machine_mode mode,
 		      enum machine_mode inner_mode)
 {
   rtx mid_target, r0_high, r0_low, r1_high, r1_low;
@@ -11394,6 +11405,11 @@  vector_init_bshuffle (rtx target, rtx *l
     emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
 }
 
+/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
+   values for individual fields VALS by means of simple word moves if this is
+   possible.  MODE and INNER_MODE are the modes describing TARGET.  Return true
+   on success.  */
+
 static bool
 vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
 			enum machine_mode inner_mode)
@@ -11417,11 +11433,13 @@  vector_init_move_words (rtx target, rtx
   return false;
 }
 
-/* Move the elements in rtvec VALS into registers compatible with MODE.
-   Store the rtx for these regs into the corresponding array entry of
-   LOCS.  */
+/* Subroutine of sparc_expand_vector_init.  Move the N_ELTS elements in VALS
+   into registers compatible with MODE and INNER_MODE.  Store the RTX for
+   these regs into the corresponding array entry of LOCS.  */
+
 static void
-vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode,
+vector_init_prepare_elts (rtx *locs, rtx vals, int n_elts,
+			  enum machine_mode mode,
 			  enum machine_mode inner_mode)
 {
   enum machine_mode loc_mode;
@@ -11494,7 +11512,9 @@  vector_init_prepare_elts (rtx vals, int
 		}
 	      else if (code != MEM)
 		{
-		  rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0);
+		  rtx stk
+		    = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode),
+					 0);
 		  emit_move_insn (stk, elt);
 		  m = stk;
 		}
@@ -11516,6 +11536,11 @@  vector_init_prepare_elts (rtx vals, int
     }
 }
 
+/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
+   the N_ELTS values for individual fields contained in LOCS by means of VIS2
+   instructions, among which N_UNIQUE are unique.  MODE and INNER_MODE are the
+   modes describing TARGET.  */
+
 static void
 sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
 			       enum machine_mode mode,
@@ -11541,6 +11566,10 @@  sparc_expand_vector_init_vis2 (rtx targe
     }
 }
 
+/* Subroutine of sparc_expand_vector_init.  Emit code to initialize TARGET to
+   the N_ELTS values for individual fields contained in LOCS by means of VIS1
+   instructions, among which N_UNIQUE are unique.  MODE is TARGET's mode.  */
+
 static void
 sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
 			       enum machine_mode mode)
@@ -11609,6 +11638,8 @@  sparc_expand_vector_init_vis1 (rtx targe
     emit_move_insn (target, gen_highpart (mode, tmp));
 }
 
+/* Emit code to initialize TARGET to values for individual fields VALS.  */
+
 void
 sparc_expand_vector_init (rtx target, rtx vals)
 {
@@ -11650,7 +11681,7 @@  sparc_expand_vector_init (rtx target, rt
   if (vector_init_move_words (target, vals, mode, inner_mode))
     return;
 
-  vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode);
+  vector_init_prepare_elts (locs, vals, n_elts, mode, inner_mode);
 
   if (TARGET_VIS2)
     sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
@@ -11659,6 +11690,8 @@  sparc_expand_vector_init (rtx target, rt
     sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
 }
 
+/* Implement TARGET_SECONDARY_RELOAD.  */
+
 static reg_class_t
 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
 			enum machine_mode mode, secondary_reload_info *sri)
@@ -11722,6 +11755,9 @@  sparc_secondary_reload (bool in_p, rtx x
   return NO_REGS;
 }
 
+/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
+   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */
+
 bool
 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
 {
@@ -11777,6 +11813,12 @@  sparc_expand_conditional_move (enum mach
   return true;
 }
 
+/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
+   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
+   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
+   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
+   code to be used for the condition mask.  */
+
 void
 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
 {