[msp430] add __delay_cycles() builtin
diff mbox

Message ID 201405142130.s4ELUFp7008593@greed.delorie.com
State New
Headers show

Commit Message

DJ Delorie May 14, 2014, 9:30 p.m. UTC
Adds a new __delay_cycles() builtin to the msp430 backend.  Committed.

	* config/msp430/msp430.c (msp430_builtin): Add
	MSP430_BUILTIN_DELAY_CYCLES.
	(msp430_init_builtins): Register void __delay_cycles(long long).
	(msp430_builtin_decl): Add it.
	(cg_magic_constant): New.
	(msp430_expand_delay_cycles): New.
	(msp430_expand_builtin): Call it.
	(msp430_print_operand_raw): Change integer printing from "int" to
	HOST_WIDE_INT.
	* config/msp430/msp430.md (define_constants): Add delay_cycles tags.
	(delay_cycles_start): New.
	(delay_cycles_end): New.
	(delay_cycles_32): New.
	(delay_cycles_32x): New.
	(delay_cycles_16): New.
	(delay_cycles_16x): New.
	(delay_cycles_2): New.
	(delay_cycles_1): New.
	* doc/extend.texi: Document __delay_cycles().

Patch
diff mbox

Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 210437)
+++ gcc/doc/extend.texi	(working copy)
@@ -13191,12 +13191,21 @@  once the handler returns.
 
 @item __bis_SR_register_on_exit (int @var{mask})
 This sets the indicated bits in the saved copy of the status register
 currently residing on the stack.  This only works inside interrupt
 handlers and the changes to the status register will only take affect
 once the handler returns.
+
+@item __delay_cycles (long long @var{cycles})
+This inserts an instruction sequence that takes exactly @var{cycles}
+cycles (between 0 and about 17E9) to complete.  The inserted sequence
+may use jumps, loops, or no-ops, and does not interfere with any other
+instructions.  Note that @var{cycles} must be a compile-time constant
+integer---that is, you must pass a number, not a variable that may be
+optimized to a constant later.  The number of cycles delayed by this
+builtin is exact.
 @end table
 
 @node NDS32 Built-in Functions
 @subsection NDS32 Built-in Functions
 
 These built-in functions are available for the NDS32 target:
Index: gcc/config/msp430/msp430.md
===================================================================
--- gcc/config/msp430/msp430.md	(revision 210437)
+++ gcc/config/msp430/msp430.md	(working copy)
@@ -44,12 +44,21 @@ 
    UNS_PUSH_INTR
    UNS_POP_INTR
    UNS_BIC_SR
    UNS_BIS_SR
 
    UNS_REFSYM_NEED_EXIT
+
+   UNS_DELAY_32
+   UNS_DELAY_32X
+   UNS_DELAY_16
+   UNS_DELAY_16X
+   UNS_DELAY_2
+   UNS_DELAY_1
+   UNS_DELAY_START
+   UNS_DELAY_END
   ])
 
 (include "predicates.md")
 (include "constraints.md")
 
 (define_mode_iterator QHI [QI HI PSI])
@@ -1314,12 +1323,96 @@ 
       return \"MOV.W\t%1, %0 { SUB.W\t#0, %0 { AND.W\t%2, %0\";
     else
       return \"SUB.W\t#0, %0 { AND.W\t%2, %0\";
   "
   )
 
+(define_insn "delay_cycles_start"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
+		    UNS_DELAY_START)]
+  ""
+  "; Begin %J0 cycle delay"
+  )
+
+(define_insn "delay_cycles_end"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
+		    UNS_DELAY_END)]
+  ""
+  "; End %J0 cycle delay"
+  )
+
+(define_insn "delay_cycles_32"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")
+		     (match_operand 1 "immediate_operand" "i")
+		     ] UNS_DELAY_32)]
+  ""
+  "PUSH	r13
+	PUSH	r14
+	MOV.W	%A0, r13
+	MOV.W	%B0, r14
+1:	SUB.W	#1, r13
+	SUBC.W	#0, r14
+	JNE	1b
+	TST.W	r13
+	JNE	1b
+	POP	r14
+	POP	r13"
+  )
+
+(define_insn "delay_cycles_32x"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")
+		     (match_operand 1 "immediate_operand" "i")
+		     ] UNS_DELAY_32X)]
+  ""
+  "PUSHM.A	#2,r13
+	MOV.W	%A0, r13
+	MOV.W	%B0, r14
+1:	SUB.W	#1, r13
+	SUBC.W	#0, r14
+	JNE	1b
+	TST.W	r13
+	JNE	1b
+	POPM.A	#2,r13"
+  )
+
+(define_insn "delay_cycles_16"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")
+		     (match_operand 1 "immediate_operand" "i")
+		     ] UNS_DELAY_16)]
+  ""
+  "PUSH	r13
+	MOV.W	%0, r13
+1:	SUB.W	#1, r13
+	JNE	1b
+	POP	r13"
+  )
+
+(define_insn "delay_cycles_16x"
+  [(unspec_volatile [(match_operand 0 "immediate_operand" "i")
+		     (match_operand 1 "immediate_operand" "i")
+		     ] UNS_DELAY_16X)]
+  ""
+  "PUSHM.A	#1,r13
+	MOV.W	%0, r13
+1:	SUB.W	#1, r13
+	JNE	1b
+	POPM.A	#1,r13"
+  )
+
+(define_insn "delay_cycles_2"
+  [(unspec_volatile [(const_int 0) ] UNS_DELAY_2)]
+  ""
+  "JMP	.+2"
+  )
+
+(define_insn "delay_cycles_1"
+  [(unspec_volatile [(const_int 0) ] UNS_DELAY_1)]
+  ""
+  "NOP"
+  )
+
 (define_insn "mulhisi3"
   [(set (match_operand:SI                          0 "register_operand" "=r")
 	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
 		 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
   "optimize > 2 && msp430_hwmult_type != NONE"
   "*
Index: gcc/config/msp430/msp430.c
===================================================================
--- gcc/config/msp430/msp430.c	(revision 210437)
+++ gcc/config/msp430/msp430.c	(working copy)
@@ -1191,55 +1191,178 @@  msp430_function_section (tree decl, enum
 #define TARGET_ASM_FUNCTION_SECTION msp430_function_section
 
 enum msp430_builtin
 {
   MSP430_BUILTIN_BIC_SR,
   MSP430_BUILTIN_BIS_SR,
+  MSP430_BUILTIN_DELAY_CYCLES,
   MSP430_BUILTIN_max
 };
 
 static GTY(()) tree msp430_builtins [(int) MSP430_BUILTIN_max];
 
 static void
 msp430_init_builtins (void)
 {
   tree void_ftype_int = build_function_type_list (void_type_node, integer_type_node, NULL);
+  tree void_ftype_longlong = build_function_type_list (void_type_node, long_long_integer_type_node, NULL);
 
   msp430_builtins[MSP430_BUILTIN_BIC_SR] =
     add_builtin_function ( "__bic_SR_register_on_exit", void_ftype_int,
 			   MSP430_BUILTIN_BIC_SR, BUILT_IN_MD, NULL, NULL_TREE);
 
   msp430_builtins[MSP430_BUILTIN_BIS_SR] =
     add_builtin_function ( "__bis_SR_register_on_exit", void_ftype_int,
 			   MSP430_BUILTIN_BIS_SR, BUILT_IN_MD, NULL, NULL_TREE);
+
+  msp430_builtins[MSP430_BUILTIN_DELAY_CYCLES] =
+    add_builtin_function ( "__delay_cycles", void_ftype_longlong,
+			   MSP430_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL, NULL_TREE);
 }
 
 static tree
 msp430_builtin_decl (unsigned code, bool initialize ATTRIBUTE_UNUSED)
 {
   switch (code)
     {
     case MSP430_BUILTIN_BIC_SR:
     case MSP430_BUILTIN_BIS_SR:
+    case MSP430_BUILTIN_DELAY_CYCLES:
       return msp430_builtins[code];
     default:
       return error_mark_node;
     }
 }
 
+/* Return 1 if C is one of the constants listed below, else 0.  These are
+   really register reads, which are faster than regular constants.  */
+static int
+cg_magic_constant (HOST_WIDE_INT c)
+{
+  switch (c)
+    {
+    case 0xffff:
+    case -1:
+    case 0:
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+static rtx
+msp430_expand_delay_cycles (rtx arg)
+{
+  HOST_WIDE_INT i, c, n;
+  /* Cycle counts differ for MSP430X: CYCX (M, X) is X when msp430x is set, else M.  */
+#define CYCX(M,X) (msp430x ? (X) : (M))
+
+  if (GET_CODE (arg) != CONST_INT)
+    {
+      error ("__delay_cycles() only takes constant arguments");
+      return NULL_RTX;
+    }
+
+  c = INTVAL (arg);
+
+  if (HOST_BITS_PER_WIDE_INT > 32)
+    {
+      if (c < 0)
+	{
+	  error ("__delay_cycles only takes non-negative cycle counts.");
+	  return NULL_RTX;
+	}
+    }
+
+  emit_insn (gen_delay_cycles_start (arg));
+
+  /* For 32-bit loops, there's 14(16) cycles of overhead, 7 per iteration for the first 0x10000 iterations and 4 per iteration after that.  */
+  if (c > 3 * 0xffff + CYCX (7, 10))
+    {
+      n = c;
+      /* There are 4 cycles in the short (i > 0xffff) loop and 7 in the long (i <= 0xffff) loop.  */
+      if (c >= 0x10000 * 7 + CYCX (14, 16))
+	{
+	  i = 0x10000;
+	  c -= CYCX (14, 16) + 7 * 0x10000;
+	  i += c / 4;
+	  c %= 4;
+	  if ((unsigned long long) i > 0xffffffffULL)
+	    {
+	      error ("__delay_cycles is limited to 32-bit loop counts.");
+	      return NULL_RTX;
+	    }
+	}
+      else
+	{
+	  i = (c - CYCX (14, 16)) / 7;
+	  c -= CYCX (14, 16) + i * 7;
+	}
+
+      if (cg_magic_constant (i & 0xffff))
+	c ++;
+      if (cg_magic_constant ((i >> 16) & 0xffff))
+	c ++;
+
+      if (msp430x)
+	emit_insn (gen_delay_cycles_32x (GEN_INT (i), GEN_INT (n - c)));
+      else
+	emit_insn (gen_delay_cycles_32 (GEN_INT (i), GEN_INT (n - c)));
+    }
+
+  /* For 16-bit loops, there's 7(10) + 3x cycles, so the max is 0x30004 (0x30007 on MSP430X) cycles.  */
+  if (c > 12)
+    {
+      n = c;
+      i = (c - CYCX (7, 10)) / 3;
+      c -= CYCX (7, 10) + i * 3;
+
+      if (cg_magic_constant (i))
+	c ++;
+
+      if (msp430x)
+	emit_insn (gen_delay_cycles_16x (GEN_INT (i), GEN_INT (n - c)));
+      else
+	emit_insn (gen_delay_cycles_16 (GEN_INT (i), GEN_INT (n - c)));
+    }
+
+  while (c > 1)
+    {
+      emit_insn (gen_delay_cycles_2 ());
+      c -= 2;
+    }
+
+  if (c)
+    {
+      emit_insn (gen_delay_cycles_1 ());
+      c -= 1;
+    }
+
+  emit_insn (gen_delay_cycles_end (arg));
+
+  return NULL_RTX;
+}
+
 static rtx
 msp430_expand_builtin (tree exp,
 		       rtx target ATTRIBUTE_UNUSED,
 		       rtx subtarget ATTRIBUTE_UNUSED,
 		       enum machine_mode mode ATTRIBUTE_UNUSED,
 		       int ignore ATTRIBUTE_UNUSED)
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
   rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
 
+  if (fcode == MSP430_BUILTIN_DELAY_CYCLES)
+    return msp430_expand_delay_cycles (arg1);
+
   if (! msp430_is_interrupt_func ())
     {
       error ("MSP430 builtin functions only work inside interrupt handlers");
       return NULL_RTX;
     }