diff mbox series

[MSP430] Use hardware multiply routine to perform HImode widening multiplication (mulhisi3)

Message ID 20191023183724.3df86a4c@jozef-kubuntu
State New
Headers show
Series [MSP430] Use hardware multiply routine to perform HImode widening multiplication (mulhisi3) | expand

Commit Message

Jozef Lawrynowicz Oct. 23, 2019, 5:37 p.m. UTC
For MSP430 in some configurations, GCC will generate code for mulhisi3 by
inserting instructions to widen each 16-bit operand before calling a library
routine for mulsi3.
However, there exists a hardware multiply routine to perform this widening
multiplication, but it is only made use of at -O3 where it is inserted
inline into program.

We can reduce code size and improve performance by always calling the mspabi
helper function to perform this widening multiplication when hardware multiply
is available. 

I also benchmarked the effect of using a library function for mulsidi3
but it resulted in slower execution both with and without hardware multiply
support. It also increased code size for a variety of programs.

Successfully regtested on trunk.

Additionally regtested msp430.exp at -O1, -O2, -O3 and -Os.
There are tests which check each supported hardware multiply option
executes correctly, so running at these optimization levels verifies the changes
in this patch.

Ok for trunk?

Comments

Jeff Law Oct. 27, 2019, 3:32 p.m. UTC | #1
On 10/23/19 11:37 AM, Jozef Lawrynowicz wrote:
> For MSP430 in some configurations, GCC will generate code for mulhisi3 by
> inserting instructions to widen each 16-bit operand before calling a library
> routine for mulsi3.
> However, there exists a hardware multiply routine to perform this widening
> multiplication, but it is only made use of at -O3 where it is inserted
> inline into program.
> 
> We can reduce code size and improve performance by always calling the mspabi
> helper function to perform this widening multiplication when hardware multiply
> is available. 
> 
> I also benchmarked the effect of using a library function for mulsidi3
> but it resulted in slower execution both with and without hardware multiply
> support. It also increased code size for a variety of programs.
> 
> Successfully regtested on trunk.
> 
> Additionally regtested msp430.exp at -O1, -O2, -O3 and -Os.
> There are tests which check each supported hardware multiply option
> executes correctly, so running at these optimization levels verifies the changes
> in this patch.
> 
> Ok for trunk?
> 
> 
> 0001-MSP430-Use-mspabi-helper-function-to-perform-HImode-.patch
> 
> From 695ae0e560396034bc1fc2e9d9e601ab7b3d901b Mon Sep 17 00:00:00 2001
> From: Jozef Lawrynowicz <jozef.l@mittosystems.com>
> Date: Wed, 23 Oct 2019 13:19:45 +0100
> Subject: [PATCH] MSP430: Use mspabi helper function to perform HImode widening
>  multiplication
> 
> gcc/ChangeLog:
> 
> 2019-10-23  Jozef Lawrynowicz  <jozef.l@mittosystems.com>
> 
> 	* config/msp430/msp430.c (msp430_expand_helper): Support expansion of
> 	calls to __mspabi_mpy* functions.
> 	* config/msp430/msp430.md (mulhisi3): New define_expand.
> 	(umulhisi3): New define_expand.
> 	(*mulhisi3_inline): Use old mulhisi3 define_insn.
> 	(*umulhisi3_inline): Use old umulhisi3 define_insn.
OK
jeff
diff mbox series

Patch

From 695ae0e560396034bc1fc2e9d9e601ab7b3d901b Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz <jozef.l@mittosystems.com>
Date: Wed, 23 Oct 2019 13:19:45 +0100
Subject: [PATCH] MSP430: Use mspabi helper function to perform HImode widening
 multiplication

gcc/ChangeLog:

2019-10-23  Jozef Lawrynowicz  <jozef.l@mittosystems.com>

	* config/msp430/msp430.c (msp430_expand_helper): Support expansion of
	calls to __mspabi_mpy* functions.
	* config/msp430/msp430.md (mulhisi3): New define_expand.
	(umulhisi3): New define_expand.
	(*mulhisi3_inline): Use old mulhisi3 define_insn.
	(*umulhisi3_inline): Use old umulhisi3 define_insn.
---
 gcc/config/msp430/msp430.c  | 69 ++++++++++++++++++++++++++++++-------
 gcc/config/msp430/msp430.md | 46 +++++++++++++++++++++++--
 2 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index cd394333983..8a5579f8bce 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -53,6 +53,7 @@ 
 
 
 static void msp430_compute_frame_info (void);
+static bool use_32bit_hwmult (void);
 
 
 
@@ -2710,7 +2711,7 @@  void
 msp430_expand_helper (rtx *operands, const char *helper_name,
 		      bool const_variants)
 {
-  rtx c, f;
+  rtx c, fusage, fsym;
   char *helper_const = NULL;
   int arg1 = 12;
   int arg2 = 13;
@@ -2719,8 +2720,14 @@  msp430_expand_helper (rtx *operands, const char *helper_name,
   machine_mode arg1mode = GET_MODE (operands[1]);
   machine_mode arg2mode = GET_MODE (operands[2]);
   int have_430x = msp430x ? 1 : 0;
+  int expand_mpy = strncmp (helper_name, "__mspabi_mpy",
+			    sizeof ("__mspabi_mpy") - 1) == 0;
+  /* This function has been used incorrectly if CONST_VARIANTS is TRUE for a
+     hwmpy function.  */
+  gcc_assert (!(expand_mpy && const_variants));
 
-  if (CONST_INT_P (operands[2]))
+  /* Emit size-optimal insns for small shifts we can easily do inline.  */
+  if (CONST_INT_P (operands[2]) && !expand_mpy)
     {
       int i;
 
@@ -2737,6 +2744,10 @@  msp430_expand_helper (rtx *operands, const char *helper_name,
 	}
     }
 
+  if (arg1mode != VOIDmode && arg2mode != VOIDmode)
+    /* Modes of arguments must be equal if not constants.  */
+    gcc_assert (arg1mode == arg2mode);
+
   if (arg1mode == VOIDmode)
     arg1mode = arg0mode;
   if (arg2mode == VOIDmode)
@@ -2749,12 +2760,13 @@  msp430_expand_helper (rtx *operands, const char *helper_name,
     }
   else if (arg1mode == DImode)
     {
-      /* Shift value in R8:R11, shift amount in R12.  */
       arg1 = 8;
       arg1sz = 4;
       arg2 = 12;
     }
 
+  /* Use the "const_variant" of a shift library function if requested.
+     These are faster, but have larger code size.  */
   if (const_variants
       && CONST_INT_P (operands[2])
       && INTVAL (operands[2]) >= 1
@@ -2768,25 +2780,58 @@  msp430_expand_helper (rtx *operands, const char *helper_name,
 		(int) INTVAL (operands[2]));
     }
 
+  /* Setup the arguments to the helper function.  */
   emit_move_insn (gen_rtx_REG (arg1mode, arg1),
 		  operands[1]);
   if (!helper_const)
     emit_move_insn (gen_rtx_REG (arg2mode, arg2),
 		    operands[2]);
 
-  c = gen_call_value_internal (gen_rtx_REG (arg0mode, 12),
-			       gen_rtx_SYMBOL_REF (VOIDmode, helper_const
-						   ? helper_const
-						   : helper_name),
-			       GEN_INT (0));
+  if (expand_mpy)
+    {
+      if (msp430_use_f5_series_hwmult ())
+	fsym = gen_rtx_SYMBOL_REF (VOIDmode, concat (helper_name,
+						     "_f5hw", NULL));
+      else if (use_32bit_hwmult ())
+	{
+	  /* When the arguments are 16-bits, the 16-bit hardware multiplier is
+	     used.  */
+	  if (arg1mode == HImode)
+	    fsym = gen_rtx_SYMBOL_REF (VOIDmode, concat (helper_name,
+							 "_hw", NULL));
+	  else
+	    fsym = gen_rtx_SYMBOL_REF (VOIDmode, concat (helper_name,
+							 "_hw32", NULL));
+	}
+      /* 16-bit hardware multiply.  */
+      else if (msp430_has_hwmult ())
+	fsym = gen_rtx_SYMBOL_REF (VOIDmode, concat (helper_name,
+						     "_hw", NULL));
+      else
+	fsym = gen_rtx_SYMBOL_REF (VOIDmode, helper_name);
+    }
+  else
+    fsym = gen_rtx_SYMBOL_REF (VOIDmode,
+			       helper_const ? helper_const : helper_name);
+
+  c = gen_call_value_internal (gen_rtx_REG (arg0mode, 12), fsym, GEN_INT (0));
+
   c = emit_call_insn (c);
   RTL_CONST_CALL_P (c) = 1;
 
-  f = 0;
-  use_regs (&f, arg1, arg1sz);
+  /* Add register usage information for the arguments to the call.  */
+  fusage = NULL;
+  use_regs (&fusage, arg1, arg1sz);
   if (!helper_const)
-    use_regs (&f, arg2, 1);
-  add_function_usage_to (c, f);
+    {
+      /* If we are expanding a shift, we only need to use the low register
+	 for the shift amount.  */
+      if (!expand_mpy)
+	use_regs (&fusage, arg2, 1);
+      else
+	use_regs (&fusage, arg2, arg1sz);
+    }
+  add_function_usage_to (c, fusage);
 
   emit_move_insn (operands[0],
 		  /* Return value will always start in R12.  */
diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index e5ba445c60d..9826017b1ef 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -1638,7 +1638,49 @@ 
   "NOP"
   )
 
-(define_insn "mulhisi3"
+; libgcc helper functions for widening multiplication aren't currently
+; generated by gcc, so we can't catch them later and map them to the mspabi
+; functions.
+; We catch the patterns here and either generate a call to the helper function,
+; or emit the hardware multiply instruction sequence inline.
+;
+; If we don't have hardware multiply support, it will generally be slower and
+; result in larger code to call the mspabi library function to perform the
+; widening multiplication than just leaving GCC to widen the arguments itself.
+;
+; We don't use library functions for SImode->DImode widening since its always
+; larger and slower than letting GCC widen the arguments inline.
+(define_expand "mulhisi3"
+  [(set (match_operand:SI			   0 "register_operand" "=r")
+	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+		 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  "msp430_has_hwmult ()"
+  {
+    /* Leave the other case for the inline insn.  */
+    if (!(optimize > 2 && msp430_has_hwmult ()))
+    {
+      msp430_expand_helper (operands, "__mspabi_mpysl", false);
+      DONE;
+    }
+  }
+)
+
+(define_expand "umulhisi3"
+  [(set (match_operand:SI			   0 "register_operand" "=r")
+	(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+		 (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  "msp430_has_hwmult ()"
+  {
+    /* Leave the other case for the inline insn.  */
+    if (!(optimize > 2 && msp430_has_hwmult ()))
+    {
+      msp430_expand_helper (operands, "__mspabi_mpyul", false);
+      DONE;
+    }
+  }
+)
+
+(define_insn "*mulhisi3_inline"
   [(set (match_operand:SI                          0 "register_operand" "=r")
 	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
 		 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
@@ -1651,7 +1693,7 @@ 
   "
 )
 
-(define_insn "umulhisi3"
+(define_insn "*umulhisi3_inline"
   [(set (match_operand:SI                          0 "register_operand" "=r")
 	(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0"))
 		 (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
-- 
2.17.1