diff mbox

[SH] Implement builtin_strlen

Message ID 5278DE2D.9020604@st.com
State New
Headers show

Commit Message

Christian Bruel Nov. 5, 2013, 12:01 p.m. UTC
Hello,

This patch inlines strlen when optimizing for speed.

A strlen body is now inlined as:

        mov     r4,r0
        tst     #3,r0
        bf/s    .L6
        mov     r4,r1
        mov     #0,r3
.L4:
        mov.l   @r1+,r2
        cmp/str r3,r2
        bf      .L4
        add     #-4,r1
.L6:
        mov.b   @r1+,r2
        tst     r2,r2
        bf/s    .L6
        sett
        mov     r1,r0
        rts
        subc    r4,r0

This gives a few percent performance improvement here and there for
regexp-based benchmarks, but worth highlighting is a 70% speedup for eembc
networking/qos, which now nicely combines sequences like !strncmp(*av,
"any", strlen(*av)).

No regressions for sh-none-elf. OK for trunk ?

Many thanks

Christian

Comments

Kaz Kojima Nov. 5, 2013, 1:12 p.m. UTC | #1
Christian Bruel <christian.bruel@st.com> wrote:
> No regressions for sh-none-elf. OK for trunk ?

OK.

Regards,
	kaz
Christian Bruel Nov. 6, 2013, 8:52 a.m. UTC | #2
On 11/05/2013 02:12 PM, Kaz Kojima wrote:
> Christian Bruel <christian.bruel@st.com> wrote:
>> No regressions for sh-none-elf. OK for trunk ?
> OK.
>
> Regards,
> 	kaz

thanks, applied together with the cleanup referenced earlier and a
slight variable renaming (start_addr->curr_addr, end_addr->start_addr)
for readability as obvious,

Christian
Eric Botcazou Nov. 6, 2013, 10:57 a.m. UTC | #3
> thanks, applied together with the cleanup referenced earlier and a
> slight variable renaming (start_addr->curr_addr, end_addr->start_addr)
> for readability as obvious,

Minor nit: no gcc/ prefix in gcc/ChangeLog.
Christian Bruel Nov. 6, 2013, 11:33 a.m. UTC | #4
On 11/06/2013 11:57 AM, Eric Botcazou wrote:
>> thanks, applied together with the cleanup referenced earlier and a
>> slight variable renaming (start_addr->curr_addr, end_addr->start_addr)
>> for readability as obvious,
> Minor nit: no gcc/ prefix in gcc/ChangeLog.
OK, I didn't notice this. Do you mind if I clean up the other
entries at the same time?

        * gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr, sh_expand_cmpstr):
        * gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare.
        * gcc/config/sh/sh.md (strlensi): New pattern.
        * gcc/config/i386/i386.c (memory_address_length): Extract a part
        * gcc/config/i386/i386.h: Add new tune features about macro-fusion.
        * gcc/config/i386/x86-tune.def (DEF_TUNE): Ditto.
        * gcc/doc/tm.texi: Generated.
        * gcc/doc/tm.texi.in: Ditto.
        * gcc/haifa-sched.c (try_group_insn): New Function.
        * gcc/target.def: Add two hooks: macro_fusion_p and
        * gcc/config/i386/i386-c.c (ix86_target_macros_internal): Separate
        * gcc/config/i386/i386.c (ix86_option_override_internal): Ditto.
        * gcc/config/i386/i386.h (enum target_cpu_default): Ditto.
        * gcc/config/i386/x86-tune.def (DEF_TUNE): Ditto.
        * gcc/tree-cfg.c (replace_loop_annotate): Replace warning by
        * gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr): New function.
        * gcc/config/sh/sh-protos.h (sh_expand_cmpstrn): Declare.
        * gcc/config/sh/sh.md (cmpstrnsi): New pattern.
        * gcc/config/arc/arc.c (arc_ccfsm_post_advance): Also handle
        * gcc/config/sh/t-sh (sh-mem.o): New rule.
        * gcc/config/sh/sh-mem.cc (expand_block_move): Moved here.
        * gcc/config/sh/sh.c (force_into, expand_block_move): Move to
sh-mem.c.
        * gcc/config/sh/sh-protos.h (sh_expand_cmpstr): Declare.
        * gcc/config/sh/sh.md (cmpstrsi, cmpstr_t): New patterns.
        * gcc/config/rs6000/vector.md (vec_unpacks_hi_v4sf): Correct for
        * gcc/config/arm/vfp.md (movhf_vfp_neon): Update type attribute.
        * gcc/config/arm/neon.md (neon_mov): Update type attribute.

thanks

Christian
Eric Botcazou Nov. 6, 2013, 11:56 a.m. UTC | #5
> ok I'll didn't notice this.,  do you mind that I cleanup the other
> entries in the same time ?

I already cleaned up some, but go ahead.
diff mbox

Patch

2013-11-05  Christian Bruel  <christian.bruel@st.com>

	* gcc/config/sh/sh-mem.cc (sh_expand_strlen): New function.
	* gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare.
	* gcc/config/sh/sh.md (strlensi): New pattern.
	(UNSPEC_BUILTIN_STRLEN): Define.

2013-11-05  Christian Bruel  <christian.bruel@st.com>

	* gcc.target/sh/strlen.c: New test.

diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh.md ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md
--- gcc/config/sh/sh.md	2013-11-05 12:28:38.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md	2013-11-05 11:16:00.000000000 +0100
@@ -161,6 +161,9 @@ 
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   (UNSPEC_PCREL_SYMOFF	46)
 
+  ;; Misc builtins
+  (UNSPEC_BUILTIN_STRLEN 47)
+
   ;; These are used with unspec_volatile.
   (UNSPECV_BLOCKAGE	0)
   (UNSPECV_ALIGN	1)
@@ -12081,6 +12084,20 @@ 
     FAIL;
 })
 
+(define_expand "strlensi"
+  [(set (match_operand:SI 0 "register_operand")
+	(unspec:SI [(match_operand:BLK 1 "memory_operand")
+		   (match_operand:SI 2 "immediate_operand")
+		   (match_operand:SI 3 "immediate_operand")]
+		  UNSPEC_BUILTIN_STRLEN))]
+  "TARGET_SH1 && optimize"
+{
+ if (! optimize_insn_for_size_p () && sh_expand_strlen (operands))
+   DONE;
+ else
+   FAIL;
+})
+
 
 ;; -------------------------------------------------------------------------
 ;; Floating point instructions.
diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-mem.cc ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc
--- gcc/config/sh/sh-mem.cc	2013-11-05 12:30:33.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc	2013-11-04 15:34:05.000000000 +0100
@@ -469,3 +469,83 @@ 
 
   return true;
 }
+
+/* Emit code to perform an inline strlen.  Always returns true.
+
+   OPERANDS[0] is the destination.
+   OPERANDS[1] is the string.
+   OPERANDS[2] is the char to search for.
+   OPERANDS[3] is the alignment; below 4, a run-time check is emitted.  */
+bool
+sh_expand_strlen (rtx *operands)
+{
+  rtx addr1 = operands[1];
+  rtx start_addr = copy_addr_to_reg (XEXP (addr1, 0));
+  rtx end_addr = gen_reg_rtx (Pmode);
+  rtx tmp0 = gen_reg_rtx (SImode);
+  rtx tmp1 = gen_reg_rtx (SImode);
+  rtx L_return = gen_label_rtx ();
+  rtx L_loop_byte = gen_label_rtx ();
+
+  rtx jump;
+  rtx L_loop_long = gen_label_rtx ();
+  rtx L_end_loop_long = gen_label_rtx ();
+
+  int align = INTVAL (operands[3]);
+
+  emit_move_insn (operands[0], GEN_INT (-1));
+
+  /* Remember start of string in end_addr; start_addr is the cursor.  */
+  emit_move_insn (end_addr, start_addr);
+
+  if (align < 4)
+    {
+      emit_insn (gen_tstsi_t (GEN_INT (3), start_addr));
+      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+    }
+
+  emit_move_insn (tmp0, operands[2]);
+
+  addr1 = adjust_automodify_address (addr1, SImode, start_addr, 0);
+
+  /* Start long loop: one SImode load, cursor advanced by 4.  */
+  emit_label (L_loop_long);
+
+  /* start_addr is aligned here, OK to load a word into tmp1.  */
+  emit_move_insn (tmp1, addr1);
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 4));
+
+  /* Is there a 0 byte ?  */
+  emit_insn (gen_cmpstr_t (tmp0, tmp1));
+
+  jump = emit_jump_insn (gen_branch_false (L_loop_long));
+  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+  /* End long loop.  */
+
+  emit_label (L_end_loop_long);
+
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, -4));
+
+  /* Start byte loop (also the target of the alignment check branch).  */
+  addr1 = adjust_address (addr1, QImode, 0);
+
+  emit_label (L_loop_byte);
+
+  emit_insn (gen_extendqisi2 (tmp1, addr1));
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 1));
+
+  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
+  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+  /* End byte loop; start_addr is now one past the zero byte.  */
+
+  emit_label (L_return);
+
+  emit_insn (gen_addsi3 (end_addr, end_addr, GEN_INT (1)));
+
+  emit_insn (gen_subsi3 (operands[0], start_addr, end_addr));
+
+  return true;
+}
diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-protos.h ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h
--- gcc/config/sh/sh-protos.h	2013-11-05 12:47:44.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h	2013-11-05 10:14:48.000000000 +0100
@@ -118,6 +118,7 @@ 
 extern void prepare_move_operands (rtx[], enum machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
+extern bool sh_expand_strlen  (rtx *);
 extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
 					       enum rtx_code comparison);
 extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/ChangeLog ../../gnu_trunk.test/gcc/gcc/testsuite/ChangeLog
diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/gcc.target/sh/strlen.c ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c
--- gcc/testsuite/gcc.target/sh/strlen.c	1970-01-01 01:00:00.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c	2013-11-05 09:45:43.000000000 +0100
@@ -0,0 +1,19 @@ 
+/* Check that the __builtin_strlen function is inlined with cmp/str
+   when optimizing for speed.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+/* { dg-final { scan-assembler-times "cmp/str" 2 } } */
+/* { dg-final { scan-assembler-times "tst\t#3" 1 } } */
+
+test00 (const char *s1)
+{
+  return __builtin_strlen (s1);
+}
+
+/* Check that no alignment test is emitted for a pointer assumed 4-aligned.  */
+test03(const char *s1)
+{
+  return __builtin_strlen (__builtin_assume_aligned (s1, 4));
+}