diff mbox

[RFC] Patch candidate for PR81657

Message ID 20c1c720-dd5d-2b70-66d3-35131c63ea0f@suse.cz
State New
Headers show

Commit Message

Martin Liška Aug. 2, 2017, 8:58 a.m. UTC
Hi.

I've just sketched a patch for the PR. I'm not fully happy about the
complexity of the emit_block_move_hints function, and this patch adds yet
more logic to it. Any ideas how to make it more transparent? Maybe split it
into an analysis function that returns the decision and a second function
that just carries that decision out? Maybe endp should be replaced with an
enum to make it more readable?

Ideas are welcome.

Thanks,
Martin
diff mbox

Patch

From 916fcf526ecac9eb73439cfee893e8447a1f3d53 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Wed, 2 Aug 2017 10:56:03 +0200
Subject: [PATCH] Patch candidate.

---
 gcc/builtins.c         | 13 ++++++++++++-
 gcc/config/i386/i386.h |  3 +++
 gcc/defaults.h         |  7 +++++++
 gcc/doc/tm.texi        |  5 +++++
 gcc/doc/tm.texi.in     |  5 +++++
 gcc/expr.c             | 13 ++++++++++++-
 gcc/expr.h             |  4 +++-
 7 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 016f68d2cb6..f97655f3b3e 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3485,13 +3485,24 @@  expand_builtin_memory_copy_args (tree dest, tree src, tree len,
   src_mem = get_memory_rtx (src, len);
   set_mem_align (src_mem, src_align);
 
+  bool is_move_done;
+
   /* Copy word part most expediently.  */
   dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
 				     CALL_EXPR_TAILCALL (exp)
 				     && (endp == 0 || target == const0_rtx)
 				     ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
 				     expected_align, expected_size,
-				     min_size, max_size, probable_max_size);
+				     min_size, max_size, probable_max_size,
+				     TARGET_HAS_FAST_MEMPCPY_ROUTINE
+				     && endp == 1,
+				     &is_move_done);
+
+  /* Bail out when a mempcpy call would be expanded as a libcall and
+     the target provides a fast implementation of the mempcpy
+     routine.  */
+  if (!is_move_done)
+    return NULL_RTX;
 
   if (dest_addr == 0)
     {
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e8ae3e3b3cc..bf205f1345b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1964,6 +1964,9 @@  typedef struct ix86_args {
 
 #define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
 
+/* The C library provides a fast implementation of the mempcpy function.  */
+#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 1
+
 /* Define if shifts truncate the shift count which implies one can
    omit a sign-extension or zero-extension of a shift count.
 
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 7ad92d920f8..73325e93699 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1346,6 +1346,13 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define SET_RATIO(speed) MOVE_RATIO (speed)
 #endif
 
+/* By default, do not generate a libcall to mempcpy; instead use a
+   libcall to memcpy and adjust the return value.  */
+
+#ifndef TARGET_HAS_FAST_MEMPCPY_ROUTINE
+#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 0
+#endif
+
 /* Supply a default definition for FUNCTION_ARG_PADDING:
    usually pad upward, but pad short args downward on
    big-endian machines.  */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 23e85c7afea..af076356910 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6500,6 +6500,11 @@  optimized for speed rather than size.
 If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
 @end defmac
 
+@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE
+Define this as nonzero if the C library has a fast @code{mempcpy}.  By default
+GCC uses a libcall to @code{memcpy} and adjusts the return value instead.
+@end defmac
+
 @defmac USE_LOAD_POST_INCREMENT (@var{mode})
 A C expression used to determine whether a load postincrement is a good
 thing to use for a given mode.  Defaults to the value of
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 6df08a2c477..334258dd59d 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4731,6 +4731,11 @@  optimized for speed rather than size.
 If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
 @end defmac
 
+@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE
+Define this as nonzero if the C library has a fast @code{mempcpy}.  By default
+GCC uses a libcall to @code{memcpy} and adjusts the return value instead.
+@end defmac
+
 @defmac USE_LOAD_POST_INCREMENT (@var{mode})
 A C expression used to determine whether a load postincrement is a good
 thing to use for a given mode.  Defaults to the value of
diff --git a/gcc/expr.c b/gcc/expr.c
index b194866313d..b5b73ae12ca 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1548,12 +1548,16 @@  emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
 		       unsigned int expected_align, HOST_WIDE_INT expected_size,
 		       unsigned HOST_WIDE_INT min_size,
 		       unsigned HOST_WIDE_INT max_size,
-		       unsigned HOST_WIDE_INT probable_max_size)
+		       unsigned HOST_WIDE_INT probable_max_size,
+		       bool bail_out_libcall, bool *is_move_done)
 {
   bool may_use_call;
   rtx retval = 0;
   unsigned int align;
 
+  if (is_move_done)
+    *is_move_done = true;
+
   gcc_assert (size);
   if (CONST_INT_P (size) && INTVAL (size) == 0)
     return 0;
@@ -1610,6 +1614,13 @@  emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
 	   && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
 	   && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
     {
+      if (bail_out_libcall)
+	{
+	  if (is_move_done)
+	    *is_move_done = false;
+	  return retval;
+	}
+
       /* Since x and y are passed to a libcall, mark the corresponding
 	 tree EXPR as addressable.  */
       tree y_expr = MEM_EXPR (y);
diff --git a/gcc/expr.h b/gcc/expr.h
index b92ff3ce8a6..1cc82b9b00e 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -110,7 +110,9 @@  extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
 			          unsigned int, HOST_WIDE_INT,
 				  unsigned HOST_WIDE_INT,
 				  unsigned HOST_WIDE_INT,
-				  unsigned HOST_WIDE_INT);
+				  unsigned HOST_WIDE_INT,
+				  bool bail_out_libcall = false,
+				  bool *is_move_done = NULL);
 extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
 				 by_pieces_constfn, void *);
 extern bool emit_storent_insn (rtx to, rtx from);
-- 
2.13.3