From patchwork Mon Oct 11 20:46:17 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jakub Jelinek X-Patchwork-Id: 67481 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 6BF08B70AF for ; Tue, 12 Oct 2010 07:46:30 +1100 (EST) Received: (qmail 18167 invoked by alias); 11 Oct 2010 20:46:29 -0000 Received: (qmail 18151 invoked by uid 22791); 11 Oct 2010 20:46:27 -0000 X-SWARE-Spam-Status: No, hits=-5.5 required=5.0 tests=AWL, BAYES_05, RCVD_IN_DNSWL_HI, SPF_HELO_PASS, TW_CP, TW_TM, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Mon, 11 Oct 2010 20:46:21 +0000 Received: from int-mx03.intmail.prod.int.phx2.redhat.com (int-mx03.intmail.prod.int.phx2.redhat.com [10.5.11.16]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o9BKkKR3006706 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Mon, 11 Oct 2010 16:46:20 -0400 Received: from tyan-ft48-01.lab.bos.redhat.com (tyan-ft48-01.lab.bos.redhat.com [10.16.42.4]) by int-mx03.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o9BKkJkc014305 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Mon, 11 Oct 2010 16:46:20 -0400 Received: from tyan-ft48-01.lab.bos.redhat.com (localhost.localdomain [127.0.0.1]) by tyan-ft48-01.lab.bos.redhat.com (8.14.4/8.14.4) with ESMTP id o9BKkJmn008862; Mon, 11 Oct 2010 22:46:19 +0200 Received: (from jakub@localhost) by tyan-ft48-01.lab.bos.redhat.com (8.14.4/8.14.4/Submit) id o9BKkHuc008860; Mon, 11 Oct 2010 22:46:17 +0200 Date: Mon, 11 Oct 2010 22:46:17 +0200 From: Jakub Jelinek To: Richard Guenther Cc: gcc-patches@gcc.gnu.org Subject: [PATCH] Optimize memcpy + memset (PR fortran/45636) Message-ID: <20101011204617.GJ2082@tyan-ft48-01.lab.bos.redhat.com> Reply-To: Jakub Jelinek MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.20 (2009-12-10) X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hi! This patch optimizes memcpy (x, "abcd", 4); memset (x + 4, ' ', 4); sequences into a single memcpy (x, "abcd ", 8); if it is known (or expected) that the memcpy will be implemented using store_by_pieces (otherwise we'd risk enlarging .rodata too much, e.g. if there are only few longer string literals in memcpy originally and adding too many different memset variants after them would mean the string literals couldn't be shared anymore). The patch also handles memcpy (x, "a", 1); memset (x + 1, 'b', 11); where the memcpy has been optimized into a store. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2010-10-11 Jakub Jelinek PR fortran/45636 * tree-ssa-forwprop.c: Include expr.h. (constant_pointer_difference, simplify_builtin_call): New functions. (tree_ssa_forward_propagate_single_use_vars): Call simplify_builtin_call on builtin calls. * gcc.c-torture/execute/pr45636.c: New test. * gfortran.dg/pr45636.f90: New test. Jakub --- gcc/tree-ssa-forwprop.c.jj 2010-10-07 19:44:48.000000000 +0200 +++ gcc/tree-ssa-forwprop.c 2010-10-11 16:43:03.000000000 +0200 @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. #include "langhooks.h" #include "flags.h" #include "gimple.h" +#include "expr.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized @@ -1317,6 +1318,296 @@ simplify_gimple_switch (gimple stmt) } } +/* For pointers p2 and p1 return p2 - p1 if the + difference is known and constant, otherwise return NULL. */ + +static tree +constant_pointer_difference (tree p1, tree p2) +{ + int i, j; + tree exps[2][5]; + tree offs[2][5]; + int cnt[2]; + + for (i = 0; i < 2; i++) + { + tree p = i ? p1 : p2; + tree off = size_zero_node; + gimple stmt; + enum tree_code code; + + j = 0; + do + { + if (!POINTER_TYPE_P (TREE_TYPE (p))) + break; + if (TREE_CODE (p) == ADDR_EXPR) + { + tree q = TREE_OPERAND (p, 0); + if (handled_component_p (q) + || TREE_CODE (q) == MEM_REF + || TREE_CODE (q) == TARGET_MEM_REF) + { + HOST_WIDE_INT offset, size, maxsize; + tree base + = get_ref_base_and_extent (q, &offset, + &size, &maxsize); + if (size == maxsize + && offset >= 0 + && (offset % BITS_PER_UNIT) == 0) + { + q = base; + off = size_binop (PLUS_EXPR, off, + size_int (offset / BITS_PER_UNIT)); + } + } + if (TREE_CODE (q) == MEM_REF + && TREE_CODE (TREE_OPERAND (q, 0)) == SSA_NAME) + { + p = TREE_OPERAND (q, 0); + off = size_binop (PLUS_EXPR, off, + fold_convert (sizetype, + TREE_OPERAND (q, 1))); + } + else + { + exps[i][j] = q; + offs[i][j++] = off; + break; + } + } + if (TREE_CODE (p) != SSA_NAME) + break; + exps[i][j] = p; + offs[i][j++] = off; + if (j == 5) + break; + stmt = SSA_NAME_DEF_STMT (p); + if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != p) + break; + code = gimple_assign_rhs_code (stmt); + if (code == POINTER_PLUS_EXPR) + { + if (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST) + break; + off = size_binop (PLUS_EXPR, off, gimple_assign_rhs2 (stmt)); + p = gimple_assign_rhs1 (stmt); + } + else if (code == ADDR_EXPR || code == NOP_EXPR) + p = gimple_assign_rhs1 (stmt); + else + break; + } + while (1); + cnt[i] = j; + } + + for (i = 0; i < cnt[0]; i++) + for (j = 0; j < cnt[1]; j++) + if (exps[0][i] == exps[1][j]) + return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]); + + return NULL_TREE; +} + +/* *GSI_P is a GIMPLE_CALL to a builtin function. + Optimize + memcpy (p, "abcd", 4); + memset (p + 4, ' ', 3); + into + memcpy (p, "abcd ", 7); + call if the latter can be stored by pieces during expansion. */ + +static bool +simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) +{ + gimple stmt1, stmt2 = gsi_stmt (*gsi_p); + tree vuse = gimple_vuse (stmt2); + if (vuse == NULL) + return false; + stmt1 = SSA_NAME_DEF_STMT (vuse); + + switch (DECL_FUNCTION_CODE (callee2)) + { + case BUILT_IN_MEMSET: + if (gimple_call_num_args (stmt2) != 3 + || gimple_call_lhs (stmt2) + || CHAR_BIT != 8 + || BITS_PER_UNIT != 8) + break; + else + { + tree callee1; + tree ptr1, src1, str1, off1, len1, lhs1; + tree ptr2 = gimple_call_arg (stmt2, 0); + tree val2 = gimple_call_arg (stmt2, 1); + tree len2 = gimple_call_arg (stmt2, 2); + tree diff, vdef, new_str_cst; + unsigned int ptr1_align; + unsigned HOST_WIDE_INT src_len; + char *src_buf; + imm_use_iterator imm_iter; + use_operand_p use_p; + bool do_fail; + + if (!host_integerp (val2, 0) + || !host_integerp (len2, 1)) + break; + if (is_gimple_call (stmt1)) + { + callee1 = gimple_call_fndecl (stmt1); + if (callee1 == NULL_TREE + || DECL_BUILT_IN_CLASS (callee1) != BUILT_IN_NORMAL + || gimple_call_num_args (stmt1) != 3) + break; + if (DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMCPY + && DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMPCPY) + break; + ptr1 = gimple_call_arg (stmt1, 0); + src1 = gimple_call_arg (stmt1, 1); + len1 = gimple_call_arg (stmt1, 2); + lhs1 = gimple_call_lhs (stmt1); + if (!host_integerp (len1, 1)) + break; + str1 = string_constant (src1, &off1); + if (str1 == NULL_TREE) + break; + if (!host_integerp (off1, 1) + || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0 + || compare_tree_int (len1, TREE_STRING_LENGTH (str1) + - tree_low_cst (off1, 1)) > 0 + || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE + || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1))) + != TYPE_MODE (char_type_node)) + break; + } + else if (gimple_assign_single_p (stmt1)) + { + ptr1 = gimple_assign_lhs (stmt1); + src1 = gimple_assign_rhs1 (stmt1); + if (TREE_CODE (ptr1) != MEM_REF + || TYPE_MODE (TREE_TYPE (ptr1)) != TYPE_MODE (char_type_node) + || !host_integerp (src1, 0)) + break; + ptr1 = build_fold_addr_expr (ptr1); + callee1 = NULL_TREE; + len1 = size_one_node; + lhs1 = NULL_TREE; + off1 = size_zero_node; + str1 = NULL_TREE; + } + else + break; + + diff = constant_pointer_difference (ptr1, ptr2); + if (diff == NULL && lhs1 != NULL) + { + diff = constant_pointer_difference (lhs1, ptr2); + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY + && diff != NULL) + diff = size_binop (PLUS_EXPR, diff, + fold_convert (sizetype, len1)); + } + if (diff == NULL + || !host_integerp (diff, 1) + || tree_int_cst_lt (len1, diff)) + break; + + src_len = tree_low_cst (diff, 1); + src_len += tree_low_cst (len2, 1); + if (src_len < (unsigned HOST_WIDE_INT) tree_low_cst (len1, 1)) + src_len = tree_low_cst (len1, 1); + if (src_len > 1024) + break; + + do_fail = false; + if (lhs1 != NULL + && DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) + { + if (TREE_CODE (lhs1) != SSA_NAME) + break; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs1) + { + if (USE_STMT (use_p) != stmt2) + { + do_fail = true; + break; + } + } + if (do_fail) + break; + } + vdef = gimple_vdef (stmt1); + if (vdef != NULL) + { + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, vdef) + { + if (USE_STMT (use_p) != stmt2) + { + do_fail = true; + break; + } + } + if (do_fail) + break; + } + + ptr1_align = get_pointer_alignment (ptr1, BIGGEST_ALIGNMENT); + src_buf = XALLOCAVEC (char, src_len + 1); + if (callee1) + memcpy (src_buf, + TREE_STRING_POINTER (str1) + tree_low_cst (off1, 1), + tree_low_cst (len1, 1)); + else + src_buf[0] = tree_low_cst (src1, 0); + memset (src_buf + tree_low_cst (diff, 1), + tree_low_cst (val2, 1), tree_low_cst (len2, 1)); + src_buf[src_len] = '\0'; + if (strlen (src_buf) != src_len) + break; + rtl_profile_for_bb (gimple_bb (stmt2)); + if (!can_store_by_pieces (src_len, + builtin_strncpy_read_str, + src_buf, ptr1_align, false)) + break; + + new_str_cst = build_string_literal (src_len, src_buf); + if (callee1) + { + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) + gimple_call_set_lhs (stmt1, NULL_TREE); + gimple_call_set_arg (stmt1, 1, new_str_cst); + gimple_call_set_arg (stmt1, 2, + build_int_cst (TREE_TYPE (len1), src_len)); + update_stmt (stmt1); + unlink_stmt_vdef (stmt2); + gsi_remove (gsi_p, true); + release_defs (stmt2); + return true; + } + else + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt1); + + gimple_call_set_fndecl (stmt2, built_in_decls [BUILT_IN_MEMCPY]); + gimple_call_set_arg (stmt2, 0, ptr1); + gimple_call_set_arg (stmt2, 1, new_str_cst); + gimple_call_set_arg (stmt2, 2, + build_int_cst (TREE_TYPE (len2), src_len)); + unlink_stmt_vdef (stmt1); + gsi_remove (&gsi, true); + release_defs (stmt1); + update_stmt (stmt2); + return false; + } + } + break; + default: + break; + } + return false; +} + /* Run bitwise and assignments throug the folder. If the first argument is an ssa name that is itself a result of a typecast of an ADDR_EXPR to an integer, feed the ADDR_EXPR to the folder rather than the ssa name. @@ -1784,6 +2075,14 @@ tree_ssa_forward_propagate_single_use_va WARN_STRICT_OVERFLOW_CONDITIONAL); gsi_next (&gsi); } + else if (is_gimple_call (stmt)) + { + tree callee = gimple_call_fndecl (stmt); + if (callee == NULL_TREE + || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL + || !simplify_builtin_call (&gsi, callee)) + gsi_next (&gsi); + } else gsi_next (&gsi); } --- gcc/Makefile.in.jj 2010-10-11 07:51:09.000000000 +0200 +++ gcc/Makefile.in 2010-10-11 09:15:25.000000000 +0200 @@ -2404,7 +2404,7 @@ tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ - langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h + langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h $(EXPR_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ --- gcc/testsuite/gcc.c-torture/execute/pr45636.c.jj 2010-10-11 09:15:25.000000000 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr45636.c 2010-10-11 09:15:25.000000000 +0200 @@ -0,0 +1,75 @@ +/* PR fortran/45636 */ + +typedef __SIZE_TYPE__ size_t; +void *memcpy (void *__restrict__, const void *__restrict__, size_t); +void *mempcpy (void *__restrict__, const void *__restrict__, size_t); +void *memset (void *, int, size_t); +int memcmp (const void *, const void *, size_t); +extern void abort (void); + +struct A { int i; char c[32]; } a[2]; + +__attribute__((noinline, noclone)) int +f1 (char *p, int q, int z) +{ + memcpy (p, "abcd", 4); + if (q) + z = z + 123; + else + z *= 114; + memset (p + 4, ' ', 2); + return z; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + char *p = mempcpy (&a[0].c[13], "123456", 4); + memset (p, '7', 3); +} + +__attribute__((noinline, noclone)) void +f3 (struct A *p) +{ + p++; + char *q = &p->c[10]; + memcpy (q + 4, "__1234567" + 2, 7); + memset (&p->c[21], '9', 3); +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + memcpy (&a[0].c[10], "0123456789", 10); + memset (&a[0].c[13], ' ', 3); +} + +__attribute__((noinline, noclone)) void +check (const char *p, const char *str, size_t size) +{ + const char *q; + for (q = (const char *) &a; q < p; q++) + if (*q) + abort (); + if (memcmp (p, str, size) != 0) + abort (); + for (q = p + size; q < (const char *) (&a[0] + 2); q++) + if (*q) + abort (); + memset (&a, '\0', sizeof a); +} + +int +main (void) +{ + if (f1 (&a[0].c[7], 1, 2) != 125) + abort (); + check (&a[0].c[7], "abcd ", 6); + f2 (); + check (&a[0].c[13], "1234777", 7); + f3 (&a[0]); + check (&a[1].c[14], "1234567999", 10); + f4 (); + check (&a[0].c[10], "012 6789", 10); + return 0; +} --- gcc/testsuite/gfortran.dg/pr45636.f90.jj 2010-10-11 13:19:41.000000000 +0200 +++ gcc/testsuite/gfortran.dg/pr45636.f90 2010-10-11 16:49:19.000000000 +0200 @@ -0,0 +1,14 @@ +! PR fortran/45636 +! { dg-do compile } +! { dg-options "-O2 -fdump-tree-forwprop2" } +! PR 45636 - make sure no memset is needed for a short right-hand side. +program main + character(len=2), parameter :: x='a ' + character(len=1), parameter :: y='b' + character(len=4) :: a, b + a = x + b = y + call sub(a, b) +end program main +! { dg-final { scan-tree-dump-times "memset" 0 "forwprop2" } } +! { dg-final { cleanup-tree-dump "forwprop2" } }