[store,merging,RFA] Re-implement merging code

Message ID	57FB4BC6.6080403@foss.arm.com
State	New
Headers	show Return-Path: <gcc-patches-return-438054-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:content-type; q=dns; s=default; b=NTGK9Zzmvn42lWpsiFhe+L/UpOYfSoJjgL0wL5r68P5 Xg9Z3+0qCJT04VwgBKy+pMGQIJ6HqopgWcZsLPi+LFKGoQlIX/V2ILJBz6amalg3 54kZa1YD55JH4+aYgiEQ5TSOd7JI90nrrP9Td1f89pOXUeh5re05UgcOjks2vu08 = Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Message-ID: <57FB4BC6.6080403@foss.arm.com> Date: Mon, 10 Oct 2016 09:05:26 +0100 From: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.2.0 MIME-Version: 1.0 To: GCC Patches <gcc-patches@gcc.gnu.org> CC: Richard Biener <rguenther@suse.de> Subject: [PATCH][store merging][RFA] Re-implement merging code Content-Type: multipart/mixed; boundary="------------030309060705010505060403"

diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index 45dc615482780bcb4c9e0d261f25c334d83dd878..c6c81596528a065e6dec023522c1430997fb4593 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -199,6 +199,87 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len) fprintf (fd, "\n"); } +/* Fill a byte array PTR of SZ elements with zeroes. This is to be used by + encode_tree_to_bitpos to zero-initialize most likely small arrays but + with a non-compile-time-constant size. */ + +static inline void +zero_char_buf (unsigned char *ptr, unsigned int sz) +{ + for (unsigned int i = 0; i < sz; i++) + ptr[i] = 0; +} + +/* Shift the bytes in PTR of SZ elements by AMNT bits, carrying over the bits + between adjacent elements. */ + +static void +shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt) +{ + unsigned char carry_over = 0U; + unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt)); + unsigned char clear_mask = (~0U) << amnt; + + for (unsigned int i = 0; i < sz; i++) + { + unsigned prev_carry_over = carry_over; + carry_over + = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); + + ptr[i] <<= amnt; + if (i != 0) + { + ptr[i] &= clear_mask; + ptr[i] |= prev_carry_over; + } + } +} + +/* In the byte array PTR clear the bit region starting at bit + START and is LEN bits wide. START should be within [0, BITS_PER_UNIT). + For regions spanning multiple bytes do this recursively until we reach + zero LEN or a region contained within a single byte. */ + +static void +clear_bit_region (unsigned char *ptr, unsigned int start, + unsigned int len) +{ + /* Degenerate base case. */ + if (len == 0) + return; + + /* Second base case. */ + if ((start + len) <= BITS_PER_UNIT) + { + unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len)); + mask >>= BITS_PER_UNIT - (start + len); + + ptr[0] &= ~mask; + + return; + } + /* Clear most significant bits in a byte and proceed with the next byte. */ + else if (start != 0) + { + clear_bit_region (ptr, start, BITS_PER_UNIT - start); + clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start) + 1); + } + /* Whole bytes need to be cleared. */ + else if (start == 0 && len > BITS_PER_UNIT) + { + unsigned int nbytes = len / BITS_PER_UNIT; + /* We could recurse on each byte but do the loop here to to avoid + recuring too deep. */ + for (unsigned int i = 0; i < nbytes; i++) + ptr[i] = 0U; + /* Clear the remaining sub-byte region if there is one. */ + if (len % BITS_PER_UNIT != 0) + clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT); + } + else + gcc_unreachable (); +} + /* Write BITLEN bits of EXPR to the byte array PTR at bit position BITPOS. PTR should contain TOTAL_BYTES elements. Return true if the operation succeeded. */ @@ -207,77 +288,50 @@ static bool encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos, unsigned int total_bytes) { - unsigned int last_byte = (bitpos + bitlen - 1) / BITS_PER_UNIT; unsigned int first_byte = bitpos / BITS_PER_UNIT; tree tmp_int = expr; bool sub_byte_op_p = (bitlen % BITS_PER_UNIT) || (bitpos % BITS_PER_UNIT); - /* If the expression is not an integer encode it into the temporary buffer - and read it back as an integer. */ - if (TREE_CODE (expr) != INTEGER_CST && sub_byte_op_p) - { - unsigned char *tmpbuf = XALLOCAVEC (unsigned char, total_bytes); - memset (tmpbuf, 0, total_bytes); - native_encode_expr (expr, tmpbuf, total_bytes, 0); - tree read_back_type - = build_nonstandard_integer_type (tree_to_shwi ( - TYPE_SIZE (TREE_TYPE (expr))), UNSIGNED); - tmp_int - = native_interpret_expr (read_back_type, tmpbuf, total_bytes); - } - gcc_assert (tmp_int); - - /* If we're inserting a non-bytesized width or not at a byte boundary - use an intermediate wide_int to perform the bit-insertion correctly. */ - if (sub_byte_op_p - || (TREE_CODE (expr) == INTEGER_CST - && mode_for_size (bitlen, MODE_INT, 0) == BLKmode)) - { - unsigned int byte_size = last_byte - first_byte + 1; - - /* The functions native_encode_expr/native_interpret_expr use the - TYPE_MODE of the type to determine the number of bytes to write/read - so if we want to process a number of bytes that does not have a - TYPE_MODE of equal size we need to use a type that has a valid mode - for it. */ - - machine_mode mode - = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); - tree dest_int_type - = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), UNSIGNED); - byte_size = GET_MODE_SIZE (mode); - /* The region from the byte array that we're inserting into. */ - tree ptr_wide_int - = native_interpret_expr (dest_int_type, ptr + first_byte, - total_bytes); - - gcc_assert (ptr_wide_int); - wide_int dest_wide_int - = wi::to_wide (ptr_wide_int, TYPE_PRECISION (dest_int_type)); - - wide_int expr_wide_int - = wi::to_wide (tmp_int, byte_size * BITS_PER_UNIT); - if (BYTES_BIG_ENDIAN) - { - unsigned int insert_pos - = byte_size * BITS_PER_UNIT - bitlen - (bitpos % BITS_PER_UNIT); - dest_wide_int - = wi::insert (dest_wide_int, expr_wide_int, insert_pos, bitlen); - } - else - dest_wide_int = wi::insert (dest_wide_int, expr_wide_int, - bitpos % BITS_PER_UNIT, bitlen); - - tree res = wide_int_to_tree (dest_int_type, dest_wide_int); - if (!native_encode_expr (res, ptr + first_byte, total_bytes, 0)) - return false; - - } - /* If we're inserting a "well-behaved" value at a normal position just - call native_encode_expr directly. */ - else if (!native_encode_expr (tmp_int, ptr + first_byte, - total_bytes, 0)) - return false; + if (!sub_byte_op_p) + return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0) + != 0; + + /* We are writing a non byte-sized quantity or at a position that is not + at a byte boundary. + |--------||--------||--------| ptr + first_byte + ^ ^ + xxx xxxxxxxx xxx<-bp-> + |______EXPR______| + + First native_encode_expr EPXR into a temporary buffer and shift each + byte in the buffer by bitpos (carrying the bits over as necessary). + |00000000||00xxxxxx||xxxxxxxx| << bp = |000xxxxx||xxxxxxxx||xxx00000| + <------bitlen------><-bp-> + Then we clear the destination bits: + |---00000||00000000||000-----| ptr + first_byte + <-------bitlen-----><-bp-> + + Finally we ORR the bytes of the shifted EXPR into the cleared region: + |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte. */ + + unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1; + unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size); + zero_char_buf (tmpbuf, byte_size); + /* The store detection code should only have allowed constants that are + accepted by native_encode_expr. */ + if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0) + gcc_unreachable (); + + /* Create the shifted version of EXPR. */ + shift_bytes_in_array (tmpbuf, byte_size, bitpos % BITS_PER_UNIT); + + /* Clear the bit region in PTR where the bits from TMPBUF will be + inerted into. */ + clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen); + + /* Insert the bits from TMPBUF. */ + for (unsigned int i = 0; i < byte_size; i++) + ptr[first_byte + i] |= tmpbuf[i]; return true; }

[store,merging,RFA] Re-implement merging code

Commit Message

Comments

Patch