diff mbox

[2/2] HSA: handle alignment of string builtins (PR hsa/70391)

Message ID ee26df2ac2a3eeca531b75450fc75a28afb4b845.1459421035.git.mliska@suse.cz
State New
Headers show

Commit Message

Martin Liška March 23, 2016, 1:43 p.m. UTC
gcc/ChangeLog:

2016-03-23  Martin Liska  <mliska@suse.cz>

	PR hsa/70391
	* hsa-gen.c (hsa_function_representation::update_cfg): New
	function.
	(convert_addr_to_flat_segment): Likewise.
	(gen_hsa_memory_set): New alignment argument.
	(gen_hsa_ctor_assignment): Likewise.
	(gen_hsa_insns_for_single_assignment): Provide alignment
	to gen_hsa_ctor_assignment.
	(gen_hsa_insns_for_direct_call): Add new argument.
	(expand_lhs_of_string_op): New function.
	(expand_string_operation_builtin): Likewise.
	(expand_memory_copy): New function.
	(expand_memory_set): New function.
	(gen_hsa_insns_for_call): Use HOST_WIDE_INT.
	(convert_switch_statements): Change signature.
	(generate_hsa): Use a return value of the function.
	(pass_gen_hsail::execute): Do not call
	convert_switch_statements here.
	* hsa-regalloc.c (hsa_regalloc): Call update_cfg.
	* hsa.h (hsa_function_representation::m_need_cfg_update):
	New flag.
	(hsa_function_representation::update_cfg): New function.
---
 gcc/hsa-gen.c      | 372 ++++++++++++++++++++++++++++++++++++++---------------
 gcc/hsa-regalloc.c |   1 +
 gcc/hsa.h          |   9 +-
 3 files changed, 275 insertions(+), 107 deletions(-)

Comments

Martin Jambor March 31, 2016, 2:58 p.m. UTC | #1
Hi,

On Wed, Mar 23, 2016 at 02:43:17PM +0100, Martin Liska wrote:
> gcc/ChangeLog:
> 
> 2016-03-23  Martin Liska  <mliska@suse.cz>
> 
> 	PR hsa/70391
> 	* hsa-gen.c (hsa_function_representation::update_cfg): New
> 	function.
> 	(convert_addr_to_flat_segment): Likewise.
> 	(gen_hsa_memory_set): New alignment argument.
> 	(gen_hsa_ctor_assignment): Likewise.
> 	(gen_hsa_insns_for_single_assignment): Provide alignment
> 	to gen_hsa_ctor_assignment.
> 	(gen_hsa_insns_for_direct_call): Add new argument.
> 	(expand_lhs_of_string_op): New function.
> 	(expand_string_operation_builtin): Likewise.
> 	(expand_memory_copy): New function.
> 	(expand_memory_set): New function.
> 	(gen_hsa_insns_for_call): Use HOST_WIDE_INT.
> 	(convert_switch_statements): Change signature.
> 	(generate_hsa): Use a return value of the function.
> 	(pass_gen_hsail::execute): Do not call
> 	convert_switch_statements here.
> 	* hsa-regalloc.c (hsa_regalloc): Call update_cfg.
> 	* hsa.h (hsa_function_representation::m_need_cfg_update):
> 	New flag.
> 	(hsa_function_representation::update_cfg): New function.

As we already discussed, update_cfg and m_need_cfg_update should
really be called differently, because CFG has already been modified
and only dominance needs to be re-computed.  If you havent't thought
about any names yet, what about m_modified_cfg and update_dominance() ?


> ---
>  gcc/hsa-gen.c      | 372 ++++++++++++++++++++++++++++++++++++++---------------
>  gcc/hsa-regalloc.c |   1 +
>  gcc/hsa.h          |   9 +-
>  3 files changed, 275 insertions(+), 107 deletions(-)
> 
> diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
> index db39813..db7fc3d 100644
> --- a/gcc/hsa-gen.c
> +++ b/gcc/hsa-gen.c
> @@ -214,7 +214,7 @@ hsa_symbol::fillup_for_decl (tree decl)
>     should be set to number of SSA names used in the function.  */
>  
>  hsa_function_representation::hsa_function_representation
> -  (tree fdecl, bool kernel_p, unsigned ssa_names_count)
> +  (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool need_cfg_update)
>    : m_name (NULL),
>      m_reg_count (0), m_input_args (vNULL),
>      m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
> @@ -223,7 +223,8 @@ hsa_function_representation::hsa_function_representation
>      m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
>      m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
>      m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
> -    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map ()
> +    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
> +    m_need_cfg_update (need_cfg_update)
>  {
>    int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;;
>    m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
> @@ -319,6 +320,16 @@ hsa_function_representation::init_extra_bbs ()
>    hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
>  }
>  
> +void
> +hsa_function_representation::update_cfg ()
> +{
> +  if (m_need_cfg_update)
> +    {
> +      free_dominance_info (CDI_DOMINATORS);
> +      calculate_dominance_info (CDI_DOMINATORS);
> +    }
> +}
> +
>  hsa_symbol *
>  hsa_function_representation::create_hsa_temporary (BrigType16_t type)
>  {
> @@ -2246,30 +2257,14 @@ gen_hsa_addr_for_arg (tree tree_type, int index)
>    return new hsa_op_address (sym);
>  }
>  
> -/* Generate HSA instructions that calculate address of VAL including all
> -   necessary conversions to flat addressing and place the result into DEST.
> +/* Generate HSA instructions that process all necessary conversions
> +   of an ADDR to flat addressing and place the result into DEST.
>     Instructions are appended to HBB.  */
>  
>  static void
> -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
> +convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
> +			      hsa_bb *hbb)
>  {
> -  /* Handle cases like tmp = NULL, where we just emit a move instruction
> -     to a register.  */
> -  if (TREE_CODE (val) == INTEGER_CST)
> -    {
> -      hsa_op_immed *c = new hsa_op_immed (val);
> -      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
> -						 dest->m_type, dest, c);
> -      hbb->append_insn (insn);
> -      return;
> -    }
> -
> -  hsa_op_address *addr;
> -
> -  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
> -  if (TREE_CODE (val) == ADDR_EXPR)
> -    val = TREE_OPERAND (val, 0);
> -  addr = gen_hsa_addr (val, hbb);
>    hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
>    insn->set_op (1, addr);
>    if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
> @@ -2298,6 +2293,34 @@ gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
>      }
>  }
>  
> +/* Generate HSA instructions that calculate address of VAL including all
> +   necessary conversions to flat addressing and place the result into DEST.
> +   Instructions are appended to HBB.  */
> +
> +static void
> +gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
> +{
> +  /* Handle cases like tmp = NULL, where we just emit a move instruction
> +     to a register.  */
> +  if (TREE_CODE (val) == INTEGER_CST)
> +    {
> +      hsa_op_immed *c = new hsa_op_immed (val);
> +      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
> +						 dest->m_type, dest, c);
> +      hbb->append_insn (insn);
> +      return;
> +    }
> +
> +  hsa_op_address *addr;
> +
> +  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
> +  if (TREE_CODE (val) == ADDR_EXPR)
> +    val = TREE_OPERAND (val, 0);
> +  addr = gen_hsa_addr (val, hbb);
> +
> +  convert_addr_to_flat_segment (addr, dest, hbb);
> +}
> +
>  /* Return an HSA register or HSA immediate value operand corresponding to
>     gimple operand OP.  */
>  
> @@ -2728,9 +2751,9 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb)
>  }
>  
>  /* Generate memory copy instructions that are going to be used
> -   for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory,
> +   for copying a SRC memory to TARGET memory,
>     represented by pointer in a register.  MIN_ALIGN is minimal alignment
> -   of provided HSA addresses. */
> +   of provided HSA addresses.  */
>  
>  static void
>  gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src,
> @@ -2792,17 +2815,19 @@ build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size)
>  }
>  
>  /* Generate memory set instructions that are going to be used
> -   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.  */
> +   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
> +   MIN_ALIGN is minimal alignment of provided HSA addresses.  */
>  
>  static void
>  gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
>  		    unsigned HOST_WIDE_INT constant,
> -		    unsigned size)
> +		    unsigned size, BrigAlignment8_t min_align)
>  {
>    hsa_op_address *addr;
>    hsa_insn_mem *mem;
>  
>    unsigned offset = 0;
> +  unsigned min_byte_align = hsa_byte_alignment (min_align);
>  
>    while (size)
>      {
> @@ -2816,6 +2841,9 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
>        else
>  	s = 1;
>  
> +      if (s > min_byte_align)
> +	s = min_byte_align;
> +
>        addr = new hsa_op_address (target->m_symbol, target->m_reg,
>  				 target->m_imm_offset + offset);
>  
> @@ -2832,10 +2860,12 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
>  
>  /* Generate HSAIL instructions for a single assignment
>     of an empty constructor to an ADDR_LHS.  Constructor is passed as a
> -   tree RHS and all instructions are appended to HBB.  */
> +   tree RHS and all instructions are appended to HBB.  ALIGN is
> +   alignment of the address.  */
>  
>  void
> -gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
> +gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
> +			 BrigAlignment8_t align)
>  {
>    if (vec_safe_length (CONSTRUCTOR_ELTS (rhs)))
>      {
> @@ -2845,7 +2875,7 @@ gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
>      }
>  
>    unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
> -  gen_hsa_memory_set (hbb, addr_lhs, 0, size);
> +  gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
>  }
>  
>  /* Generate HSA instructions for a single assignment of RHS to LHS.
> @@ -2879,7 +2909,7 @@ gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb)
>  							  &lhs_align);
>  
>        if (TREE_CODE (rhs) == CONSTRUCTOR)
> -	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb);
> +	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
>        else
>  	{
>  	  BrigAlignment8_t rhs_align;
> @@ -3523,10 +3553,13 @@ get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type)
>  
>  /* Generate HSA instructions for a direct call instruction.
>     Instructions will be appended to HBB, which also needs to be the
> -   corresponding structure to the basic_block of STMT.  */
> +   corresponding structure to the basic_block of STMT.
> +   If ASSIGN_LHS is set to true, assignment to a LHS of the called function
> +   is not processed.  */

I'm not sure the last sentence is really helpful becasue the word
"processed" is not very informative and also because I think it means
the opposite of what the code actually does (it depends on what being
processed means, I suppose).  Can you perhaps replace it with "If
ASSIGN_LHS is false, do not copy HSA function result argument into the
corresponding HSA representation of the gimple statement LHS."

Otherwise, the patch is fine, thanks.

Martin
diff mbox

Patch

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index db39813..db7fc3d 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -214,7 +214,7 @@  hsa_symbol::fillup_for_decl (tree decl)
    should be set to number of SSA names used in the function.  */
 
 hsa_function_representation::hsa_function_representation
-  (tree fdecl, bool kernel_p, unsigned ssa_names_count)
+  (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool need_cfg_update)
   : m_name (NULL),
     m_reg_count (0), m_input_args (vNULL),
     m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
@@ -223,7 +223,8 @@  hsa_function_representation::hsa_function_representation
     m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
     m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
     m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
-    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map ()
+    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
+    m_need_cfg_update (need_cfg_update)
 {
   int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;;
   m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
@@ -319,6 +320,16 @@  hsa_function_representation::init_extra_bbs ()
   hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
 }
 
+void
+hsa_function_representation::update_cfg ()
+{
+  if (m_need_cfg_update)
+    {
+      free_dominance_info (CDI_DOMINATORS);
+      calculate_dominance_info (CDI_DOMINATORS);
+    }
+}
+
 hsa_symbol *
 hsa_function_representation::create_hsa_temporary (BrigType16_t type)
 {
@@ -2246,30 +2257,14 @@  gen_hsa_addr_for_arg (tree tree_type, int index)
   return new hsa_op_address (sym);
 }
 
-/* Generate HSA instructions that calculate address of VAL including all
-   necessary conversions to flat addressing and place the result into DEST.
+/* Generate HSA instructions that process all necessary conversions
+   of an ADDR to flat addressing and place the result into DEST.
    Instructions are appended to HBB.  */
 
 static void
-gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
+convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
+			      hsa_bb *hbb)
 {
-  /* Handle cases like tmp = NULL, where we just emit a move instruction
-     to a register.  */
-  if (TREE_CODE (val) == INTEGER_CST)
-    {
-      hsa_op_immed *c = new hsa_op_immed (val);
-      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
-						 dest->m_type, dest, c);
-      hbb->append_insn (insn);
-      return;
-    }
-
-  hsa_op_address *addr;
-
-  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
-  if (TREE_CODE (val) == ADDR_EXPR)
-    val = TREE_OPERAND (val, 0);
-  addr = gen_hsa_addr (val, hbb);
   hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
   insn->set_op (1, addr);
   if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
@@ -2298,6 +2293,34 @@  gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
     }
 }
 
+/* Generate HSA instructions that calculate address of VAL including all
+   necessary conversions to flat addressing and place the result into DEST.
+   Instructions are appended to HBB.  */
+
+static void
+gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
+{
+  /* Handle cases like tmp = NULL, where we just emit a move instruction
+     to a register.  */
+  if (TREE_CODE (val) == INTEGER_CST)
+    {
+      hsa_op_immed *c = new hsa_op_immed (val);
+      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+						 dest->m_type, dest, c);
+      hbb->append_insn (insn);
+      return;
+    }
+
+  hsa_op_address *addr;
+
+  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
+  if (TREE_CODE (val) == ADDR_EXPR)
+    val = TREE_OPERAND (val, 0);
+  addr = gen_hsa_addr (val, hbb);
+
+  convert_addr_to_flat_segment (addr, dest, hbb);
+}
+
 /* Return an HSA register or HSA immediate value operand corresponding to
    gimple operand OP.  */
 
@@ -2728,9 +2751,9 @@  gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb)
 }
 
 /* Generate memory copy instructions that are going to be used
-   for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory,
+   for copying a SRC memory to TARGET memory,
    represented by pointer in a register.  MIN_ALIGN is minimal alignment
-   of provided HSA addresses. */
+   of provided HSA addresses.  */
 
 static void
 gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src,
@@ -2792,17 +2815,19 @@  build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size)
 }
 
 /* Generate memory set instructions that are going to be used
-   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.  */
+   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
+   MIN_ALIGN is minimal alignment of provided HSA addresses.  */
 
 static void
 gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
 		    unsigned HOST_WIDE_INT constant,
-		    unsigned size)
+		    unsigned size, BrigAlignment8_t min_align)
 {
   hsa_op_address *addr;
   hsa_insn_mem *mem;
 
   unsigned offset = 0;
+  unsigned min_byte_align = hsa_byte_alignment (min_align);
 
   while (size)
     {
@@ -2816,6 +2841,9 @@  gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
       else
 	s = 1;
 
+      if (s > min_byte_align)
+	s = min_byte_align;
+
       addr = new hsa_op_address (target->m_symbol, target->m_reg,
 				 target->m_imm_offset + offset);
 
@@ -2832,10 +2860,12 @@  gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
 
 /* Generate HSAIL instructions for a single assignment
    of an empty constructor to an ADDR_LHS.  Constructor is passed as a
-   tree RHS and all instructions are appended to HBB.  */
+   tree RHS and all instructions are appended to HBB.  ALIGN is
+   alignment of the address.  */
 
 void
-gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
+gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
+			 BrigAlignment8_t align)
 {
   if (vec_safe_length (CONSTRUCTOR_ELTS (rhs)))
     {
@@ -2845,7 +2875,7 @@  gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
     }
 
   unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
-  gen_hsa_memory_set (hbb, addr_lhs, 0, size);
+  gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
 }
 
 /* Generate HSA instructions for a single assignment of RHS to LHS.
@@ -2879,7 +2909,7 @@  gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb)
 							  &lhs_align);
 
       if (TREE_CODE (rhs) == CONSTRUCTOR)
-	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb);
+	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
       else
 	{
 	  BrigAlignment8_t rhs_align;
@@ -3523,10 +3553,13 @@  get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type)
 
 /* Generate HSA instructions for a direct call instruction.
    Instructions will be appended to HBB, which also needs to be the
-   corresponding structure to the basic_block of STMT.  */
+   corresponding structure to the basic_block of STMT.
+   If ASSIGN_LHS is set to true, assignment to a LHS of the called function
+   is not processed.  */
 
 static void
-gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb)
+gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb,
+			       bool assign_lhs = true)
 {
   tree decl = gimple_call_fndecl (stmt);
   verify_function_arguments (decl);
@@ -3608,7 +3641,7 @@  gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb)
 
       /* Even if result of a function call is unused, we have to emit
 	 declaration for the result.  */
-      if (result)
+      if (result && assign_lhs)
 	{
 	  tree lhs_type = TREE_TYPE (result);
 
@@ -4481,6 +4514,195 @@  get_address_from_value (tree val, hsa_bb *hbb)
     }
 }
 
+/* Expand assignment of a result of a string BUILTIN to DST.
+   Size of the operation is N bytes, where instructions
+   will be append to HBB.  */
+
+static void
+expand_lhs_of_string_op (gimple *stmt,
+			 unsigned HOST_WIDE_INT n, hsa_bb *hbb,
+			 enum built_in_function builtin)
+{
+  /* If LHS is expected, we need to emit a PHI instruction.  */
+  tree lhs = gimple_call_lhs (stmt);
+  if (!lhs)
+    return;
+
+  hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
+
+  hsa_op_with_type *dst_reg
+    = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
+  hsa_op_with_type *tmp;
+
+  switch (builtin)
+    {
+    case BUILT_IN_MEMPCPY:
+      {
+	tmp = new hsa_op_reg (dst_reg->m_type);
+	hsa_insn_basic *add
+	  = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
+				tmp, dst_reg,
+				new hsa_op_immed (n, dst_reg->m_type));
+	hbb->append_insn (add);
+	break;
+      }
+    case BUILT_IN_MEMCPY:
+    case BUILT_IN_MEMSET:
+      tmp = dst_reg;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type,
+					lhs_reg, tmp));
+}
+
+#define HSA_MEMORY_BUILTINS_LIMIT     128
+
+/* Expand a string builtin (from a gimple STMT) in a way that
+   according to MISALIGNED_FLAG we process either direct emission
+   (a bunch of memory load and store instructions), or we emit a function call
+   of a library function (for instance 'memcpy'). Actually, a basic block
+   for direct emission is just prepared, where caller is responsible
+   for emission of corresponding instructions.
+   All instruction are appended to HBB.  */
+
+hsa_bb *
+expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb,
+				 hsa_op_reg *misaligned_flag)
+{
+  edge e = split_block (hbb->m_bb, stmt);
+  basic_block condition_bb = e->src;
+  hbb->append_insn (new hsa_insn_br (misaligned_flag));
+
+  /* Prepare the control flow.  */
+  edge condition_edge = EDGE_SUCC (condition_bb, 0);
+  basic_block call_bb = split_edge (condition_edge);
+
+  basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0));
+  basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest;
+  basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0));
+
+  condition_edge->flags &= ~EDGE_FALLTHRU;
+  condition_edge->flags |= EDGE_TRUE_VALUE;
+  make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE);
+
+  redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb);
+
+  hsa_cfun->m_need_cfg_update = true;
+
+  hsa_init_new_bb (expanded_bb);
+
+  /* Slow path: function call.  */
+  gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false);
+
+  return hsa_bb_for_bb (expanded_bb);
+}
+
+/* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from
+   a gimple STMT and store all necessary instruction to HBB basic block.  */
+
+static void
+expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin)
+{
+  tree byte_size = gimple_call_arg (stmt, 2);
+
+  if (!tree_fits_uhwi_p (byte_size))
+    {
+      gen_hsa_insns_for_direct_call (stmt, hbb);
+      return;
+    }
+
+  unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
+
+  if (n > HSA_MEMORY_BUILTINS_LIMIT)
+    {
+      gen_hsa_insns_for_direct_call (stmt, hbb);
+      return;
+    }
+
+  tree dst = gimple_call_arg (stmt, 0);
+  tree src = gimple_call_arg (stmt, 1);
+
+  hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
+  hsa_op_address *src_addr = get_address_from_value (src, hbb);
+
+  /* As gen_hsa_memory_copy relies on memory alignment
+     greater or equal to 8 bytes, we need to verify the alignment.  */
+  BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+  hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype);
+  hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
+
+  convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb);
+  convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
+
+  /* Process BIT OR for source and destination addresses.  */
+  hsa_op_reg *or_reg = new hsa_op_reg (addrtype);
+  gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg,
+			    dst_addr_reg, hbb);
+
+  /* Process BIT AND with 0x7 to identify the desired alignment
+     of 8 bytes.  */
+  hsa_op_reg *masked = new hsa_op_reg (addrtype);
+
+  gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg,
+			    new hsa_op_immed (7, addrtype), hbb);
+
+  hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
+  hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
+				      misaligned, masked,
+				      new hsa_op_immed (0, masked->m_type)));
+
+  hsa_bb *native_impl_bb
+    = expand_string_operation_builtin (stmt, hbb, misaligned);
+
+  gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8);
+  hsa_bb *merge_bb
+    = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
+  expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
+}
+
+
+/* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from
+   a gimple STMT and store all necessary instruction to HBB basic block.
+   The operation set N bytes with a CONSTANT value.  */
+
+static void
+expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n,
+		   unsigned HOST_WIDE_INT constant, hsa_bb *hbb,
+		   enum built_in_function builtin)
+{
+  tree dst = gimple_call_arg (stmt, 0);
+  hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
+
+  /* As gen_hsa_memory_set relies on memory alignment
+     greater or equal to 8 bytes, we need to verify the alignment.  */
+  BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+  hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
+  convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
+
+  /* Process BIT AND with 0x7 to identify the desired alignment
+     of 8 bytes.  */
+  hsa_op_reg *masked = new hsa_op_reg (addrtype);
+
+  gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg,
+			    new hsa_op_immed (7, addrtype), hbb);
+
+  hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
+  hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
+				      misaligned, masked,
+				      new hsa_op_immed (0, masked->m_type)));
+
+  hsa_bb *native_impl_bb
+    = expand_string_operation_builtin (stmt, hbb, misaligned);
+
+  gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8);
+  hsa_bb *merge_bb
+    = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
+  expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
+}
+
 /* Return string for MEMMODEL.  */
 
 static const char *
@@ -4811,8 +5033,6 @@  gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb)
     }
 }
 
-#define HSA_MEMORY_BUILTINS_LIMIT     128
-
 /* Generate HSA instructions for the given call statement STMT.  Instructions
    will be appended to HBB.  */
 
@@ -5169,58 +5389,11 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
     case BUILT_IN_MEMCPY:
     case BUILT_IN_MEMPCPY:
       {
-	tree byte_size = gimple_call_arg (stmt, 2);
-
-	if (!tree_fits_uhwi_p (byte_size))
-	  {
-	    gen_hsa_insns_for_direct_call (stmt, hbb);
-	    return;
-	  }
-
-	unsigned n = tree_to_uhwi (byte_size);
-
-	if (n > HSA_MEMORY_BUILTINS_LIMIT)
-	  {
-	    gen_hsa_insns_for_direct_call (stmt, hbb);
-	    return;
-	  }
-
-	tree dst = gimple_call_arg (stmt, 0);
-	tree src = gimple_call_arg (stmt, 1);
-
-	hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
-	hsa_op_address *src_addr = get_address_from_value (src, hbb);
-
-	gen_hsa_memory_copy (hbb, dst_addr, src_addr, n, BRIG_ALIGNMENT_1);
-
-	tree lhs = gimple_call_lhs (stmt);
-	if (lhs)
-	  {
-	    hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
-	    hsa_op_with_type *dst_reg = hsa_reg_or_immed_for_gimple_op (dst,
-									hbb);
-	    hsa_op_with_type *tmp;
-
-	    if (builtin == BUILT_IN_MEMPCPY)
-	      {
-		tmp = new hsa_op_reg (dst_reg->m_type);
-		hsa_insn_basic *add
-		  = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
-					tmp, dst_reg,
-					new hsa_op_immed (n, dst_reg->m_type));
-		hbb->append_insn (add);
-	      }
-	    else
-	      tmp = dst_reg;
-
-	    hsa_build_append_simple_mov (lhs_reg, tmp, hbb);
-	  }
-
+	expand_memory_copy (stmt, hbb, builtin);
 	break;
       }
     case BUILT_IN_MEMSET:
       {
-	tree dst = gimple_call_arg (stmt, 0);
 	tree c = gimple_call_arg (stmt, 1);
 
 	if (TREE_CODE (c) != INTEGER_CST)
@@ -5237,7 +5410,7 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	unsigned n = tree_to_uhwi (byte_size);
+	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
 
 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
 	  {
@@ -5245,22 +5418,15 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	hsa_op_address *dst_addr;
-	dst_addr = get_address_from_value (dst, hbb);
 	unsigned HOST_WIDE_INT constant
 	  = tree_to_uhwi (fold_convert (unsigned_char_type_node, c));
 
-	gen_hsa_memory_set (hbb, dst_addr, constant, n);
-
-	tree lhs = gimple_call_lhs (stmt);
-	if (lhs)
-	  gen_hsa_insns_for_single_assignment (lhs, dst, hbb);
+	expand_memory_set (stmt, n, constant, hbb, builtin);
 
 	break;
       }
     case BUILT_IN_BZERO:
       {
-	tree dst = gimple_call_arg (stmt, 0);
 	tree byte_size = gimple_call_arg (stmt, 1);
 
 	if (!tree_fits_uhwi_p (byte_size))
@@ -5269,7 +5435,7 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	unsigned n = tree_to_uhwi (byte_size);
+	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
 
 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
 	  {
@@ -5277,10 +5443,7 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	hsa_op_address *dst_addr;
-	dst_addr = get_address_from_value (dst, hbb);
-
-	gen_hsa_memory_set (hbb, dst_addr, 0, n);
+	expand_memory_set (stmt, n, 0, hbb, builtin);
 
 	break;
       }
@@ -5832,8 +5995,8 @@  LD:    hard_work_3 ();
 
 */
 
-static void
-convert_switch_statements ()
+static bool
+convert_switch_statements (void)
 {
   function *func = DECL_STRUCT_FUNCTION (current_function_decl);
   basic_block bb;
@@ -6023,11 +6186,7 @@  convert_switch_statements ()
   if (dump_file)
     dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS);
 
-  if (need_update)
-    {
-      free_dominance_info (CDI_DOMINATORS);
-      calculate_dominance_info (CDI_DOMINATORS);
-    }
+  return need_update;
 }
 
 /* Expand builtins that can't be handled by HSA back-end.  */
@@ -6127,9 +6286,11 @@  generate_hsa (bool kernel)
   if (hsa_num_threads == NULL)
     emit_hsa_module_variables ();
 
+  bool need_update = convert_switch_statements ();
   /* Initialize hsa_cfun.  */
   hsa_cfun = new hsa_function_representation (cfun->decl, kernel,
-					      SSANAMES (cfun)->length ());
+					      SSANAMES (cfun)->length (),
+					      need_update);
   hsa_cfun->init_extra_bbs ();
 
   if (flag_tm)
@@ -6234,7 +6395,6 @@  pass_gen_hsail::execute (function *)
   hsa_function_summary *s
     = hsa_summaries->get (cgraph_node::get_create (current_function_decl));
 
-  convert_switch_statements ();
   expand_builtins ();
   generate_hsa (s->m_kind == HSA_KERNEL);
   TREE_ASM_WRITTEN (current_function_decl) = 1;
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index 16cde8b..cee0c4a 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -700,6 +700,7 @@  regalloc (void)
 void
 hsa_regalloc (void)
 {
+  hsa_cfun->update_cfg ();
   naive_outof_ssa ();
 
   if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/hsa.h b/gcc/hsa.h
index ac692fb..3732700 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -1123,7 +1123,8 @@  class hsa_function_representation
 {
 public:
   hsa_function_representation (tree fdecl, bool kernel_p,
-			       unsigned ssa_names_count);
+			       unsigned ssa_names_count,
+			       bool need_cfg_update = false);
   hsa_function_representation (hsa_internal_fn *fn);
   ~hsa_function_representation ();
 
@@ -1139,6 +1140,9 @@  public:
      so we need hsa_bb instances of them.  */
   void init_extra_bbs ();
 
+  /* Update CFG dominators if m_need_cfg_update flag is set.  */
+  void update_cfg ();
+
   /* Return linkage of the representation.  */
   BrigLinkage8_t get_linkage ();
 
@@ -1219,6 +1223,9 @@  public:
 
   /* SSA names mapping.  */
   vec <hsa_op_reg_p> m_ssa_map;
+
+  /* Flag whether a function needs CFG update before RA.  */
+  bool m_need_cfg_update;
 };
 
 enum hsa_function_kind