diff mbox

[AArch64] Fix PR 63724 - Improve immediate generation

Message ID 20141107120230.GA14486@e105545-lin
State New
Headers show

Commit Message

Ramana Radhakrishnan Nov. 7, 2014, 12:02 p.m. UTC
Hi,

This patch fixes up immediate generation for the AArch64 backend
allowing for the RTL optimizers like CSE and loop hoisting to work
more effectively with immediates. I also took the oppurtunity to
rework this to also be used in the costs calculations. This patch only
deals with numerical constants and handling symbolic constants will be
the subject of another patch. There has been some talk about
restructuring the immediate generation with a trie but I'm going to
leave that for another time.

This requires another patch that James is working on to fix up bsl
code code generation with float mode which was discovered to be broken
causing regressions when testing this code. I've worked around those
failures by pulling out a bsl patch that restructures the floating
point versions with an unspec and found no other issues with this
patch.

The output now generated matches the expected output in the PR. Looked
at output in a number of other benchmarks and saw it making sense.

Tested cross with aarch64-none-elf + a BSL patch with no regressions.
Bootstrapped and regression tested with aarch64-none-linux-gnu.

Ok for trunk once James's BSL patch is committed ?

Ramana

<DATE>  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>

	PR target/63724
	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
	   numerical immediate handling to...
	(aarch64_internal_mov_immediate): ...this. New.
   	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
	(aarch64_mov_operand_p): Relax predicate.
   	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
	(*movsi_aarch64): Turn into define_insn_and_split and new alternative 
   	for 'n'.
	(*movdi_aarch64): Likewise.
commit 34392753bd7f1481eff6ff86e055981618a3d06e
Author: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Date:   Thu Nov 6 16:08:27 2014 +0000

Comments

Richard Henderson Nov. 7, 2014, 1:36 p.m. UTC | #1
On 11/07/2014 01:02 PM, Ramana Radhakrishnan wrote:
> +	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
> +				 (gen_rtx_REG (mode, 0), x, false));
>  	}

Can't you pass NULL for the register when generate is false?


r~
Ramana Radhakrishnan Nov. 7, 2014, 2:09 p.m. UTC | #2
On 07/11/14 13:36, Richard Henderson wrote:
> On 11/07/2014 01:02 PM, Ramana Radhakrishnan wrote:
>> +	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
>> +				 (gen_rtx_REG (mode, 0), x, false));
>>   	}
>
> Can't you pass NULL for the register when generate is false?

True, that would work as well. Should have thought of that.

regards
Ramana


>
>
> r~
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 736ad90..20cbb2d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1046,8 +1046,8 @@  aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
   return plus_constant (mode, reg, offset);
 }
 
-void
-aarch64_expand_mov_immediate (rtx dest, rtx imm)
+static int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate)
 {
   machine_mode mode = GET_MODE (dest);
   unsigned HOST_WIDE_INT mask;
@@ -1057,85 +1057,14 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
-
-  gcc_assert (mode == SImode || mode == DImode);
-
-  /* Check on what type of symbol it is.  */
-  if (GET_CODE (imm) == SYMBOL_REF
-      || GET_CODE (imm) == LABEL_REF
-      || GET_CODE (imm) == CONST)
-    {
-      rtx mem, base, offset;
-      enum aarch64_symbol_type sty;
-
-      /* If we have (const (plus symbol offset)), separate out the offset
-	 before we start classifying the symbol.  */
-      split_const (imm, &base, &offset);
-
-      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-      switch (sty)
-	{
-	case SYMBOL_FORCE_TO_MEM:
-	  if (offset != const0_rtx
-	      && targetm.cannot_force_const_mem (mode, imm))
-	    {
-	      gcc_assert (can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  mem = force_const_mem (ptr_mode, imm);
-	  gcc_assert (mem);
-	  if (mode != ptr_mode)
-	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	  return;
-
-        case SYMBOL_SMALL_TLSGD:
-        case SYMBOL_SMALL_TLSDESC:
-        case SYMBOL_SMALL_GOTTPREL:
-	case SYMBOL_SMALL_GOT:
-	case SYMBOL_TINY_GOT:
-	  if (offset != const0_rtx)
-	    {
-	      gcc_assert(can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  /* FALLTHRU */
-
-        case SYMBOL_SMALL_TPREL:
-	case SYMBOL_SMALL_ABSOLUTE:
-	case SYMBOL_TINY_ABSOLUTE:
-	  aarch64_load_symref_appropriately (dest, imm, sty);
-	  return;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
+  int num_insns = 0;
 
   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
     {
-      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      return;
-    }
-
-  if (!CONST_INT_P (imm))
-    {
-      if (GET_CODE (imm) == HIGH)
+      if (generate)
 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      else
-        {
-	  rtx mem = force_const_mem (mode, imm);
-	  gcc_assert (mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	}
-
-      return;
+      num_insns++;
+      return num_insns;
     }
 
   if (mode == SImode)
@@ -1143,10 +1072,15 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
       /* We know we can't do this in 1 insn, and we must be able to do it
 	 in two; so don't mess around looking for sequences that don't buy
 	 us anything.  */
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest,
+				  GEN_INT (INTVAL (imm) & 0xffff)));
+	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   /* Remaining cases are all for DImode.  */
@@ -1176,11 +1110,15 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set one of the quarters and then insert back into result.  */
       mask = 0xffffll << first_not_ffff_match;
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-				 GEN_INT ((val >> first_not_ffff_match)
-					  & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				     GEN_INT ((val >> first_not_ffff_match)
+					      & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   if (zero_match == 2)
@@ -1195,40 +1133,57 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	{
 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val & mask))));
-	  return;
+	  if (generate)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
 	{
 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val + comp) & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val + comp) & mask))));
-	  return;
+	  if (generate)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val + comp) & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val + comp) & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
 	{
 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val - comp) | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val - comp) | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val - comp) | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val - comp) | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
 	{
 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (val | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
     }
 
@@ -1243,22 +1198,30 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
 	{
 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (aarch64_bitmasks[i])));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - aarch64_bitmasks[i])));
-	  return;
+	  if (generate)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (aarch64_bitmasks[i])));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - aarch64_bitmasks[i])));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
 
       for (j = 0; j < 64; j += 16, mask <<= 16)
 	{
 	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
 	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					 GEN_INT ((val >> j) & 0xffff)));
-	      return;
+	      if (generate)
+		{
+		  emit_insn (gen_rtx_SET (VOIDmode, dest,
+					  GEN_INT (aarch64_bitmasks[i])));
+		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+					     GEN_INT ((val >> j) & 0xffff)));
+		}
+	      num_insns += 2;
+	      return num_insns;
 	    }
 	}
     }
@@ -1274,11 +1237,15 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
 	      {
 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[i])));
-		emit_insn (gen_iordi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[j])));
-		return;
+		if (generate)
+		  {
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[i])));
+		    emit_insn (gen_iordi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[j])));
+		  }
+		num_insns += 2;
+		return num_insns;
 	      }
 	}
       else if ((val & aarch64_bitmasks[i]) == val)
@@ -1290,11 +1257,15 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	      {
 
 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[j])));
-		emit_insn (gen_anddi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[i])));
-		return;
+		if (generate)
+		  {
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[j])));
+		    emit_insn (gen_anddi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[i])));
+		  }
+		num_insns += 2;
+		return num_insns;
 	      }
 	}
     }
@@ -1303,18 +1274,24 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set either first three quarters or all but the third.	 */
       mask = 0xffffll << (16 - first_not_ffff_match);
-      emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+      if (generate)
+	emit_insn (gen_rtx_SET (VOIDmode, dest,
+				GEN_INT (val | mask | 0xffffffff00000000ull)));
+      num_insns ++;
 
       /* Now insert other two quarters.	 */
       for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
 	   i < 64; i += 16, mask <<= 16)
 	{
 	  if ((val & mask) != mask)
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
 	}
-      return;
+      return num_insns;
     }
 
  simple_sequence:
@@ -1326,15 +1303,106 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	{
 	  if (first)
 	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (val & mask)));
+	      if (generate)
+		emit_insn (gen_rtx_SET (VOIDmode, dest,
+					GEN_INT (val & mask)));
+	      num_insns ++;
 	      first = false;
 	    }
 	  else
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
 	}
     }
+
+  return num_insns;
+}
+
+
+void
+aarch64_expand_mov_immediate (rtx dest, rtx imm)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == SImode || mode == DImode);
+
+  /* Check on what type of symbol it is.  */
+  if (GET_CODE (imm) == SYMBOL_REF
+      || GET_CODE (imm) == LABEL_REF
+      || GET_CODE (imm) == CONST)
+    {
+      rtx mem, base, offset;
+      enum aarch64_symbol_type sty;
+
+      /* If we have (const (plus symbol offset)), separate out the offset
+	 before we start classifying the symbol.  */
+      split_const (imm, &base, &offset);
+
+      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
+      switch (sty)
+	{
+	case SYMBOL_FORCE_TO_MEM:
+	  if (offset != const0_rtx
+	      && targetm.cannot_force_const_mem (mode, imm))
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  mem = force_const_mem (ptr_mode, imm);
+	  gcc_assert (mem);
+	  if (mode != ptr_mode)
+	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	  return;
+
+        case SYMBOL_SMALL_TLSGD:
+        case SYMBOL_SMALL_TLSDESC:
+        case SYMBOL_SMALL_GOTTPREL:
+	case SYMBOL_SMALL_GOT:
+	case SYMBOL_TINY_GOT:
+	  if (offset != const0_rtx)
+	    {
+	      gcc_assert(can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  /* FALLTHRU */
+
+        case SYMBOL_SMALL_TPREL:
+	case SYMBOL_SMALL_ABSOLUTE:
+	case SYMBOL_TINY_ABSOLUTE:
+	  aarch64_load_symref_appropriately (dest, imm, sty);
+	  return;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  if (!CONST_INT_P (imm))
+    {
+      if (GET_CODE (imm) == HIGH)
+	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
+      else
+        {
+	  rtx mem = force_const_mem (mode, imm);
+	  gcc_assert (mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	}
+
+      return;
+    }
+
+  aarch64_internal_mov_immediate (dest, imm, true);
 }
 
 static bool
@@ -5234,9 +5302,8 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	     proportionally expensive to the number of instructions
 	     required to build that constant.  This is true whether we
 	     are compiling for SPEED or otherwise.  */
-	  *cost = COSTS_N_INSNS (aarch64_build_constant (0,
-							 INTVAL (x),
-							 false));
+	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+				 (gen_rtx_REG (mode, 0), x, false));
 	}
       return true;
 
@@ -8035,7 +8102,7 @@  aarch64_mov_operand_p (rtx x,
       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
     return true;
 
-  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+  if (CONST_INT_P (x))
     return true;
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 17570ba..142e8b1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -746,17 +746,20 @@ 
     if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (CONSTANT_P (operands[1]))
-      {
-	aarch64_expand_mov_immediate (operands[0], operands[1]);
-	DONE;
-      }
+    /* FIXME: RR we still need to fix up what we are doing with
+       symbol_refs and other types of constants.  */
+    if (CONSTANT_P (operands[1])
+        && !CONST_INT_P (operands[1]))
+     {
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+     }
   "
 )
 
-(define_insn "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
+(define_insn_and_split "*movsi_aarch64"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -764,6 +767,7 @@ 
    mov\\t%w0, %w1
    mov\\t%w0, %w1
    mov\\t%w0, %1
+   #
    ldr\\t%w0, %1
    ldr\\t%s0, %1
    str\\t%w1, %0
@@ -773,14 +777,20 @@ 
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
+   [(const_int 0)]
+   "{
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+    }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
 
-(define_insn "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+(define_insn_and_split "*movdi_aarch64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -788,6 +798,7 @@ 
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
    str\\t%x1, %0
@@ -798,10 +809,16 @@ 
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    movi\\t%d0, %1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
+   [(const_int 0)]
+   "{
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+    }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"