
[AArch64] Use MOVN to generate 64-bit negative immediates where sensible

Message ID 53EB8477.80208@arm.com
State New

Commit Message

Kyrylo Tkachov Aug. 13, 2014, 3:29 p.m. UTC
On 07/08/14 20:32, Richard Henderson wrote:
> On 08/07/2014 02:57 AM, Kyrill Tkachov wrote:
>> +  if (one_match > zero_match)
>> +    {
>> +      /* Set either first three quarters or all but the third.	 */
>> +      mask = 0xffffll << (16 - first_not_ffff_match);
>> +      emit_insn (gen_rtx_SET (VOIDmode, dest,
>> +			      GEN_INT (val | mask | 0xffffffff00000000ull)));
>> +
>> +      /* Now insert other two quarters.	 */
>> +      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
>> +	   i < 64; i += 16, mask <<= 16)
>>   	{
>>   	  if ((val & mask) != mask)
>> +	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
>> +				       GEN_INT ((val >> i) & 0xffff)));
>>   	}
>> +      return;
>>       }
>>   
>>     if (zero_match == 2)
> You should not place this three instruction sequence before the two instruction
> sequences that follow.  I.e. place this just before simple_sequence.
Hi Richard,

Is the attached patch ok? It just moves the section as you suggested. I 
did a build of the Linux kernel with and without this patch to make sure 
no code-gen was accidentally affected.
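
For reference, here is roughly the kind of sequence the moved block 
produces; the constant below is my own illustration, not one taken 
from the thread:

/* Illustrative example: 0xffff5678ffff1234 has two 0xffff quarters,
   so one_match > zero_match and we get a two-instruction MOVN+MOVK
   sequence instead of MOVZ plus three MOVKs:

	movn	x0, #0xedcb		// x0 = ~0xedcb = 0xffffffffffff1234
	movk	x0, #0x5678, lsl 32	// insert 0x5678 into bits 47:32
*/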

>
> I do wonder if we should be memoizing these computations so that we only have
> to do the complex search for a sequence once for each constant...

We'd need to store a mapping from constant to RTXes, and every time we 
have a "cache hit" we'd have to tweak them to make sure the registers 
involved are correct. I had a quick play with this but ended up with LRA 
ICEs :(
I might look at it later, but it's not high on my priority list right now.

Thanks,
Kyrill

2014-08-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Move
     one_match > zero_match case to just before simple_sequence.

Comments

Richard Henderson Aug. 14, 2014, 5:31 p.m. UTC | #1
On 08/13/2014 05:29 AM, Kyrill Tkachov wrote:
> Is the attached patch ok? It just moves the section as you suggested. I did a
> build of the Linux kernel with and without this patch to make sure no code-gen
> was accidentally affected.

Looks good.

> We'd need to store a mapping from constant to RTXes, and every time we have a
> "cache hit" we'd have to tweak them to make sure the registers involved are
> correct. I had a quick play with this but ended up with LRA ICEs :(

You misunderstand how I meant to memoize.  Have a look at how we cache
expansions for multiply in synth_mult: we don't record registers or rtx's, but
we do record the operation and arguments.

So you could consider building a trie, indexed by a hash.

struct imm_algorithm
{
  HOST_WIDE_INT op1;
  imm_algorithm *prev;

  enum operation {
    // op1 is accepted by aarch64_mov_operand.
    // prev should be null.
    mov,
    // op1 is to be inserted at the given position
    // into the value constructed by prev.
    movk_48, movk_32, movk_16, movk_0,
    // op1 is an arithmetic immediate to be applied
    // to the value constructed by prev.
    add, sub,
    // op1 is a logical immediate to be applied to
    // the value constructed by prev.  (Spelled and_
    // because plain 'and' is a C++ keyword.)
    and_, ior
  } code;
};
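
A minimal sketch of how a cached recipe might then be replayed onto a
destination register; the helper name here is hypothetical, not an
existing GCC interface:

static void
emit_imm_recipe (rtx dest, const imm_algorithm *alg)
{
  /* The chain is prev-linked, so recurse down to the base 'mov'
     first, then apply each later step on the way back out.  */
  if (alg->prev)
    emit_imm_recipe (dest, alg->prev);

  switch (alg->code)
    {
    case imm_algorithm::mov:
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (alg->op1)));
      break;
    case imm_algorithm::movk_0:
      emit_insn (gen_insv_immdi (dest, GEN_INT (0), GEN_INT (alg->op1)));
      break;
    /* ... movk_16/32/48 and the add/sub/and_/ior cases follow the
       same pattern ... */
    default:
      gcc_unreachable ();
    }
}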


r~

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 20debb9..a4e7158 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1136,24 +1136,6 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
       return;
     }
 
-  if (one_match > zero_match)
-    {
-      /* Set either first three quarters or all but the third.	 */
-      mask = 0xffffll << (16 - first_not_ffff_match);
-      emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      GEN_INT (val | mask | 0xffffffff00000000ull)));
-
-      /* Now insert other two quarters.	 */
-      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
-	   i < 64; i += 16, mask <<= 16)
-	{
-	  if ((val & mask) != mask)
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
-	}
-      return;
-    }
-
   if (zero_match == 2)
     goto simple_sequence;
 
@@ -1270,6 +1252,24 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	}
     }
 
+  if (one_match > zero_match)
+    {
+      /* Set either first three quarters or all but the third.	 */
+      mask = 0xffffll << (16 - first_not_ffff_match);
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+
+      /* Now insert other two quarters.	 */
+      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
+	   i < 64; i += 16, mask <<= 16)
+	{
+	  if ((val & mask) != mask)
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				       GEN_INT ((val >> i) & 0xffff)));
+	}
+      return;
+    }
+
  simple_sequence:
   first = true;
   mask = 0xffff;