[AArch64] vec_pack_trunc_<mode> should split after register allocator

Message ID CO2PR07MB269349D65A1FA849EAEC224C83B80@CO2PR07MB2693.namprd07.prod.outlook.com
State New

Commit Message

Hurugalawadi, Naveen July 25, 2017, 7:30 a.m. UTC
Hi,

>> I think we can split this whenever we like, and
>> that there isn't any benefit in keeping the pair together?

Thanks for the review and your views.

The patch has been modified as per your suggestion.

Please review the patch and let me know if it's okay.

Bootstrapped and regression tested on AArch64-Thunder-Linux.

Thanks,
Naveen

Comments

James Greenhalgh July 25, 2017, 8:57 a.m. UTC | #1
On Tue, Jul 25, 2017 at 07:30:49AM +0000, Hurugalawadi, Naveen wrote:
> Hi,
> 
> >> I think we can split this whenever we like, and
> >> that there isn't any benefit in keeping the pair together?
> 
> Thanks for the review and your views.

Thanks for the updated patch, and sorry that I wasn't clear about what I
was asking for.

I was wondering why we could not use an insn_and_split without the
reload_completed guard - there is probably still value in allowing other
optimisation passes to see that we can support:

   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "")
         (vec_concat:<VNARROWQ2>
	   (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]

but an expand pattern is not going to permit that: once the expander has
emitted the two separate instructions, there is no insn with this shape
left for combine or the other RTL passes to work with.

Could you switch this back to an insn_and_split as it was in the previous
patch, and just drop the && reload_completed ?
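
Something along these lines is roughly what I had in mind (purely an
untested sketch that reuses the helpers and the endian handling from your
expand; the "&& true" split condition and the attributes would need
checking):

  (define_insn_and_split "vec_pack_trunc_<mode>"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
         (vec_concat:<VNARROWQ2>
           (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
   "TARGET_SIMD"
   "#"
   "&& true"
   [(const_int 0)]
   {
     /* Narrow the low half first, then use XTN2 to fill in the high half,
        swapping the sources for big-endian, exactly as in the expand.  */
     rtx low_part = gen_lowpart (<VNARROWQ>mode, operands[0]);
     rtx high_part = aarch64_simd_vect_par_cnst_half (<VNARROWQ2>mode, true);
     emit_insn (gen_aarch64_simd_vec_pack_trunc_<mode>
                (low_part, BYTES_BIG_ENDIAN ? operands[2] : operands[1]));
     emit_insn (gen_aarch64_simd_vec_pack_trunc_hi_<mode>
                (operands[0], BYTES_BIG_ENDIAN ? operands[1] : operands[2],
                 high_part, operands[0]));
     DONE;
   }
   [(set_attr "type" "multiple")
    (set_attr "length" "8")]
  )

That way combine and friends can still see a single insn for the paired
narrowing, and we remain free to split it into the xtn/xtn2 pair whenever
we like.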

Thanks,
James

> 
> The patch has been modified as per your suggestion.
> 
> Please review the patch and let me know if it's okay.
> 
> Bootstrapped and regression tested on AArch64-Thunder-Linux.
> 
> Thanks,
> Naveen   

> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 1cb6eeb..a41edad 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1291,6 +1291,18 @@
>    [(set_attr "type" "neon_shift_imm_narrow_q")]
>  )
>  
> +(define_insn "aarch64_simd_vec_pack_trunc_hi_<mode>"
> + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +       (vec_concat:<VNARROWQ2>
> +	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
> +	 (vec_select:<VNARROWQ>
> +	   (match_operand:<VNARROWQ2> 3 "register_operand" "0")
> +	   (match_operand:<VNARROWQ2> 2 "vect_par_cnst_hi_half" ""))))]
> + "TARGET_SIMD"
> + "xtn2\\t%0.<V2ntype>, %1.<Vtype>"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
>  (define_expand "vec_pack_trunc_<mode>"
>   [(match_operand:<VNARROWD> 0 "register_operand" "")
>    (match_operand:VDN 1 "register_operand" "")
> @@ -1309,20 +1321,39 @@
>  
>  ;; For quads.
>  
> -(define_insn "vec_pack_trunc_<mode>"
> - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
> +(define_expand "vec_pack_trunc_<mode>"
> + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "")
>         (vec_concat:<VNARROWQ2>
> -	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
> -	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
> +	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" ""))
> +	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" ""))))]
>   "TARGET_SIMD"
>   {
>     if (BYTES_BIG_ENDIAN)
> -     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
> +     {
> +       rtx low_part = gen_lowpart (<VNARROWQ>mode, operands[0]);
> +       emit_insn (gen_aarch64_simd_vec_pack_trunc_<mode> (low_part,
> +							  operands[2]));
> +       rtx high_part = aarch64_simd_vect_par_cnst_half (<VNARROWQ2>mode,
> +							true);
> +       emit_insn (gen_aarch64_simd_vec_pack_trunc_hi_<mode> (operands[0],
> +							     operands[1],
> +							     high_part,
> +							     operands[0]));
> +     }
>     else
> -     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
> +     {
> +       rtx low_part = gen_lowpart (<VNARROWQ>mode, operands[0]);
> +       emit_insn (gen_aarch64_simd_vec_pack_trunc_<mode> (low_part,
> +							  operands[1]));
> +       rtx high_part = aarch64_simd_vect_par_cnst_half (<VNARROWQ2>mode,
> +							true);
> +       emit_insn (gen_aarch64_simd_vec_pack_trunc_hi_<mode> (operands[0],
> +							     operands[2],
> +							     high_part,
> +							     operands[0]));
> +     }
> +   DONE;
>   }
> -  [(set_attr "type" "multiple")
> -   (set_attr "length" "8")]
>  )
>  
>  ;; Widening operations.

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1cb6eeb..a41edad 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1291,6 +1291,18 @@ 
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_simd_vec_pack_trunc_hi_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
+	 (vec_select:<VNARROWQ>
+	   (match_operand:<VNARROWQ2> 3 "register_operand" "0")
+	   (match_operand:<VNARROWQ2> 2 "vect_par_cnst_hi_half" ""))))]
+ "TARGET_SIMD"
+ "xtn2\\t%0.<V2ntype>, %1.<Vtype>"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
 (define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
@@ -1309,20 +1321,39 @@ 
 
 ;; For quads.
 
-(define_insn "vec_pack_trunc_<mode>"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "")
        (vec_concat:<VNARROWQ2>
-	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
+	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" ""))
+	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" ""))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
-     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
+     {
+       rtx low_part = gen_lowpart (<VNARROWQ>mode, operands[0]);
+       emit_insn (gen_aarch64_simd_vec_pack_trunc_<mode> (low_part,
+							  operands[2]));
+       rtx high_part = aarch64_simd_vect_par_cnst_half (<VNARROWQ2>mode,
+							true);
+       emit_insn (gen_aarch64_simd_vec_pack_trunc_hi_<mode> (operands[0],
+							     operands[1],
+							     high_part,
+							     operands[0]));
+     }
    else
-     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
+     {
+       rtx low_part = gen_lowpart (<VNARROWQ>mode, operands[0]);
+       emit_insn (gen_aarch64_simd_vec_pack_trunc_<mode> (low_part,
+							  operands[1]));
+       rtx high_part = aarch64_simd_vect_par_cnst_half (<VNARROWQ2>mode,
+							true);
+       emit_insn (gen_aarch64_simd_vec_pack_trunc_hi_<mode> (operands[0],
+							     operands[2],
+							     high_part,
+							     operands[0]));
+     }
+   DONE;
  }
-  [(set_attr "type" "multiple")
-   (set_attr "length" "8")]
 )
 
 ;; Widening operations.