Patchwork [AArch64] Implement vector float->double widening and double->float narrowing.

login
register
mail settings
Submitter James Greenhalgh
Date April 26, 2013, 1:25 p.m.
Message ID <1366982721-22789-1-git-send-email-james.greenhalgh@arm.com>
Download mbox | patch
Permalink /patch/239873/
State New
Headers show

Comments

James Greenhalgh - April 26, 2013, 1:25 p.m.
Hi,

gcc.dg/vect/vect-float-truncate-1.c and
gcc.dg/vect/vect-float-extend-1.c

Were failing because widening and narrowing of floats to doubles was
not wired up.

This patch fixes that by implementing the standard names:

vec_pack_trunc_v2df
Taking two vectors of V2DFmode and returning one vector of V4SF mode.

`vec_unpacks_float_hi_v4sf', `vec_unpacks_float_lo_v4sf'
Taking one vector of V4SF mode and splitting it to two vectors of V2DF mode.

Patch regression tested on aarch64-none-elf with no regressions,
and shown to fix the bug.

Thanks,
James
---
gcc/

2013-04-26  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
	(float_truncate_hi_): Likewise.
	(float_extend_lo_): Likewise.
	(float_truncate_lo_): Likewise.
	* config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
	(aarch64_float_extend_lo_v2df): Likewise.
	(vec_unpacks_hi_v4sf): Likewise.
	(aarch64_float_truncate_lo_v2sf): Likewise.
	(aarch64_float_truncate_hi_v4sf): Likewise.
	(vec_pack_trunc_v2df): Likewise.
	(vec_pack_trunc_df): Likewise.
Marcus Shawcroft - April 29, 2013, 9:54 a.m.
On 26 April 2013 14:25, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>
> Hi,
>
> gcc.dg/vect/vect-float-truncate-1.c and
> gcc.dg/vect/vect-float-extend-1.c
>
> Were failing because widening and narrowing of floats to doubles was
> not wired up.
>
> This patch fixes that by implementing the standard names:
>
> vec_pack_trunc_v2df
> Taking two vectors of V2DFmode and returning one vector of V4SF mode.
>
> `vec_unpacks_float_hi_v4sf', `vec_unpacks_float_lo_v4sf'
> Taking one vector of V4SF mode and splitting it to two vectors of V2DF mode.
>
> Patch regression tested on aarch64-none-elf with no regressions,
> and shown to fix the bug.
>
> Thanks,
> James
> ---
> gcc/
>
> 2013-04-26  James Greenhalgh  <james.greenhalgh@arm.com>
>
>         * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
>         (float_truncate_hi_): Likewise.
>         (float_extend_lo_): Likewise.
>         (float_truncate_lo_): Likewise.
>         * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New.
>         (aarch64_float_extend_lo_v2df): Likewise.
>         (vec_unpacks_hi_v4sf): Likewise.
>         (aarch64_float_truncate_lo_v2sf): Likewise.
>         (aarch64_float_truncate_hi_v4sf): Likewise.
>         (vec_pack_trunc_v2df): Likewise.
>         (vec_pack_trunc_df): Likewise.

OK
/Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 029e091..2aa9877 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -338,3 +338,9 @@ 
   BUILTIN_VDQF (BINOP, frecps, 0)
 
   BUILTIN_VDQF (UNOP, abs, 2)
+
+  VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
+  VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
+
+  VAR1 (UNOP, float_extend_lo_, 0, v2df)
+  VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 067c849..4546094 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1267,6 +1267,108 @@ 
    (set_attr "simd_mode" "<MODE>")]
 )
 
+;; Conversions between vectors of floats and doubles.
+;; Contains a mix of patterns to match standard pattern names
+;; and those for intrinsics.
+
+;; Float widening operations.
+
+(define_insn "vec_unpacks_lo_v4sf"
+  [(set (match_operand:V2DF 0 "register_operand" "=w")
+	(float_extend:V2DF
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "register_operand" "w")
+	    (parallel [(const_int 0) (const_int 1)])
+	  )))]
+  "TARGET_SIMD"
+  "fcvtl\\t%0.2d, %1.2s"
+  [(set_attr "simd_type" "simd_fcvtl")
+   (set_attr "simd_mode" "V2DF")]
+)
+
+(define_insn "aarch64_float_extend_lo_v2df"
+  [(set (match_operand:V2DF 0 "register_operand" "=w")
+	(float_extend:V2DF
+	  (match_operand:V2SF 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "fcvtl\\t%0.2d, %1.2s"
+  [(set_attr "simd_type" "simd_fcvtl")
+   (set_attr "simd_mode" "V2DF")]
+)
+
+(define_insn "vec_unpacks_hi_v4sf"
+  [(set (match_operand:V2DF 0 "register_operand" "=w")
+	(float_extend:V2DF
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "register_operand" "w")
+	    (parallel [(const_int 2) (const_int 3)])
+	  )))]
+  "TARGET_SIMD"
+  "fcvtl2\\t%0.2d, %1.4s"
+  [(set_attr "simd_type" "simd_fcvtl")
+   (set_attr "simd_mode" "V2DF")]
+)
+
+;; Float narrowing operations.
+
+(define_insn "aarch64_float_truncate_lo_v2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=w")
+      (float_truncate:V2SF
+	(match_operand:V2DF 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "fcvtn\\t%0.2s, %1.2d"
+  [(set_attr "simd_type" "simd_fcvtl")
+   (set_attr "simd_mode" "V2SF")]
+)
+
+(define_insn "aarch64_float_truncate_hi_v4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+    (vec_concat:V4SF
+      (match_operand:V2SF 1 "register_operand" "0")
+      (float_truncate:V2SF
+	(match_operand:V2DF 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "fcvtn2\\t%0.4s, %2.2d"
+  [(set_attr "simd_type" "simd_fcvtl")
+   (set_attr "simd_mode" "V4SF")]
+)
+
+(define_expand "vec_pack_trunc_v2df"
+  [(set (match_operand:V4SF 0 "register_operand")
+      (vec_concat:V4SF
+	(float_truncate:V2SF
+	    (match_operand:V2DF 1 "register_operand"))
+	(float_truncate:V2SF
+	    (match_operand:V2DF 2 "register_operand"))
+	  ))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (V2SFmode);
+    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1]));
+    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
+						   tmp, operands[2]));
+    DONE;
+  }
+)
+
+(define_expand "vec_pack_trunc_df"
+  [(set (match_operand:V2SF 0 "register_operand")
+      (vec_concat:V2SF
+	(float_truncate:SF
+	    (match_operand:DF 1 "register_operand"))
+	(float_truncate:SF
+	    (match_operand:DF 2 "register_operand"))
+	  ))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (V2SFmode);
+    emit_insn (gen_move_lo_quad_v2df (tmp, operands[1]));
+    emit_insn (gen_move_hi_quad_v2df (tmp, operands[2]));
+    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_vmls<mode>"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
        (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")