diff mbox

[7/17,ARM] Add FP16 data movement instructions.

Message ID 573B2BD8.70901@foss.arm.com
State New
Headers show

Commit Message

Matthew Wahab May 17, 2016, 2:34 p.m. UTC
The ARMv8.2-A FP16 extension adds a number of instructions to support
data movement for FP16 values. This patch adds these instructions to the
backend, making them available to the compiler code generator.

The new instructions include VSEL which selects between two registers
depending on a condition. This is used to support conditional data
movement which can depend on the result of comparisons between
half-precision values. These comparisons are always done by conversion
to single-precision.

Tested the series for arm-none-linux-gnueabihf with native bootstrap and
make check and for arm-none-eabi and armeb-none-eabi with make check on
an ARMv8.2-A emulator. This patch also tested for
arm-none-linux-gnueabihf with native bootstrap and make check and for
arm-none-eabi with check-gcc on an ARMv8.2-A emulator.

Ok for trunk?
Matthew

2016-05-17  Matthew Wahab  <matthew.wahab@arm.com>
	    Jiong Wang  <jiong.wang@arm.com>

	* config/arm/arm.c (coproc_secondary_reload_class): Make HFmode
	available when FP16 instructions are available.
	(output_mov_vfp): Add support for 16-bit data moves.
	(arm_validize_comparison): Fix some white-space.  Support HFmode
	by conversion to SFmode.
	* config/arm/arm.md (truncdfhf2): Fix a comment.
	(extendhfdf2): Likewise.
	(cstorehf4): New.
	(movsicc): Fix some white-space.
	(movhfcc): New.
	(movsfcc): Fix some white-space.
	(*cmovhf): New.
	* config/arm/vfp.md (*arm_movhi_vfp): Disable when VFP FP16
	instructions are available.
	(*thumb_movhi_vfp): Likewise.
	(*arm_movhi_fp16): New.
	(*thumb_movhi_fp16): New.
	(*movhf_vfp_fp16): New.
	(*movhf_vfp_neon): Disable when VFP FP16 instructions are
	available.
	(*movhf_vfp): Likewise.
	(extendhfsf2): Enable when VFP FP16 instructions are available.
	(truncsfhf2):  Enable when VFP FP16 instructions are available.

testsuite/
2016-05-17  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/arm/armv8_2_fp16-move-1.c: New.
diff mbox

Patch

From 83268813cf9aa59940ed17d623606c9e485f6ecf Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 7 Apr 2016 13:35:04 +0100
Subject: [PATCH 07/17] [PATCH 7/17][ARM] Add FP16 data movement instructions.

2016-05-17  Matthew Wahab  <matthew.wahab@arm.com>
	    Jiong Wang  <jiong.wang@arm.com>

	* config/arm/arm.c (coproc_secondary_reload_class): Make HFmode
	available when FP16 instructions are available.
	(output_mov_vfp): Add support for 16-bit data moves.
	(arm_validize_comparison): Fix some white-space.  Support HFmode
	by conversion to SFmode.
	* config/arm/arm.md (truncdfhf2): Fix a comment.
	(extendhfdf2): Likewise.
	(cstorehf4_fp16): New.
	(movsicc): Fix some white-space.
	(movhfcc): New.
	(movsfcc): Fix some white-space.
	(*cmovhf): New.
	* config/arm/vfp.md (*arm_movhi_vfp): Disable when VFP FP16
	instructions are available.
	(*thumb_movhi_vfp): Likewise.
	(*arm_movhi_fp16): New.
	(*thumb_movhi_fp16): New.
	(*movhf_vfp_fp16): New.
	(*movhf_vfp_neon): Disable when VFP FP16 instructions are
	available.
	(*movhf_vfp): Likewise.
	(extendhfsf2): Enable when VFP FP16 instructions are available.
	(truncsfhf2):  Enable when VFP FP16 instructions are available.

testsuite/
2016-05-17  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/arm/armv8_2_fp16-move-1.c: New.
---
 gcc/config/arm/arm.c                               |  16 +-
 gcc/config/arm/arm.md                              |  81 ++++++++-
 gcc/config/arm/vfp.md                              | 182 ++++++++++++++++++++-
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c | 166 +++++++++++++++++++
 4 files changed, 433 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6892040..187ebda 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -13162,7 +13162,7 @@  coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
 {
   if (mode == HFmode)
     {
-      if (!TARGET_NEON_FP16)
+      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
 	return GENERAL_REGS;
       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
 	return NO_REGS;
@@ -18613,6 +18613,8 @@  output_move_vfp (rtx *operands)
   rtx reg, mem, addr, ops[2];
   int load = REG_P (operands[0]);
   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
+  int sp = (!TARGET_VFP_FP16INST
+	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
   const char *templ;
   char buff[50];
@@ -18659,7 +18661,7 @@  output_move_vfp (rtx *operands)
 
   sprintf (buff, templ,
 	   load ? "ld" : "st",
-	   dp ? "64" : "32",
+	   dp ? "64" : sp ? "32" : "16",
 	   dp ? "P" : "",
 	   integer_p ? "\t%@ int" : "");
   output_asm_insn (buff, ops);
@@ -29238,7 +29240,7 @@  arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
 {
   enum rtx_code code = GET_CODE (*comparison);
   int code_int;
-  machine_mode mode = (GET_MODE (*op1) == VOIDmode) 
+  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
     ? GET_MODE (*op2) : GET_MODE (*op1);
 
   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
@@ -29266,6 +29268,14 @@  arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
 	*op2 = force_reg (mode, *op2);
       return true;
 
+    case HFmode:
+      if (!TARGET_VFP_FP16INST)
+	break;
+      /* FP16 comparisons are done in SF mode.  */
+      mode = SFmode;
+      *op1 = convert_to_mode (mode, *op1, 1);
+      *op2 = convert_to_mode (mode, *op2, 1);
+      /* Fall through.  */
     case SFmode:
     case DFmode:
       if (!arm_float_compare_operand (*op1, mode))
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 3e23178..224a72f 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4857,7 +4857,7 @@ 
   ""
 )
 
-/* DFmode -> HFmode conversions have to go through SFmode.  */
+;; DFmode to HFmode conversions have to go through SFmode.
 (define_expand "truncdfhf2"
   [(set (match_operand:HF  0 "general_operand" "")
 	(float_truncate:HF
@@ -5364,7 +5364,7 @@ 
   ""
 )
 
-/* HFmode -> DFmode conversions have to go through SFmode.  */
+;; HFmode -> DFmode conversions have to go through SFmode.
 (define_expand "extendhfdf2"
   [(set (match_operand:DF                  0 "general_operand" "")
 	(float_extend:DF (match_operand:HF 1 "general_operand"  "")))]
@@ -7369,6 +7369,24 @@ 
   DONE;
 }")
 
+(define_expand "cstorehf4"
+  [(set (match_operand:SI 0 "s_register_operand")
+	(match_operator:SI 1 "expandable_comparison_operator"
+	 [(match_operand:HF 2 "s_register_operand")
+	  (match_operand:HF 3 "arm_float_compare_operand")]))]
+  "TARGET_VFP_FP16INST"
+  {
+    if (!arm_validize_comparison (&operands[1],
+				  &operands[2],
+				  &operands[3]))
+       FAIL;
+
+    emit_insn (gen_cstore_cc (operands[0], operands[1],
+			      operands[2], operands[3]));
+    DONE;
+  }
+)
+
 (define_expand "cstoresf4"
   [(set (match_operand:SI 0 "s_register_operand" "")
 	(match_operator:SI 1 "expandable_comparison_operator"
@@ -7421,9 +7439,31 @@ 
     rtx ccreg;
 
     if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), 
-       				  &XEXP (operands[1], 1)))
+				  &XEXP (operands[1], 1)))
       FAIL;
-    
+
+    code = GET_CODE (operands[1]);
+    ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
+				 XEXP (operands[1], 1), NULL_RTX);
+    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+  }"
+)
+
+(define_expand "movhfcc"
+  [(set (match_operand:HF 0 "s_register_operand")
+	(if_then_else:HF (match_operand 1 "arm_cond_move_operator")
+			 (match_operand:HF 2 "s_register_operand")
+			 (match_operand:HF 3 "s_register_operand")))]
+  "TARGET_VFP_FP16INST"
+  "
+  {
+    enum rtx_code code = GET_CODE (operands[1]);
+    rtx ccreg;
+
+    if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0),
+				  &XEXP (operands[1], 1)))
+      FAIL;
+
     code = GET_CODE (operands[1]);
     ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
 				 XEXP (operands[1], 1), NULL_RTX);
@@ -7442,7 +7482,7 @@ 
     enum rtx_code code = GET_CODE (operands[1]);
     rtx ccreg;
 
-    if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), 
+    if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0),
        				  &XEXP (operands[1], 1)))
        FAIL;
 
@@ -7507,6 +7547,37 @@ 
    (set_attr "type" "fcsel")]
 )
 
+(define_insn "*cmovhf"
+    [(set (match_operand:HF 0 "s_register_operand" "=t")
+	(if_then_else:HF (match_operator 1 "arm_vsel_comparison_operator"
+			 [(match_operand 2 "cc_register" "") (const_int 0)])
+			  (match_operand:HF 3 "s_register_operand" "t")
+			  (match_operand:HF 4 "s_register_operand" "t")))]
+  "TARGET_VFP_FP16INST"
+  "*
+  {
+    enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]);
+    switch (code)
+      {
+      case ARM_GE:
+      case ARM_GT:
+      case ARM_EQ:
+      case ARM_VS:
+	return \"vsel%d1.f16\\t%0, %3, %4\";
+      case ARM_LT:
+      case ARM_LE:
+      case ARM_NE:
+      case ARM_VC:
+	return \"vsel%D1.f16\\t%0, %4, %3\";
+      default:
+	gcc_unreachable ();
+      }
+    return \"\";
+  }"
+  [(set_attr "conds" "use")
+   (set_attr "type" "fcsel")]
+)
+
 (define_insn_and_split "*movsicc_insn"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
 	(if_then_else:SI
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index d7c874a..b1c13fa 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -27,6 +27,7 @@ 
    (match_operand:HI 1 "general_operand"
     "rIk, K, n, r, mi, r, *t, *t"))]
  "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+  && !TARGET_VFP_FP16INST
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
 {
@@ -76,6 +77,7 @@ 
    (match_operand:HI 1 "general_operand"
     "rk, I, Py, n, r, m, r, *t, *t"))]
  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
+  && !TARGET_VFP_FP16INST
   && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
 {
@@ -111,6 +113,99 @@ 
   (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
 )
 
+;; Patterns for HI moves which provide more data transfer instructions when FP16
+;; instructions are available.
+(define_insn "*arm_movhi_fp16"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=r,  r, r, m, r, *t,  r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rIk, K, n, r, mi, r, *t, *t"))]
+ "TARGET_ARM && TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 1:
+      return "mvn%?\t%0, #%B1\t%@ movhi";
+    case 2:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 3:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 4:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 5:
+    case 6:
+      return "vmov%?.f16\t%0, %1\t%@ int";
+    case 7:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable" "yes")
+  (set_attr_alternative "type"
+   [(if_then_else
+     (match_operand 1 "const_int_operand" "")
+     (const_string "mov_imm")
+     (const_string "mov_reg"))
+    (const_string "mvn_imm")
+    (const_string "mov_imm")
+    (const_string "store1")
+    (const_string "load1")
+    (const_string "f_mcr")
+    (const_string "f_mrc")
+    (const_string "fmov")])
+  (set_attr "pool_range" "*, *, *, *, 256, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *")
+  (set_attr "length" "4")]
+)
+
+(define_insn "*thumb2_movhi_fp16"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=rk, r, l, r, m, r, *t, r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rk, I, Py, n, r, m, r, *t, *t"))]
+ "TARGET_THUMB2 && TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 3:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 4:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 5:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 6:
+    case 7:
+      return "vmov%?.f16\t%0, %1\t%@ int";
+    case 8:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable" "yes")
+  (set_attr "predicable_short_it"
+   "yes, no, yes, no, no, no, no, no, no")
+  (set_attr "type"
+   "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\
+    f_mcr, f_mrc, fmov")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+)
+
 ;; SImode moves
 ;; ??? For now do not allow loading constants into vfp regs.  This causes
 ;; problems because small constants get converted into adds.
@@ -304,10 +399,87 @@ 
  )
 
 ;; HFmode moves
+
+(define_insn "*movhf_vfp_fp16"
+  [(set (match_operand:HF 0 "nonimmediate_operand"
+			  "= r,m,t,r,t,r,t,t,Um,r")
+	(match_operand:HF 1 "general_operand"
+			  "  m,r,t,r,r,t,Dv,Um,t,F"))]
+  "TARGET_32BIT
+   && TARGET_VFP_FP16INST
+   && (s_register_operand (operands[0], HFmode)
+       || s_register_operand (operands[1], HFmode))"
+ {
+  switch (which_alternative)
+    {
+    case 0: /* ARM register from memory.  */
+      return \"ldrh%?\\t%0, %1\\t%@ __fp16\";
+    case 1: /* Memory from ARM register.  */
+      return \"strh%?\\t%1, %0\\t%@ __fp16\";
+    case 2: /* S register from S register.  */
+      return \"vmov\\t%0, %1\t%@ __fp16\";
+    case 3: /* ARM register from ARM register.  */
+      return \"mov%?\\t%0, %1\\t%@ __fp16\";
+    case 4: /* S register from ARM register.  */
+    case 5: /* ARM register from S register.  */
+    case 6: /* S register from immediate.  */
+      return \"vmov.f16\\t%0, %1\t%@ __fp16\";
+    case 7: /* S register from memory.  */
+      return \"vld1.16\\t{%z0}, %A1\";
+    case 8: /* Memory from S register.  */
+      return \"vst1.16\\t{%z1}, %A0\";
+    case 9: /* ARM register from constant.  */
+      {
+	long bits;
+	rtx ops[4];
+
+	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+			       HFmode);
+	ops[0] = operands[0];
+	ops[1] = GEN_INT (bits);
+	ops[2] = GEN_INT (bits & 0xff00);
+	ops[3] = GEN_INT (bits & 0x00ff);
+
+	if (arm_arch_thumb2)
+	  output_asm_insn (\"movw\\t%0, %1\", ops);
+	else
+	  output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
+	return \"\";
+       }
+    default:
+      gcc_unreachable ();
+    }
+ }
+  [(set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no")
+   (set_attr "predicable_short_it" "no, no, no, yes,\
+				    no, no, no, no,\
+				    no, no")
+   (set_attr_alternative "type"
+    [(const_string "load1") (const_string "store1")
+     (const_string "fmov") (const_string "mov_reg")
+     (const_string "f_mcr") (const_string "f_mrc")
+     (const_string "fconsts") (const_string "neon_load1_1reg")
+     (const_string "neon_store1_1reg")
+     (if_then_else (match_test "arm_arch_thumb2")
+      (const_string "mov_imm")
+      (const_string "multiple"))])
+   (set_attr_alternative "length"
+    [(const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4)
+     (if_then_else (match_test "arm_arch_thumb2")
+      (const_int 4)
+      (const_int 8))])]
+)
+
 (define_insn "*movhf_vfp_neon"
   [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
 	(match_operand:HF 1 "general_operand"	   " Um, t,m,r,t,r,r,t,F"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16
+  "TARGET_32BIT
+   && TARGET_HARD_FLOAT && TARGET_NEON_FP16
+   && !TARGET_VFP_FP16INST
    && (   s_register_operand (operands[0], HFmode)
        || s_register_operand (operands[1], HFmode))"
   "*
@@ -361,8 +533,10 @@ 
 (define_insn "*movhf_vfp"
   [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r")
 	(match_operand:HF 1 "general_operand"	   " m,r,t,r,r,t,F"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP
+  "TARGET_32BIT
+   && TARGET_HARD_FLOAT && TARGET_VFP
    && !TARGET_NEON_FP16
+   && !TARGET_VFP_FP16INST
    && (   s_register_operand (operands[0], HFmode)
        || s_register_operand (operands[1], HFmode))"
   "*
@@ -1095,7 +1269,7 @@ 
 (define_insn "extendhfsf2"
   [(set (match_operand:SF		   0 "s_register_operand" "=t")
 	(float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+  "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)"
   "vcvtb%?.f32.f16\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
@@ -1105,7 +1279,7 @@ 
 (define_insn "truncsfhf2"
   [(set (match_operand:HF		   0 "s_register_operand" "=t")
 	(float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+  "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)"
   "vcvtb%?.f16.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
new file mode 100644
index 0000000..7108703
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
@@ -0,0 +1,166 @@ 
+/* { dg-do compile }  */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok }  */
+/* { dg-options "-O2" }  */
+/* { dg-add-options arm_v8_2a_fp16_scalar }  */
+
+__fp16
+test_load_1 (__fp16* a)
+{
+  return *a;
+}
+
+__fp16
+test_load_2 (__fp16* a, int i)
+{
+  return a[i];
+}
+
+/* { dg-final { scan-assembler-times {vld1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]\]+} 2 } }  */
+
+void
+test_store_1 (__fp16* a, __fp16 b)
+{
+  *a = b;
+}
+
+void
+test_store_2 (__fp16* a, int i, __fp16 b)
+{
+  a[i] = b;
+}
+
+/* { dg-final { scan-assembler-times {vst1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]\]+} 2 } }  */
+
+
+__fp16
+test_load_store_1 (__fp16* a, int i, __fp16* b)
+{
+  a[i] = b[i];
+}
+
+__fp16
+test_load_store_2 (__fp16* a, int i, __fp16* b)
+{
+  a[i] = b[i + 2];
+  return a[i];
+}
+/* { dg-final { scan-assembler-times {ldrh\tr[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {strh\tr[0-9]+} 2 } }  */
+
+__fp16
+test_select_1 (int sel, __fp16 a, __fp16 b)
+{
+  if (sel)
+    return a;
+  else
+    return b;
+}
+
+__fp16
+test_select_2 (int sel, __fp16 a, __fp16 b)
+{
+  return sel ? a : b;
+}
+
+__fp16
+test_select_3 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a == b) ? b : c;
+}
+
+__fp16
+test_select_4 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a != b) ? b : c;
+}
+
+__fp16
+test_select_5 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a < b) ? b : c;
+}
+
+__fp16
+test_select_6 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a <= b) ? b : c;
+}
+
+__fp16
+test_select_7 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a > b) ? b : c;
+}
+
+__fp16
+test_select_8 (__fp16 a, __fp16 b, __fp16 c)
+{
+  return (a >= b) ? b : c;
+}
+
+/* { dg-final { scan-assembler-times {vseleq\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vselgt\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vselge\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
+
+/* { dg-final { scan-assembler-times {vmov\.f16\ts[0-9]+, r[0-9]+} 4 } }  */
+/* { dg-final { scan-assembler-times {vmov\.f16\tr[0-9]+, s[0-9]+} 4 } }  */
+
+int
+test_compare_1 (__fp16 a, __fp16 b)
+{
+  if (a == b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_ (__fp16 a, __fp16 b)
+{
+  if (a != b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_2 (__fp16 a, __fp16 b)
+{
+  if (a > b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_3 (__fp16 a, __fp16 b)
+{
+  if (a >= b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_4 (__fp16 a, __fp16 b)
+{
+  if (a < b)
+    return -1;
+  else
+    return 0;
+}
+
+int
+test_compare_5 (__fp16 a, __fp16 b)
+{
+  if (a <= b)
+    return -1;
+  else
+    return 0;
+}
+
+/* { dg-final { scan-assembler-not {vcmp\.f16} } }  */
+/* { dg-final { scan-assembler-not {vcmpe\.f16} } }  */
+
+/* { dg-final { scan-assembler-times {vcmp\.f32} 4 } }  */
+/* { dg-final { scan-assembler-times {vcmpe\.f32} 8 } }  */
-- 
2.1.4