diff mbox series

[rs6000] Add builtins for accessing the FPSCR

Message ID 1534524362.5679.11.camel@us.ibm.com
State New
Headers show
Series [rs6000] Add builtins for accessing the FPSCR | expand

Commit Message

Carl Love Aug. 17, 2018, 4:46 p.m. UTC
GCC maintainers:

The following patch adds builtins to change the value of the FPSCR.
Specifically, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn,
__builtin_mffsl, __builtin_mtfsb0,  __builtin_mtfsb1.  I added
documentation on the builtins in extend.texi.  In addition to listing
the builtin, I added a C style comment to describe the builtin a
little.  I don't see any of the other builtins documented like this. 
But I felt some explanation of the builtins were helpful.  Suggestions
on a better way to add the comments on the builtins would be
appreciated.

The patch has been tested on 

    powerpc64le-unknown-linux-gnu (Power 8 LE) 
    powerpc64le-unknown-linux-gnu (Power 9 LE)

With no regressions.

Please let me know if the patch looks OK for trunk.

                         Carl Love
---------------------------------------------------------

gcc/ChangeLog:

2018-08-16  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-builtin.def: Add definitions for __builtin_mffsl,
	__builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn,
	__builtin_set_fpscr_drn.
	* config/rs6000/rs6000.c: Add functions rs6000_expand_mtfsb0_mtfsb1_builtin,
	rs6000_expand_set_fpscr_rn_builtin, rs6000_expand_set_fpscr_drn_builtin.
	Add case statement entries for the new builtins.
	* config/rs6000/rs6000.md: Add define_insn for rs6000_mtfsb0_si,
	rs6000_mtfsb1_si, rs6000_mffscrn, rs6000_mffscdrn.
	Add define_expand for rs6000_set_fpscr_rn and rs6000_set_fpscr_drn.
	* doc/extend.texi: Add documentation for the builtins.

gcc/testsuite/ChangeLog:

2018-08-16  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/test_mffsl-p9.c: New file.
	* gcc.target/powerpc/test_fpscr_builtins.c: New file.
	* gcc.target/powerpc/test_fpscr_builtins_error.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def          |  23 ++
 gcc/config/rs6000/rs6000.c                    | 151 ++++++++++
 gcc/config/rs6000/rs6000.md                   | 149 ++++++++-
 gcc/doc/extend.texi                           |  20 ++
 .../gcc.target/powerpc/test_fpscr_builtins.c  | 282 ++++++++++++++++++
 .../powerpc/test_fpscr_builtins_error.c       |  26 ++
 .../gcc.target/powerpc/test_mffsl-p9.c        |  36 +++
 7 files changed, 686 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c

Comments

Carl Love Aug. 17, 2018, 6:46 p.m. UTC | #1
GCC maintainers:

> In addition to listing
> the builtin, I added a C style comment to describe the builtin a
> little.  I don't see any of the other builtins documented like this. 
> But I felt some explanation of the builtins were
> helpful.  Suggestions
> on a better way to add the comments on the builtins would be
> appreciated.
> 

I spent some more time looking at the documentation file and decided my
first approach really isn't correct.  The needed comments should be
placed in a paragraph below the list of builtins.  I updated the
patch with this change to the doc/extend.texi file.  

                 Carl Love
-------------------------------------------------------------------

gcc/ChangeLog:

2018-08-17  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-builtin.def: Add definitions for __builtin_mffsl,
	__builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn,
	__builtin_set_fpscr_drn.
	* config/rs6000/rs6000.c: Add functions rs6000_expand_mtfsb0_mtfsb1_builtin,
	rs6000_expand_set_fpscr_rn_builtin, rs6000_expand_set_fpscr_drn_builtin.
	Add case statement entries for the new builtins.
	* config/rs6000/rs6000.md: Add define_insn for rs6000_mtfsb0_si,
	rs6000_mtfsb1_si, rs6000_mffscrn, rs6000_mffscdrn.
	Add define_expand for rs6000_set_fpscr_rn and rs6000_set_fpscr_drn.
	* doc/extend.texi: Add documentation for the builtins.

gcc/testsuite/ChangeLog:

2018-08-16  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/test_mffsl-p9.c: New file.
	* gcc.target/powerpc/test_fpscr_builtins.c: New file.
	* gcc.target/powerpc/test_fpscr_builtins_error.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def          |  23 ++
 gcc/config/rs6000/rs6000.c                    | 151 ++++++++++
 gcc/config/rs6000/rs6000.md                   | 149 ++++++++-
 gcc/doc/extend.texi                           |  36 ++-
 .../gcc.target/powerpc/test_fpscr_builtins.c  | 282 ++++++++++++++++++
 .../powerpc/test_fpscr_builtins_error.c       |  26 ++
 .../gcc.target/powerpc/test_mffsl-p9.c        |  36 +++
 7 files changed, 701 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f79968154..a50236e77 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
+	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
 	          RS6000_BTM_ALWAYS,
 	          RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
 		  CODE_FOR_rs6000_mtfsf)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0_SI, "__builtin_mtfsb0",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb0_si)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1_SI, "__builtin_mtfsb1",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb1_si)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_rn)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_drn)
+
 BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index aa707b255..7db9c10a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -13356,6 +13356,113 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
   return NULL_RTX;
 }
 
+static rtx
+rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* The bit number must be an integer constant in the range 0 to 31.  */
+  if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31)
+    {
+       error ("Argument must be a constant between 0 and 31.");
+       return const0_rtx;
+     }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+static rtx
+rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check its range: the argument can only
+     be a 2-bit value.  When the argument is a variable its value is not
+     known here, so no compile-time range check is possible in that
+     case.  */
+  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3))
+    {
+       error ("Argument must be a value between 0 and 3.");
+       return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+static rtx
+rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check its range: the argument can only
+     be a 3-bit value.  When the argument is a variable its value is not
+     known here, so no compile-time range check is possible in that
+     case.  */
+  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7))
+   {
+       error ("Argument must be a value between 0 and 7.");
+       return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
 static rtx
 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
 {
@@ -15987,6 +16094,26 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case RS6000_BUILTIN_MFFS:
       return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
 
+    case RS6000_BUILTIN_MTFSB0_SI:
+      return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb0_si,
+						  exp);
+
+    case RS6000_BUILTIN_MTFSB1_SI:
+      return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb1_si,
+						  exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_RN:
+      return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
+						 exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_DRN:
+      return
+        rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+					     exp);
+
+    case RS6000_BUILTIN_MFFSL:
+      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
     case RS6000_BUILTIN_MTFSF:
       return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
 
@@ -16370,6 +16497,30 @@ rs6000_init_builtins (void)
   ftype = build_function_type_list (double_type_node, NULL_TREE);
   def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
 
+  ftype = build_function_type_list (double_type_node, NULL_TREE);
+  def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+
+  def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1_SI);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
+
   ftype = build_function_type_list (void_type_node,
 				    intSI_type_node, double_type_node,
 				    NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d70b01b8c..7714aacd8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -163,7 +163,13 @@
    UNSPECV_MFTB			; move from time base
    UNSPECV_NLGR			; non-local goto receiver
    UNSPECV_MFFS			; Move from FPSCR
-   UNSPECV_MTFSF		; Move to FPSCR Fields
+   UNSPECV_MFFSL		; Move from FPSCR light instruction version
+   UNSPECV_MFFSCRN		; Move from FPSCR float rounding mode
+   UNSPECV_MFFSCDRN		; Move from FPSCR decimal float rounding mode
+   UNSPECV_MTFSF		; Move to FPSCR Fields 8 to 15
+   UNSPECV_MTFSF_L0W1		; Move to FPSCR Fields 0 to 7
+   UNSPECV_MTFSFB0		; Set FPSCR Field bit to 0
+   UNSPECV_MTFSFB1		; Set FPSCR Field bit to 1
    UNSPECV_SPLIT_STACK_RETURN   ; A camouflaged return
    UNSPECV_SPEC_BARRIER         ; Speculation barrier
   ])
@@ -5823,6 +5829,115 @@
    xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_insn "rs6000_mtfsb0_si"
+ [(use (match_operand:SI 0 "short_cint_operand" "n"))
+  (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)]
+  "TARGET_HARD_FLOAT"
+ "mtfsb0 %0")
+
+(define_insn "rs6000_mtfsb1_si"
+  [(use (match_operand:SI 0 "short_cint_operand" "n"))
+   (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB1)]
+   "TARGET_HARD_FLOAT"
+   "mtfsb1 %0")
+
+(define_insn "rs6000_mffscrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+   "TARGET_HARD_FLOAT"
+   "mffscrn %0,%1")
+
+(define_insn "rs6000_mffscdrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+   "TARGET_HARD_FLOAT"
+   "mffscdrn %0,%1")
+
+(define_expand "rs6000_set_fpscr_rn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The floating point rounding control bits are FPSCR[62:63]. Put the
+     new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
+  if (TARGET_P9_VECTOR)
+    {
+      /* mffscrn takes the new RN value from bits [62:63] of its source
+         register, so operands[0] can be used directly, viewed as DFmode.  */
+      rtx src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0);
+      emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
+    }
+  else
+    {
+      rtx tmp_rn = gen_reg_rtx (DImode);
+      rtx tmp_di;
+
+      /* Extract new RN mode from operand.  */
+      emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x3)));
+
+      /* Insert new RN mode into FPSCR.  */
+      emit_insn (gen_rs6000_mffs (tmp_df));
+      tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+      emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC)));
+      emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+      /* Only FPSCR field k=15, which holds RN, must be written.  The fields
+         are numbered [0:15].  With L=0 and W=0, mtfsf writes field
+         k = i + 8*(1-W) for each bit i set in FLM, so i must be 7.  FLM is
+         an 8 bit field [0:7]; bit 7 is its least significant bit, hence the
+         FLM value 0x01.  */
+      tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+      emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));
+    }
+  DONE;
+})
+
+(define_expand "rs6000_set_fpscr_drn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the
+     new rounding mode bits from operands[0][61:63] into FPSCR[29:31].  */
+
+  if (TARGET_P9_VECTOR)
+    {
+      /* Shift into a fresh pseudo so the caller's operand is not clobbered.  */
+      rtx tmp_di = gen_reg_rtx (DImode);
+      emit_insn (gen_ashldi3 (tmp_di, operands[0], GEN_INT (32)));
+      rtx src_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+      emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df));
+    }
+  else
+    {
+      rtx tmp_rn = gen_reg_rtx (DImode);
+      rtx tmp_di;
+
+      /* Extract new DRN mode from operand.  */
+      emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x7)));
+      emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32)));
+
+      /* Insert new DRN mode into FPSCR.  */
+      emit_insn (gen_rs6000_mffs (tmp_df));
+      tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+      emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFF8FFFFFFFF)));
+      emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+      /* Only FPSCR field k=7, which holds DRN, must be written.  The fields
+         are numbered [0:15].  With L=0 and W=1, mtfsf writes field
+         k = i + 8*(1-W) for each bit i set in FLM, so i must be 7.  FLM is
+         an 8 bit field [0:7]; bit 7 is its least significant bit, hence the
+         FLM value 0x01.  */
+      tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+      emit_insn (gen_rs6000_mtfsf_L0W1 (GEN_INT (0x01), tmp_df));
+    }
+  DONE;
+})
+
 ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
 ;; rather than (set (subreg:SI (reg)) (fix:SI ...))
 ;; because the first makes it clear that operand 0 is not live
@@ -13602,6 +13717,31 @@
 })
 
 
+;; The ISA 3.0 mffsl instruction is a lower latency instruction
+;; for reading the FPSCR
+(define_insn "rs6000_mffsl0"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+        (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT && TARGET_P9_MISC"
+  "mffsl %0")
+
+(define_expand "rs6000_mffsl"
+  [(set (match_operand:DF 0 "gpc_reg_operand")
+	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+{
+  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
+     otherwise fall back to the older mffs instruction which does the same
+     thing but with a little more latency.  The test must match the
+     condition of the rs6000_mffsl0 insn emitted below.  */
+  if (TARGET_P9_MISC)
+    emit_insn (gen_rs6000_mffsl0 (operands[0]));
+  else
+    emit_insn (gen_rs6000_mffs (operands[0]));
+
+  DONE;
+})
+
 (define_insn "rs6000_mffs"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))]
@@ -13615,6 +13755,13 @@
   "TARGET_HARD_FLOAT"
   "mtfsf %0,%1")
 
+(define_insn "rs6000_mtfsf_L0W1"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")
+		     (match_operand:DF 1 "gpc_reg_operand" "d")]
+		    UNSPECV_MTFSF_L0W1)]
+  "TARGET_HARD_FLOAT"
+  "mtfsf %0,%1,0,1")
+
 
 ;; Power8 fusion support for fusing an addis instruction with a D-form load of
 ;; a GPR.  The addis instruction must be adjacent to the load, and use the same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 130f6a671..e654f41be 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15745,6 +15745,10 @@ uint64_t __builtin_ppc_get_timebase ();
 unsigned long __builtin_ppc_mftb ();
 __ibm128 __builtin_unpack_ibm128 (__ibm128, int);
 __ibm128 __builtin_pack_ibm128 (double, double);
+double __builtin_mffs(void);
+void __builtin_mtfsb0(const int);
+void __builtin_mtfsb1(const int);
+void __builtin_set_fpscr_rn(int);
 @end smallexample
 
 The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb}
@@ -15753,7 +15757,19 @@ functions generate instructions to read the Time Base Register.  The
 instructions and always returns the 64 bits of the Time Base Register.
 The @code{__builtin_ppc_mftb} function always generates one instruction and
 returns the Time Base Register value as an unsigned long, throwing away
-the most significant word on 32-bit environments.
+the most significant word on 32-bit environments.  The @code{__builtin_mffs}
+function returns the value of the FPSCR register.  Note, ISA 3.0 supports
+@code{__builtin_mffsl}, which is a lower latency version of this builtin.  The
+@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change
+as an argument.  The valid bit range is between 0 and 31.  The builtins map to
+the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and
+add 32.  Hence these instructions only modify the FPSCR[32:63] bits by
+changing the specified bit to a zero or one respectively.  The
+@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating
+point rounding mode bits.  The argument is a 2-bit value.  The argument can
+either be a const int or stored in a variable. The builtin uses the ISA 3.0
+instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks
+the current rounding mode bits out and OR's in the new value.
 
 @node Basic PowerPC Built-in Functions Available on ISA 2.05
 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05
@@ -15789,6 +15805,7 @@ The following built-in functions are available
 when hardware decimal floating point
 (@option{-mhard-dfp}) is available:
 @smallexample
+void __builtin_set_fpscr_drn(int);
 _Decimal64 __builtin_ddedpd (int, _Decimal64);
 _Decimal128 __builtin_ddedpdq (int, _Decimal128);
 _Decimal64 __builtin_denbcd (int, _Decimal64);
@@ -15803,6 +15820,14 @@ long long __builtin_dxex (_Decimal64);
 long long __builtin_dxexq (_Decimal128);
 _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
 unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
 @end smallexample
+
+The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal
+floating point rounding mode bits.  The argument is a 3-bit value.  The
+argument can either be a const int or the value can be stored in a variable.
+The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available.
+Otherwise the builtin reads the FPSCR, masks the current decimal rounding
+mode bits out and OR's in the new value.
+
 
 The following functions require @option{-mhard-float},
@@ -16004,6 +16029,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value);
 int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value);
+
+double __builtin_mffsl(void);
+
 @end smallexample
 The @code{__builtin_byte_in_set} function requires a
 64-bit environment supporting ISA 3.0 or later.  This function returns
@@ -16055,6 +16083,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and
 require that the type of the @code{value} argument be
 @code{__Decimal64} and @code{__Decimal128} respectively.
 
+The @code{__builtin_mffsl} uses the ISA 3.0 @code{mffsl} instruction to read
+the FPSCR.  The instruction is a lower latency version of the @code{mffs}
+instruction.  If the @code{mffsl} instruction is not available, then the
+builtin uses the older @code{mffs} instruction to read the FPSCR.
+
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
new file mode 100644
index 000000000..4f77078c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
@@ -0,0 +1,282 @@
+/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-pedantic" } */
+
+#include <altivec.h>
+
+/* Define DEBUG to print diagnostics instead of aborting on failure.  */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define RN_MASK  0x3LL             /* RN field mask */
+#define DRN_MASK 0x700000000LL     /* DRN field mask */
+
+void abort (void);
+
+int main ()
+{
+  int val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val;
+
+  unsigned long long ll_value;
+  register double  f14;
+
+  /* __builtin_set_fpscr_rn() builtin can take a const or a variable
+     value between 0 and 3 as the argument.
+     __builtin_set_fpscr_drn() builtin can take a const or a variable
+     value between 0 and 7 as the argument.
+     __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant
+     between 0 and 31; bits 0, 30 and 31 are exercised below.
+  */
+
+  /* Test reading the FPSCR register */
+  asm volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+
+  if (conv_val.d != __builtin_mffs())
+    {
+#ifdef DEBUG
+       /* Print the raw bit patterns; a double must not be passed to %llx.  */
+       ll_value = conv_val.ll;
+       conv_val.d = __builtin_mffs();
+       printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n",
+              conv_val.ll, ll_value);
+#else
+       abort();
+#endif
+    }
+  /* Test float rounding mode builtin with const value argument.  */
+  __builtin_set_fpscr_rn(3);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != 3)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n",
+              ll_value, 3);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n",
+              val, ll_value, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Reset to 0 for testing */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+
+  __builtin_mtfsb1(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 1)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 2)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(30) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(30) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != (0x1LL << (31-0)))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+
+  /* Test builtin float rounding mode with variable as argument.  */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 3;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Test builtin decimal float rounding mode with const argument.  */
+  __builtin_set_fpscr_drn(7);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x700000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(2);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x200000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(5);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x500000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n");
+#else
+       abort();
+#endif
+    }
+
+  /* Test builtin decimal float rounding mode with variable as argument.  */
+  val = 7;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 0;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
new file mode 100644
index 000000000..10de0be44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test builtins with out of range arguments.  Only constant integer
+     arguments can be tested: __builtin_set_fpscr_rn() and
+     __builtin_set_fpscr_drn() also accept a variable as an argument, but
+     the value of a variable cannot be checked at compile time.  */
+
+  __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+  __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+
+  __builtin_mtfsb1(-1);  /* { dg-error "Argument must be a constant between 0 and 31." } */
+  __builtin_mtfsb1(32);  /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+
+  __builtin_set_fpscr_rn(-1);  /* { dg-error "Argument must be a value between 0 and 3." } */ 
+  __builtin_set_fpscr_rn(4);   /* { dg-error "Argument must be a value between 0 and 3." } */ 
+
+  __builtin_set_fpscr_drn(-1);  /* { dg-error "Argument must be a value between 0 and 7." } */ 
+  __builtin_set_fpscr_drn(8);   /* { dg-error "Argument must be a value between 0 and 7." } */ 
+
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c
new file mode 100644
index 000000000..dc4f863ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-pedantic -mcpu=power9" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int main ()
+{
+
+  register double  f14;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val;
+
+  /* Test reading the FPSCR register.  */
+  asm volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+
+  if (conv_val.d != __builtin_mffsl())
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n",
+	      __builtin_mffsl(), conv_val.d);
+#else
+       abort();
+#endif
+    }		  
+}
Segher Boessenkool Aug. 17, 2018, 9:34 p.m. UTC | #2
Hi Carl,

On Fri, Aug 17, 2018 at 11:46:06AM -0700, Carl Love wrote:
> > In addition to listing
> > the builtin, I added a C style comment to describe the builtin a
> > little.  I don't see any of the other builtins documented like this. 
> > But I felt some explanation of the builtins were
> > helpful.  Suggestions
> > on a better way to add the comments on the builtins would be
> > appreciated.

I think this is fine.

> 	* config/rs6000/rs6000-builtin.def: Add definitions for __builtin_mffsl,
> 	__builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn,
> 	__builtin_set_fpscr_drn.

	* config/rs6000/rs6000-builtin.def (__builtin_mffsl): New.
	(__builtin_mtfsb0): New.
	(__builtin_mtfsb1): New.
	(__builtin_set_fpscr_rn): New.
	(__builtin_set_fpscr_drn): New.

or

	* config/rs6000/rs6000-builtin.def (__builtin_mffsl, __builtin_mtfsb0,
	__builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn): New.

> 	* config/rs6000.c: Add functions rs6000_expand_mtfsb0_mtfsb1_builtin,
> 	rs6000_expand_set_fpscr_rn_builtin, rs6000_expand_set_fpscr_drn_builtin.

Same here (and further on).

> 	Add case statement entries for the new builtins.

To what function(s)?

> 	* testsuite/gcc.target/powerpc/test_mffsl-p9.c: New file.
> 	* testsuite/gcc.target/powerpc/test_fpscr_builtins.c: New file.
> 	* testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c: New file.

testsuite/ has its own changelog.  Entries in there do not include
"testsuite/".

> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
>  BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
>  	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
>  
> +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
> +	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)

Should this be RS6000_BTM_MISC_P9 (or similar) instead?  Same for the other
ISA 3.0 ops.

> +static rtx
> +rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp)

I'd call this rs6000_expand_mtfsb_builtin, but please use which you think
is clearest.

> +  /* Only allow bit numbers 0 to 31.  */
> +  if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31)

if (!u5bit_cint_operand (op0, VOIDmode))

should do the trick I think.

> +    {
> +       error ("Argument must be a constant between 0 and 31.");
> +       return const0_rtx;
> +     }
> +
> +  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
> +    op0 = copy_to_mode_reg (mode0, op0);

Is this correct?  It must be a constant integer already, and if it fails
copying it into a register is surely not the right thing to do.

> +  /* If the argument is a constant, check the range. Agrument can only be a
> +     2-bit value.  Unfortunately, can't check the range of the value at
> +     compile time if the argument is a variable.
> +  */
> +  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3))

const_0_to_3_operand

> +    /* Builtin not supported on this processor.  */
> +    return 0;
> +
> +  /* If we got invalid arguments bail out before generating bad rtl.  */
> +  if (arg0 == error_mark_node)
> +    return const0_rtx;
> +
> +  /* If the argument is a constant, check the range. Agrument can only be a
> +     3-bit value.  Unfortunately, can't check the range of the value at
> +     compile time if the argument is a variable.
> +  */
> +  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7))

(Typo, "argument").  const_0_to_7_operand or u3bit_cint_operand (both exist,
and they are identical.  Hrm.)

>  
> @@ -16370,6 +16497,30 @@ rs6000_init_builtins (void)
>    ftype = build_function_type_list (double_type_node, NULL_TREE);
>    def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
>  
> +  ftype = build_function_type_list (double_type_node, NULL_TREE);
> +  def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
> +
> +  ftype = build_function_type_list (void_type_node,
> +				    intSI_type_node,
> +				    NULL_TREE);
> +
> +  def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI);

No blank line between ftype and def_builtin please?

> +(define_insn "rs6000_mtfsb0_si"

Why the _si?  Won't just rs6000_mtfsb0 do?

> + [(use (match_operand:SI 0 "short_cint_operand" "n"))
> +  (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)]

UNSPECV_MTFSB0 please.

operands[0] should be an argument of the unspec... so something like

(define_insn "rs6000_mtfsb0"
  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
		    UNSPECV_MTFSB0)]
  "TARGET_HARD_FLOAT"
  "mtfsb0 %0")

(and you should set the "type" attribute to something useful, ideally).

> +(define_insn "rs6000_mffscrn"
> +  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
> +   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCRN))
> +   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
> +   "TARGET_HARD_FLOAT"
> +   "mffscrn %0,%1")

(define_insn "rs6000_mffscrn"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
	(unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
			    UNSPECV_MFFSCRN))]
   "TARGET_HARD_FLOAT"
   "mffscrn %0,%1")

(you also need a check for ISA 3.0).

> +(define_expand "rs6000_set_fpscr_rn"
> +  [(match_operand:DI 0  "gpc_reg_operand")]
> +  "TARGET_HARD_FLOAT"
> +{
> +  rtx tmp_df = gen_reg_rtx (DFmode);
> +
> +  /* The floating point rounding control bits are FPSCR[62:63]. Put the
> +     new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
> +  if (TARGET_P9_VECTOR)

It does not depend on vector stuff (say, you use -mcpu=power9 -mno-altivec).

> +    {
> +      rtx tmp_rn = gen_reg_rtx (DImode);
> +      rtx tmp_di = gen_reg_rtx (DImode);
> +
> +      /* Extract new RN mode from operand.  */
> +      emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x3)));

This doesn't work for -m32 afaics.  Either disallow it, or make it work?

> +      /* Insert new RN mode into FSCPR.  */
> +      emit_insn (gen_rs6000_mffs (tmp_df));
> +      tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
> +      emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC)));
> +      emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
> +
> +      /* Need to write to field k=15.  The fields are [0:15].  Hence with L=0,
> +         W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W).  FLM is an 8 bit
> +         field[0:7]. Need to set the bit that corresponds to the value of i
> +         that you want [0:7].
> +      */

(The */ should not go on a new line).
The derivation isn't super clear to me, but 1 is the correct mask, yes.

> +      tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
> +      emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));

> +(define_expand "rs6000_set_fpscr_drn"

> +      /* Insert new RN mode into FSCPR.  */
> +      emit_insn (gen_rs6000_mffs (tmp_df));
> +      tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
> +      emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFF8FFFFFFFF)));
> +      emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));

Why is this masking off the top 16 bits of the original contents?  That
seems wrong.

> +;; The ISA 3.0 mffsl instruction is a lower latency instruction
> +;; for reading the FPSCR

For reading _a part_ of the FPSCR, the other bits are set to 0 in the result.

This matters, because otherwise we should just use __builtin_mffs always;
but it does not do the same thing, so we cannot.

> +(define_insn "rs6000_mffsl0"
> +  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
> +        (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
> +  "TARGET_HARD_FLOAT && TARGET_P9_MISC"
> +  "mffsl %0")

(Please use a better name than that "0"...  We have used "_hw" before).

> +(define_expand "rs6000_mffsl"
> +  [(set (match_operand:DF 0 "gpc_reg_operand")
> +	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
> +  "TARGET_HARD_FLOAT && TARGET_P9_MISC"

You don't want the latter...

> +{
> +  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
> +     otherwise fall back to the older mffs instruction which does the same
> +     thing but with a little more latency.  */
> +
> +  if (TARGET_P9_VECTOR)

... but you want it here (instead of the _VECTOR).

> +    emit_insn (gen_rs6000_mffsl0 (operands[0]));
> +  else
> +    emit_insn (gen_rs6000_mffs (operands[0]));

> +(define_insn "rs6000_mtfsf_L0W1"
> +  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")
> +		     (match_operand:DF 1 "gpc_reg_operand" "d")]
> +		    UNSPECV_MTFSF_L0W1)]
> +  "TARGET_HARD_FLOAT"
> +  "mtfsf %0,%1,0,1")

Maybe name it rs6000_mtsfs_high?  L0W1 reads like "low" :-)

> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -15745,6 +15745,10 @@ uint64_t __builtin_ppc_get_timebase ();
>  unsigned long __builtin_ppc_mftb ();
>  __ibm128 __builtin_unpack_ibm128 (__ibm128, int);
>  __ibm128 __builtin_pack_ibm128 (double, double);
> +double __builtin_mffs(void);
> +void __builtin_mtfsb0(const int);
> +void __builtin_mtfsb1(const int);
> +void __builtin_set_fpscr_rn(int);
>  @end smallexample

(space before opening paren)

> +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
> @@ -0,0 +1,282 @@
> +/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */
> +/* { dg-require-effective-target lp64 } */

You have "lp64" in the selector already, repeating it here doesn't do
anything.

> +/* { dg-options "-pedantic" } */

Why is this?


Segher
Carl Love Sept. 17, 2018, 10:03 p.m. UTC | #3
Segher:

This is an updated patch to add the following builtins:
__builtin_mffsl, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn,
__builtin_mtfsb0, __builtin_mtfsb1.

I have addressed your comments with regard to the change log entries.
I have also addressed the various comments about the code, function
names etc.  

I have also addressed your comment about the builtins not working in
32-bit mode.  The builtins __builtin_mffsl, __builtin_set_fpscr_rn,
__builtin_mtfsb0, __builtin_mtfsb1 are supported in 32-bit mode. 
Builtin __builtin_set_fpscr_drn is only supported in 64-bit mode.

Note, rs6000_mffsl, builtins __builtin_set_fpscr_rn,
__builtin_set_fpscr_drn use the ISA 3.0 instructions if compiling for
ISA 3.0 or beyond.  Otherwise, they use logical operations to emulate
the ISA 3.0 instructions.

The tests for the __builtin_set_fpscr_drn builtin were split out
into separate files since that builtin is only supported in 64-bit mode.


The patch has been tested on 

    powerpc64le-unknown-linux-gnu (Power 8 LE  64-bit mode only) 
    powerpc64-unknown-linux-gnu (Power 8 BE  32-bit and 64-bit modes) 
    powerpc64le-unknown-linux-gnu (Power 9 LE  64-bit mode only)

With no regressions.

Please let me know if the patch looks OK for trunk.

                         Carl Love

--------------------------------------------------------------------

gcc/ChangeLog:

2018-09-17  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-builtin.def (__builtin_mffsl): New.
	(__builtin_mtfsb0): New.
	(__builtin_mtfsb1): New.
	(__builtin_set_fpscr_rn): New.
	(__builtin_set_fpscr_drn): New.
	* config/rs6000.c (rs6000_expand_mtfsb0_mtfsb1_builtin): Add.
	(rs6000_expand_set_fpscr_rn_builtin): Add.
	(rs6000_expand_set_fpscr_drn_builtin): Add.
	(rs6000_expand_builtin): Add case statement entries for
	RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1,
	RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN,
	RS6000_BUILTIN_MFFSL.
	(rs6000_init_builtins): Add ftype initialization and def_builtin
	calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1,
	__builtin_set_fpscr_rn, __builtin_set_fpscr_drn.
	* config/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn,
	rs6000_mffscdrn): Add define_insn.
	(rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand.
	* doc/extend.texi: Add documentation for the builtins.

gcc/testsuite/ChangeLog:

2018-09-17  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/test_mffsl-p9.c: New file.
	* gcc.target/powerpc/test_fpscr_rn_builtin.c: New file.
	* gcc.target/powerpc/test_fpscr_drn_builtin.c: New file.
	* gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file.
	* gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def               |  23 +++
 gcc/config/rs6000/rs6000.c                         | 148 ++++++++++++++++
 gcc/config/rs6000/rs6000.md                        | 160 ++++++++++++++++-
 gcc/doc/extend.texi                                |  36 +++-
 .../gcc.target/powerpc/test_fpscr_drn_builtin.c    | 116 +++++++++++++
 .../powerpc/test_fpscr_drn_builtin_error.c         |  17 ++
 .../gcc.target/powerpc/test_fpscr_rn_builtin.c     | 190 +++++++++++++++++++++
 .../powerpc/test_fpscr_rn_builtin_error.c          |  22 +++
 gcc/testsuite/gcc.target/powerpc/test_mffsl.c      |  34 ++++
 9 files changed, 744 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f799681..9e960eb 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
+	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
 	          RS6000_BTM_ALWAYS,
 	          RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
 		  CODE_FOR_rs6000_mtfsf)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb0)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb1)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_rn)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTM_64BIT | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_drn)
+
 BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2b736d7..2c39f80 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -13600,6 +13600,113 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
 }
 
 static rtx
+rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* Only allow bit numbers 0 to 31.  */
+  if (!u5bit_cint_operand (op0, VOIDmode))
+    {
+       error ("Argument must be a constant between 0 and 31.");
+       return const0_rtx;
+     }
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+static rtx
+rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range. Argument can only be a
+     2-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.  */
+  if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode))
+    {
+       error ("Argument must be a value between 0 and 3.");
+       return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+static rtx
+rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (TARGET_32BIT)
+    /* Builtin not supported in 32-bit mode.  */
+    fatal_error (input_location,
+		 "__builtin_set_fpscr_drn is not supported in 32-bit mode.");
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range. Argument can only be a
+     3-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.
+  */
+  if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode ))
+   {
+      error ("Argument must be a value between 0 and 7.");
+      return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+static rtx
 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
 {
   rtx pat;
@@ -16069,6 +16176,24 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case RS6000_BUILTIN_MFFS:
       return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
 
+    case RS6000_BUILTIN_MTFSB0:
+      return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+    case RS6000_BUILTIN_MTFSB1:
+      return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_RN:
+      return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
+						 exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_DRN:
+      return
+        rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+					     exp);
+
+    case RS6000_BUILTIN_MFFSL:
+      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
     case RS6000_BUILTIN_MTFSF:
       return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
 
@@ -16452,6 +16577,29 @@ rs6000_init_builtins (void)
   ftype = build_function_type_list (double_type_node, NULL_TREE);
   def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
 
+  ftype = build_function_type_list (double_type_node, NULL_TREE);
+  def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
+
   ftype = build_function_type_list (void_type_node,
 				    intSI_type_node, double_type_node,
 				    NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 68ba5fd..e2c0142 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -164,7 +164,13 @@
    UNSPECV_MFTB			; move from time base
    UNSPECV_NLGR			; non-local goto receiver
    UNSPECV_MFFS			; Move from FPSCR
-   UNSPECV_MTFSF		; Move to FPSCR Fields
+   UNSPECV_MFFSL		; Move from FPSCR light instruction version
+   UNSPECV_MFFSCRN		; Move from FPSCR float rounding mode
+   UNSPECV_MFFSCDRN		; Move from FPSCR decimal float rounding mode
+   UNSPECV_MTFSF		; Move to FPSCR Fields 8 to 15
+   UNSPECV_MTFSF_HI		; Move to FPSCR Fields 0 to 7
+   UNSPECV_MTFSB0		; Set FPSCR Field bit to 0
+   UNSPECV_MTFSB1		; Set FPSCR Field bit to 1
    UNSPECV_SPLIT_STACK_RETURN   ; A camouflaged return
    UNSPECV_SPEC_BARRIER         ; Speculation barrier
   ])
@@ -5824,6 +5830,115 @@
    xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_insn "rs6000_mtfsb0"
+  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
+		    UNSPECV_MTFSB0)]
+  "TARGET_HARD_FLOAT"
+  "mtfsb0 %0"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mtfsb1"
+  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
+		    UNSPECV_MTFSB1)]
+  "TARGET_HARD_FLOAT"
+  "mtfsb1 %0"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mffscrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
+			    UNSPECV_MFFSCRN))]
+   "TARGET_P9_MISC"
+   "mffscrn %0,%1"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mffscdrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+  "TARGET_P9_MISC"
+  "mffscdrn %0,%1"
+  [(set_attr "type" "fp")])
+
+(define_expand "rs6000_set_fpscr_rn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The floating point rounding control bits are FPSCR[62:63]. Put the
+     new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
+  if (TARGET_P9_MISC)
+    {
+       rtx src_df = gen_reg_rtx (DImode);
+
+       src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0);
+       emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
+    }
+  else
+    {
+       rtx tmp_rn = gen_reg_rtx (DImode);
+       rtx tmp_di = gen_reg_rtx (DImode);
+
+       /* Extract new RN mode from operand.  */
+       emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3)));
+
+       /* Insert new RN mode into FPSCR.  */
+       emit_insn (gen_rs6000_mffs (tmp_df));
+       tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+       emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC)));
+       emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+       /* Need to write to field k=15.  The fields are [0:15].  With L=0 and
+          W=0, field k = i + 8*(1-W) is written for each bit i set in FLM,
+          so k=15 needs i=7.  FLM is an 8-bit field [0:7] and bit 7 is its
+          least significant bit, hence the mask 0x01.  */
+       tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+       emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));
+    }
+  DONE;
+})
+
+(define_expand "rs6000_set_fpscr_drn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the
+     new rounding mode bits from operands[0][61:63] into FPSCR[29:31].  */
+  if (TARGET_P9_MISC)
+    {
+       rtx src_df = gen_reg_rtx (DFmode);
+
+       emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32)));
+       src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0);
+       emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df));
+    }
+  else
+    {
+       rtx tmp_rn = gen_reg_rtx (DImode);
+       rtx tmp_di = gen_reg_rtx (DImode);
+
+       /* Extract new DRN mode from operand.  */
+       emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x7)));
+       emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32)));
+
+       /* Insert new DRN mode into FPSCR.  */
+       emit_insn (gen_rs6000_mffs (tmp_df));
+       tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+       emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFF)));
+       emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+       /* Need to write to field 7.  The fields are [0:15].  The equation to
+	  select the field is k = i + 8*(1-W).  Hence with L=0 and W=1, need
+	  FLM bit i=7 (mask 0x1) set to select field 7.  */
+       tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+       emit_insn (gen_rs6000_mtfsf_hi (GEN_INT (0x01), tmp_df));
+    }
+  DONE;
+})
+
 ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
 ;; rather than (set (subreg:SI (reg)) (fix:SI ...))
 ;; because the first makes it clear that operand 0 is not live
@@ -13603,6 +13718,42 @@
 })
 
 
+;; The ISA 3.0 mffsl instruction is a lower latency instruction
+;; for reading bits [29:31], [45:51] and [56:63] of the FPSCR.
+(define_insn "rs6000_mffsl_hw"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+        (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+  "mffsl %0")
+
+(define_expand "rs6000_mffsl"
+  [(set (match_operand:DF 0 "gpc_reg_operand")
+	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+{
+  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
+     otherwise fall back to the older mffs instruction to emulate the mffsl
+     instruction.  */
+
+  if (TARGET_P9_MISC)
+       emit_insn (gen_rs6000_mffsl_hw (operands[0]));
+  else
+    {
+       rtx tmp_di = gen_reg_rtx (DImode);
+       rtx tmp_df = gen_reg_rtx (DFmode);
+
+       /* The mffs instruction reads the entire FPSCR.  Emulate the mffsl
+          instruction using the mffs instruction and keeping only the bits
+          the mffsl instruction actually reads.  */
+       emit_insn (gen_rs6000_mffs (tmp_df));
+       tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+       emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007F0FFLL)));
+
+       operands[0] = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+    }
+  DONE;
+})
+
 (define_insn "rs6000_mffs"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))]
@@ -13616,6 +13767,13 @@
   "TARGET_HARD_FLOAT"
   "mtfsf %0,%1")
 
+(define_insn "rs6000_mtfsf_hi"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")
+		     (match_operand:DF 1 "gpc_reg_operand" "d")]
+		    UNSPECV_MTFSF_HI)]
+  "TARGET_HARD_FLOAT"
+  "mtfsf %0,%1,0,1")
+
 
 ;; Power8 fusion support for fusing an addis instruction with a D-form load of
 ;; a GPR.  The addis instruction must be adjacent to the load, and use the same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7b471ec..a6ff134 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15632,6 +15632,10 @@ uint64_t __builtin_ppc_get_timebase ();
 unsigned long __builtin_ppc_mftb ();
 __ibm128 __builtin_unpack_ibm128 (__ibm128, int);
 __ibm128 __builtin_pack_ibm128 (double, double);
+double __builtin_mffs (void);
+void __builtin_mtfsb0 (const int);
+void __builtin_mtfsb1 (const int);
+void __builtin_set_fpscr_rn (int);
 @end smallexample
 
 The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb}
@@ -15640,7 +15644,19 @@ functions generate instructions to read the Time Base Register.  The
 instructions and always returns the 64 bits of the Time Base Register.
 The @code{__builtin_ppc_mftb} function always generates one instruction and
 returns the Time Base Register value as an unsigned long, throwing away
-the most significant word on 32-bit environments.
+the most significant word on 32-bit environments.  The @code{__builtin_mffs}
+returns the value of the FPSCR register.  Note, ISA 3.0 supports the
+@code{__builtin_mffsl()} which is a lower latency version of this builtin.  The
+@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change
+as an argument.  The valid bit range is between 0 and 31.  The builtins map to
+the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and
+add 32.  Hence these instructions only modify the FPSCR[32:63] bits by
+changing the specified bit to a zero or one respectively.  The
+@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating
+point rounding mode bits.  The argument is a 2-bit value.  The argument can
+either be a const int or stored in a variable. The builtin uses the ISA 3.0
+instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks
+the current rounding mode bits out and OR's in the new value.
 
 @node Basic PowerPC Built-in Functions Available on ISA 2.05
 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05
@@ -15676,6 +15692,7 @@ The following built-in functions are available
 when hardware decimal floating point
 (@option{-mhard-dfp}) is available:
 @smallexample
+void __builtin_set_fpscr_drn (int);
 _Decimal64 __builtin_ddedpd (int, _Decimal64);
 _Decimal128 __builtin_ddedpdq (int, _Decimal128);
 _Decimal64 __builtin_denbcd (int, _Decimal64);
@@ -15690,6 +15707,14 @@ long long __builtin_dxex (_Decimal64);
 long long __builtin_dxexq (_Decimal128);
 _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
 unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
+
+The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal
+floating point rounding mode bits.  The argument is a 3-bit value.  The
+argument can either be a const int or the value can be stored in a variable.
+The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available.
+Otherwise the builtin reads the FPSCR, masks the current decimal rounding
+mode bits out and OR's in the new value.
+
 @end smallexample
 
 The following functions require @option{-mhard-float},
@@ -15891,6 +15916,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value);
 int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value);
+
+double __builtin_mffsl (void);
+
 @end smallexample
 The @code{__builtin_byte_in_set} function requires a
 64-bit environment supporting ISA 3.0 or later.  This function returns
@@ -15942,6 +15970,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and
 require that the type of the @code{value} argument be
 @code{__Decimal64} and @code{__Decimal128} respectively.
 
+The @code{__builtin_mffsl} built-in uses the ISA 3.0 @code{mffsl} instruction,
+a lower latency version of @code{mffs} that reads only FPSCR bits [29:31],
+[45:51] and [56:63].  If the @code{mffsl} instruction is not available, the
+builtin reads the FPSCR with @code{mffs} and masks off all the other bits.
+
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
new file mode 100644
index 0000000..13933c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
@@ -0,0 +1,116 @@
+/* { dg-do run { target { powerpc*-*-linux* &&  lp64 } } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define DRN_MASK 0x700000000LL     /* DRN field mask */
+
+void abort (void);
+
+int main ()
+{
+  int i;
+  int val, bit;
+  double fpscr_val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val;
+  
+  unsigned long long ll_value;
+  register double  f14;
+
+  /* __builtin_set_fpscr_drn() builtin can take a const or a variable
+     value between 0 and 7 as the argument.
+  */
+
+  /* Test builtin decimal float rounding mode with const argument.  */
+  __builtin_set_fpscr_drn(7);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x700000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n");
+#else
+       abort();
+#endif
+    }		  
+
+  __builtin_set_fpscr_drn(2);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x200000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n");
+#else
+       abort();
+#endif
+    }		  
+
+  __builtin_set_fpscr_drn(5);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x500000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n");
+#else
+       abort();
+#endif
+    }		  
+
+  /* Test builtin decimal float rounding mode with variable as argument.  */
+  val = 7;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }		  
+
+  val = 0;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }		  
+
+  val = 2;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }	  
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
new file mode 100644
index 0000000..04e9f03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test builtin with out of range arguments.  The builtin
+     __builtin_set_fpscr_drn() also supports a variable as an argument, but
+     a variable's value can't be range checked at compile time.  */
+
+  __builtin_set_fpscr_drn(-1);  /* { dg-error "Argument must be a value between 0 and 7" } */ 
+  __builtin_set_fpscr_drn(8);   /* { dg-error "Argument must be a value between 0 and 7" } */ 
+
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
new file mode 100644
index 0000000..2a15585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
@@ -0,0 +1,190 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define RN_MASK  0x3LL             /* RN field mask */
+
+void abort (void);
+
+/* Exercise __builtin_mffs, __builtin_set_fpscr_rn and __builtin_mtfsb[01].
+   A mismatch aborts, or with -DDEBUG prints a message instead.  */
+int main ()
+{
+  int val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, asm_val;
+
+  unsigned long long ll_value;
+  register double  f14;
+
+  /* __builtin_set_fpscr_rn() builtin can take a const or a variable
+     value between 0 and 3 as the argument.
+     __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant
+     between 0 and 31 (the FPSCR bit number; the tests below use 0, 30
+     and 31).  */
+
+  /* Test reading the FPSCR register.  */
+  __asm __volatile ("mffs %0" : "=f"(f14));
+  asm_val.d = f14;
+  conv_val.d = __builtin_mffs();
+  if (conv_val.d != asm_val.d)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mffs() returned 0x%llx, not the expected value 0x%llx\n",
+	      conv_val.ll, asm_val.ll);
+#else
+       abort();
+#endif
+    }
+
+  /* Test float rounding mode builtin with const value argument.  */
+  __builtin_set_fpscr_rn(3);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != 3)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expected value 0x%x\n",
+	      ll_value, 3);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expected value 0x%x\n",
+	      val, ll_value, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Reset to 0 for testing */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+
+  __builtin_mtfsb1(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 1)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 2)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(30) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(30) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != (0x1LL << (31-0)))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+
+  /* Test builtin float rounding mode with variable as argument.  */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 3;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
new file mode 100644
index 0000000..4835dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test builtins with out of range arguments.  Can only test constant int
+     arguments.  The builtin __builtin_set_fpscr_rn() also supports a variable
+     argument, but a variable's value can't be checked at compile time.  */
+
+  __builtin_mtfsb0(-1);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+  __builtin_mtfsb0(32);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+
+  __builtin_mtfsb1(-1);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+  __builtin_mtfsb1(32);  /* { dg-error "Argument must be a constant between 0 and 31" } */ 
+
+  __builtin_set_fpscr_rn(-1);  /* { dg-error "Argument must be a value between 0 and 3" } */ 
+  __builtin_set_fpscr_rn(4);   /* { dg-error "Argument must be a value between 0 and 3" } */ 
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
new file mode 100644
index 0000000..9a4d86b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+/* Check that __builtin_mffsl() agrees with the mffs instruction.  */
+int main ()
+{
+  register double  f14;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, mffsl_val;
+
+  /* Test reading the FPSCR register.  */
+  __asm __volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+  mffsl_val.d = __builtin_mffsl();
+  if (conv_val.d != mffsl_val.d)
+    {
+#ifdef DEBUG
+      printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expected value 0x%llx\n",
+	     mffsl_val.ll, conv_val.ll);
+#else
+      abort();
+#endif
+    }
+}
Segher Boessenkool Sept. 21, 2018, 4:16 p.m. UTC | #4
Hi Carl,

Sorry for the late review.

On Mon, Sep 17, 2018 at 03:03:28PM -0700, Carl Love wrote:
> 	* config/rs6000.c (rs6000_expand_mtfsb0_mtfsb1_builtin): Add.

config/rs6000/rs6000.c

> +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0",
> +		  RS6000_BTM_ALWAYS,
> +		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
> +		  CODE_FOR_rs6000_mtfsb0)

I think you need RS6000_BTC_VOID on most of these calls?

>  static rtx
> +rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)

The changelog has the function name wrong?

> +  pat = GEN_FCN (icode) (op0);
> +  if (! pat)

No space after ! (or any other prefix operator that doesn't have letters
in its name, i.e. casts, sizeof, etc.)

> +static rtx
> +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)

> +  /* If the argument is a constant, check the range. Argument can only be a
> +     2-bit value.  Unfortunately, can't check the range of the value at
> +     compile time if the argument is a variable.  */

So what do we do for variable args?  Mask off the bits?

> +static rtx
> +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)

> +  /* If the argument is a constant, check the range. Agrument can only be a
> +     3-bit value.  Unfortunately, can't check the range of the value at
> +     compile time if the argument is a variable.
> +  */

Don't put */ on a separate line please.

> +  if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode ))

Stray space after VOIDmode.

> +(define_insn "rs6000_mtfsb0"
> +  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
> +		    UNSPECV_MTFSB0)]
> +  "TARGET_HARD_FLOAT"
> +  "mtfsb0 %0"
> +  [(set_attr "type" "fp")])

Hrm...  Does all of this work with -msoft-float?  Does it not ICE at least?

> +(define_expand "rs6000_set_fpscr_rn"
> +  [(match_operand:DI 0  "gpc_reg_operand")]

(Two spaces.)

You could handle immediate operands separately: you need only two mtfsbN
instructions for that, which is smaller and faster than the "variable"
sequence.  Well, for non-P9 anyway.

> +(define_expand "rs6000_mffsl"
> +  [(set (match_operand:DF 0 "gpc_reg_operand")
> +	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
> +  "TARGET_HARD_FLOAT"
> +{
> +  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
> +     otherwise fall back to the older mffs instruction to emulate the mffsl
> +     instruction.  */
> +
> +  if (TARGET_P9_MISC)
> +       emit_insn (gen_rs6000_mffsl_hw (operands[0]));

The indent is incorrect.  But you could just not do anything if
TARGET_P9_MISC: the RTL pattern above already is exactly what you need.
So "if (!TARGET_P9_MISC)" and then that block with the DONE moved in there,
and the TARGET_P9_MISC case can just fall through.

> +       emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007F0FFLL)));

Write numbers in lower case please (for readability).  0x70007f0ffLL


> +(define_insn "rs6000_mtfsf_hi"
> +  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")

const_int_operand is "n": an actual number, not e.g. a constant address
(like "i" allows).  It doesn't make a real difference in some cases, but
it's best to get it right.

> +the most significant word on 32-bit environments.  The @code{__builtin_mffs}
> +return the value of the FPSCR register.  Note, ISA 3.0 supports the
> +@code{__builtin_mffsl()} which is a lower latency version of this builtin.  The

mffsl does not return the whole fpscr, just the more useful (and cheaper
to access!) fields: rn and drn, the exception enables, the non-sticky
exception flags.

> +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
> @@ -0,0 +1,116 @@
> +/* { dg-do run { target { powerpc*-*-linux* &&  lp64 } } } */

Why only linux?


Okay for trunk with the nits fixed; the things that are more like
extensions can happen later (if at all).  Thanks!


Segher
Carl Love Sept. 27, 2018, 11:17 p.m. UTC | #5
Segher:

I have addressed the various formatting and other minor issues.  

I checked to see if the builtins worked with -msoft-float, which they
didn't.  I added checks for the -msoft-float into the functions:

rs6000_expand_set_fpscr_rn_builtin(), 
rs6000_expand_set_fpscr_drn_builtin(),
rs6000_expand_mtfsb_builtin()  

in file gcc/config/rs6000/rs6000.c to exit if the option was set on the
command line.  

Per your suggestion, I added code to use the mtfsb[0|1] instructions in
define_expand rs6000_set_fpscr_rn if the argument is a constant.

I verified that mtfsb[0|1] instructions are generated for constant
arguments via objdump.

I have rerun the regression tests on

    powerpc64le-unknown-linux-gnu (Power 8 LE  64-bit mode only) 
    powerpc64-unknown-linux-gnu (Power 8 BE  32-bit and 64-bit modes) 
    powerpc64le-unknown-linux-gnu (Power 9 LE  64-bit mode only)

With no regressions.

Please let me know if the patch looks OK for trunk.  Thanks for your
help on this patch.

                      Carl Love

---------------------------------------------------------------

gcc/ChangeLog:

2018-09-27  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-builtin.def (__builtin_mffsl): New.
	(__builtin_mtfsb0): New.
	(__builtin_mtfsb1): New.
	(__builtin_set_fpscr_rn): New.
	(__builtin_set_fpscr_drn): New.
	* config/rs6000/rs6000.c (rs6000_expand_mtfsb_builtin): Add.
	(rs6000_expand_set_fpscr_rn_builtin): Add.
	(rs6000_expand_set_fpscr_drn_builtin): Add.
	(rs6000_expand_builtin): Add case statement entries for
	RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1,
	RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN,
	RS6000_BUILTIN_MFFSL.
	(rs6000_init_builtins): Add ftype initialization and def_builtin
	calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1,
	__builtin_set_fpscr_rn, __builtin_set_fpscr_drn.
	* config/rs6000/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn,
	rs6000_mffscdrn): Add define_insn.
	(rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand.
	* doc/extend.texi: Add documentation for the builtins.

gcc/testsuite/ChangeLog:

2018-09-27  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/test_mffsl.c: New file.
	* gcc.target/powerpc/test_fpscr_rn_builtin.c: New file.
	* gcc.target/powerpc/test_fpscr_drn_builtin.c: New file.
	* gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file.
	* gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def               |  24 +++
 gcc/config/rs6000/rs6000.c                         | 168 ++++++++++++++++++
 gcc/config/rs6000/rs6000.md                        | 176 ++++++++++++++++++-
 gcc/doc/extend.texi                                |  38 ++++-
 .../gcc.target/powerpc/test_fpscr_drn_builtin.c    | 116 +++++++++++++
 .../powerpc/test_fpscr_drn_builtin_error.c         |  17 ++
 .../gcc.target/powerpc/test_fpscr_rn_builtin.c     | 190 +++++++++++++++++++++
 .../powerpc/test_fpscr_rn_builtin_error.c          |  22 +++
 gcc/testsuite/gcc.target/powerpc/test_mffsl.c      |  34 ++++
 9 files changed, 783 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f799681..976c36b 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2486,11 +2486,35 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
+	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
 	          RS6000_BTM_ALWAYS,
 	          RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
 		  CODE_FOR_rs6000_mtfsf)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
+		  CODE_FOR_rs6000_mtfsb0)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
+		  CODE_FOR_rs6000_mtfsb1)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY| RS6000_BTC_VOID,
+		  CODE_FOR_rs6000_set_fpscr_rn)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTM_64BIT | RS6000_BTC_UNARY
+		  | RS6000_BTC_VOID,
+		  CODE_FOR_rs6000_set_fpscr_drn)
+
 BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2b736d7..3ab8920 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -13544,6 +13544,11 @@ rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
     /* Builtin not supported on this processor.  */
     return 0;
 
+  if (icode == CODE_FOR_rs6000_mffsl
+      && rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
+    fatal_error (input_location,
+		 "__builtin_mffsl() not supported with -msoft-float");
+
   if (target == 0
       || GET_MODE (target) != tmode
       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
@@ -13592,6 +13597,128 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
     op1 = copy_to_mode_reg (mode1, op1);
 
   pat = GEN_FCN (icode) (op0, op1);
+  if (!pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+static rtx
+rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
+    fatal_error (input_location,
+		 "__builtin_mtfsb0 and __builtin_mtfsb1 not supported with -msoft-float");
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* Only allow bit numbers 0 to 31.  */
+  if (!u5bit_cint_operand (op0, VOIDmode))
+    {
+       error ("Argument must be a constant between 0 and 31.");
+       return const0_rtx;
+     }
+
+  pat = GEN_FCN (icode) (op0);
+  if (!pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+static rtx
+rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
+    fatal_error (input_location,
+		 "__builtin_set_fpscr_rn not supported with -msoft-float");
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range. Argument can only be a
+     2-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.  The least significant two
+     bits of the argument, regardless of type, are used to set the rounding
+     mode.  All other bits are ignored.  */
+  if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode))
+    {
+       error ("Argument must be a value between 0 and 3.");
+       return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (!pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+static rtx
+rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (TARGET_32BIT)
+    /* Builtin not supported in 32-bit mode.  */
+    fatal_error (input_location,
+		 "__builtin_set_fpscr_drn is not supported in 32-bit mode.");
+
+  if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
+    fatal_error (input_location,
+		 "__builtin_set_fpscr_drn not supported with -msoft-float");
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range.  Argument can only be a
+     3-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.  The least significant three
+     bits of the argument, regardless of type, are used to set the rounding
+     mode.  All other bits are ignored.  */
+  if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode))
+   {
+      error ("Argument must be a value between 0 and 7.");
+      return const0_rtx;
+    }
+
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
   if (! pat)
     return const0_rtx;
   emit_insn (pat);
@@ -16069,6 +16196,24 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case RS6000_BUILTIN_MFFS:
       return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
 
+    case RS6000_BUILTIN_MTFSB0:
+      return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+    case RS6000_BUILTIN_MTFSB1:
+      return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_RN:
+      return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
+						 exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_DRN:
+      return
+        rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+					     exp);
+
+    case RS6000_BUILTIN_MFFSL:
+      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
     case RS6000_BUILTIN_MTFSF:
       return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
 
@@ -16452,6 +16597,29 @@ rs6000_init_builtins (void)
   ftype = build_function_type_list (double_type_node, NULL_TREE);
   def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
 
+  ftype = build_function_type_list (double_type_node, NULL_TREE);
+  def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
+
   ftype = build_function_type_list (void_type_node,
 				    intSI_type_node, double_type_node,
 				    NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 68ba5fd..0535075 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -164,7 +164,13 @@
    UNSPECV_MFTB			; move from time base
    UNSPECV_NLGR			; non-local goto receiver
    UNSPECV_MFFS			; Move from FPSCR
-   UNSPECV_MTFSF		; Move to FPSCR Fields
+   UNSPECV_MFFSL		; Move from FPSCR light instruction version
+   UNSPECV_MFFSCRN		; Move from FPSCR float rounding mode
+   UNSPECV_MFFSCDRN		; Move from FPSCR decimal float rounding mode
+   UNSPECV_MTFSF		; Move to FPSCR Fields 8 to 15
+   UNSPECV_MTFSF_HI		; Move to FPSCR Fields 0 to 7
+   UNSPECV_MTFSB0		; Set FPSCR Field bit to 0
+   UNSPECV_MTFSB1		; Set FPSCR Field bit to 1
    UNSPECV_SPLIT_STACK_RETURN   ; A camouflaged return
    UNSPECV_SPEC_BARRIER         ; Speculation barrier
   ])
@@ -5824,6 +5830,130 @@
    xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_insn "rs6000_mtfsb0"
+  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
+		    UNSPECV_MTFSB0)]
+  "TARGET_HARD_FLOAT"
+  "mtfsb0 %0"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mtfsb1"
+  [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")]
+		    UNSPECV_MTFSB1)]
+  "TARGET_HARD_FLOAT"
+  "mtfsb1 %0"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mffscrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
+			    UNSPECV_MFFSCRN))]
+   "TARGET_P9_MISC"
+   "mffscrn %0,%1"
+  [(set_attr "type" "fp")])
+
+(define_insn "rs6000_mffscdrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+  "TARGET_P9_MISC"
+  "mffscdrn %0,%1"
+  [(set_attr "type" "fp")])
+
+(define_expand "rs6000_set_fpscr_rn"
+  [(match_operand 0 "reg_or_cint_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The floating point rounding control bits are FPSCR[62:63]. Put the
+     new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
+  if (TARGET_P9_MISC)
+    {
+       /* operands[0] may be a CONST_INT; mffscrn needs it in a register.  */
+       rtx src_di = force_reg (DImode, operands[0]);
+       rtx src_df = simplify_gen_subreg (DFmode, src_di, DImode, 0);
+       emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
+    }
+  else
+    {
+      if (CONST_INT_P (operands[0]))
+	{
+	  if ((INTVAL (operands[0]) & 0x1) == 0x1)
+	    emit_insn (gen_rs6000_mtfsb1 (GEN_INT (31)));
+	  else
+	    emit_insn (gen_rs6000_mtfsb0 (GEN_INT (31)));
+
+	  if ((INTVAL (operands[0]) & 0x2) == 0x2)
+	    emit_insn (gen_rs6000_mtfsb1 (GEN_INT (30)));
+	  else
+	    emit_insn (gen_rs6000_mtfsb0 (GEN_INT (30)));
+	}
+      else
+	{
+	  rtx tmp_rn = gen_reg_rtx (DImode);
+	  rtx tmp_di;
+
+	  /* Extract new RN mode from operand.  */
+	  emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3)));
+
+	  /* Insert new RN mode into FPSCR.  */
+	  emit_insn (gen_rs6000_mffs (tmp_df));
+	  tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+	  emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xfffffffc)));
+	  emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+	  /* Need to write to field k=15.  The fields are [0:15].  mtfsf
+	     updates field k = i + 8*(1-W) for each set bit FLM[i]; FLM is an
+	     8-bit mask [0:7].  With L=0, W=0, field 15 needs i=7, i.e. the
+	     least significant FLM bit, hence the mask 0x01.  */
+	  tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+	  emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));
+	}
+    }
+  DONE;
+})
+
+(define_expand "rs6000_set_fpscr_drn"
+  [(match_operand:DI 0 "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the
+     new rounding mode bits from operands[0][61:63] into FPSCR[29:31].  */
+  if (TARGET_P9_MISC)
+    {
+       /* Shift into a fresh pseudo so the caller's operand is not clobbered.  */
+       rtx src_di = gen_reg_rtx (DImode);
+       emit_insn (gen_ashldi3 (src_di, operands[0], GEN_INT (32)));
+       rtx src_df = simplify_gen_subreg (DFmode, src_di, DImode, 0);
+       emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df));
+    }
+  else
+    {
+       rtx tmp_rn = gen_reg_rtx (DImode);
+       rtx tmp_di;
+
+       /* Extract new DRN mode from operand.  */
+       emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x7)));
+       emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32)));
+
+       /* Insert new DRN mode into FPSCR.  */
+       emit_insn (gen_rs6000_mffs (tmp_df));
+       tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+       emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xfffffff8ffffffff)));
+       emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+       /* Need to write to field 7.  The fields are [0:15].  mtfsf updates
+	  field k = i + 8*(1-W) for each set bit FLM[i].  Hence with L=0 and
+	  W=1, field 7 needs i=7, the least significant FLM bit (mask 0x01).  */
+       tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+       emit_insn (gen_rs6000_mtfsf_hi (GEN_INT (0x01), tmp_df));
+    }
+  DONE;
+})
+
 ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
 ;; rather than (set (subreg:SI (reg)) (fix:SI ...))
 ;; because the first makes it clear that operand 0 is not live
@@ -13603,6 +13733,43 @@
 })
 
 
+;; The ISA 3.0 mffsl instruction is a lower latency instruction
+;; for reading bits [29:31], [45:51] and [56:63] of the FPSCR.
+;; Hardware form of mffsl; operand 0 is the FPR that receives the result.
+;; The insn condition only checks TARGET_HARD_FLOAT; the rs6000_mffsl
+;; expander generates this pattern only when TARGET_P9_MISC is set.
+(define_insn "rs6000_mffsl_hw"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+        (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+  "mffsl %0")
+
+;; Expander for __builtin_mffsl.  Operand 0 is the DFmode register that
+;; receives the FPSCR bits read by mffsl.
+(define_expand "rs6000_mffsl"
+  [(set (match_operand:DF 0 "gpc_reg_operand")
+	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+{
+  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
+     otherwise fall back to the older mffs instruction to emulate the mffsl
+     instruction.  */
+
+  if (!TARGET_P9_MISC)
+    {
+      rtx tmp_df = gen_reg_rtx (DFmode);
+
+      /* The mffs instruction reads the entire FPSCR.  Emulate the mffsl
+         instruction using the mffs instruction and masking off the bits
+         the mffsl instruction does not read.  */
+      emit_insn (gen_rs6000_mffs (tmp_df));
+      rtx tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+      emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007f0ffLL)));
+
+      /* Copy the masked value into the real destination.  Merely
+         reassigning operands[0] before DONE would leave the caller's
+         target register unset.  */
+      emit_move_insn (operands[0],
+		      simplify_gen_subreg (DFmode, tmp_di, DImode, 0));
+      DONE;
+    }
+
+  emit_insn (gen_rs6000_mffsl_hw (operands[0]));
+  DONE;
+})
+
 (define_insn "rs6000_mffs"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))]
@@ -13616,6 +13783,13 @@
   "TARGET_HARD_FLOAT"
   "mtfsf %0,%1")
 
+;; Variant of rs6000_mtfsf whose output template appends the extra
+;; operands "0,1" to mtfsf (L=0, W=1 per the comment in the
+;; rs6000_set_fpscr_drn expander).  Operand 0 is the constant field mask,
+;; operand 1 the FPR holding the new FPSCR contents.
+(define_insn "rs6000_mtfsf_hi"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")
+		     (match_operand:DF 1 "gpc_reg_operand" "d")]
+		    UNSPECV_MTFSF_HI)]
+  "TARGET_HARD_FLOAT"
+  "mtfsf %0,%1,0,1")
+
 
 ;; Power8 fusion support for fusing an addis instruction with a D-form load of
 ;; a GPR.  The addis instruction must be adjacent to the load, and use the same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7b471ec..817c899 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15632,6 +15632,10 @@ uint64_t __builtin_ppc_get_timebase ();
 unsigned long __builtin_ppc_mftb ();
 __ibm128 __builtin_unpack_ibm128 (__ibm128, int);
 __ibm128 __builtin_pack_ibm128 (double, double);
+double __builtin_mffs (void);
+void __builtin_mtfsb0 (const int);
+void __builtin_mtfsb1 (const int);
+void __builtin_set_fpscr_rn (int);
 @end smallexample
 
 The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb}
@@ -15640,7 +15644,21 @@ functions generate instructions to read the Time Base Register.  The
 instructions and always returns the 64 bits of the Time Base Register.
 The @code{__builtin_ppc_mftb} function always generates one instruction and
 returns the Time Base Register value as an unsigned long, throwing away
-the most significant word on 32-bit environments.
+the most significant word on 32-bit environments.  The @code{__builtin_mffs}
+function returns the value of the FPSCR register.  Note, ISA 3.0 supports the
+@code{__builtin_mffsl()} which permits software to read the control and
+non-sticky status bits in the FPSCR without the higher latency associated with
+accessing the sticky status bits.  The
+@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change
+as an argument.  The valid bit range is between 0 and 31.  The builtins map to
+the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and
+add 32.  Hence these instructions only modify the FPSCR[32:63] bits by
+changing the specified bit to a zero or one respectively.  The
+@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating
+point rounding mode bits.  The argument is a 2-bit value.  The argument can
+either be a const int or stored in a variable. The builtin uses the ISA 3.0
+instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks
+the current rounding mode bits out and OR's in the new value.
 
 @node Basic PowerPC Built-in Functions Available on ISA 2.05
 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05
@@ -15676,6 +15694,7 @@ The following built-in functions are available
 when hardware decimal floating point
 (@option{-mhard-dfp}) is available:
 @smallexample
+void __builtin_set_fpscr_drn(int);
 _Decimal64 __builtin_ddedpd (int, _Decimal64);
 _Decimal128 __builtin_ddedpdq (int, _Decimal128);
 _Decimal64 __builtin_denbcd (int, _Decimal64);
@@ -15690,6 +15709,14 @@ long long __builtin_dxex (_Decimal64);
 long long __builtin_dxexq (_Decimal128);
 _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
 unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
+
+The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal
+floating point rounding mode bits.  The argument is a 3-bit value.  The
+argument can either be a const int or the value can be stored in a variable.
+The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available.
+Otherwise the builtin reads the FPSCR, masks the current decimal rounding
+mode bits out and OR's in the new value.
+
 @end smallexample
 
 The following functions require @option{-mhard-float},
@@ -15891,6 +15918,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value);
 int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value);
+
+double __builtin_mffsl(void);
+
 @end smallexample
 The @code{__builtin_byte_in_set} function requires a
 64-bit environment supporting ISA 3.0 or later.  This function returns
@@ -15942,6 +15972,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and
 require that the type of the @code{value} argument be
 @code{__Decimal64} and @code{__Decimal128} respectively.
 
+The @code{__builtin_mffsl} uses the ISA 3.0 @code{mffsl} instruction to read
+the FPSCR.  The instruction is a lower latency version of the @code{mffs}
+instruction.  If the @code{mffsl} instruction is not available, then the
+builtin uses the older @code{mffs} instruction to read the FPSCR.
+
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
new file mode 100644
index 0000000..0fb554a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
@@ -0,0 +1,116 @@
+/* { dg-do run { target { powerpc*-*-* &&  lp64 } } } */
+/* { dg-require-effective-target dfp_hw } */
+/* { dg-options "-std=c99" } */
+
+/* Run-time test for __builtin_set_fpscr_drn(): set the decimal floating
+   point rounding mode and read it back from the FPSCR with
+   __builtin_mffs().  Requires hardware that implements the DRN field.  */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define DRN_MASK 0x700000000LL     /* DRN field mask */
+
+void abort (void);
+
+int main ()
+{
+  int val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val;
+
+  unsigned long long ll_value;
+
+  /* __builtin_set_fpscr_drn() builtin can take a const or a variable
+     value between 0 and 7 as the argument.
+  */
+
+  /* Test builtin decimal float rounding mode with const argument.  */
+  __builtin_set_fpscr_drn(7);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x700000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(2);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x200000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(5);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x500000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n");
+#else
+       abort();
+#endif
+    }
+
+  /* Test builtin decimal float rounding mode with variable as argument.  */
+  val = 7;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 0;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
new file mode 100644
index 0000000..04e9f03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test builtin with out of range constant arguments.  The builtin
+     __builtin_set_fpscr_drn() also supports a variable as an argument, but
+     the value of a variable can't be range checked at compile time.  */
+
+  __builtin_set_fpscr_drn(-1);  /* { dg-error "Argument must be a value between 0 and 7" } */ 
+  __builtin_set_fpscr_drn(8);   /* { dg-error "Argument must be a value between 0 and 7" } */ 
+
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
new file mode 100644
index 0000000..2a15585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c
@@ -0,0 +1,190 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define RN_MASK  0x3LL             /* RN field mask */
+
+void abort (void);
+
+/* Exercise __builtin_mffs(), __builtin_set_fpscr_rn(), __builtin_mtfsb0()
+   and __builtin_mtfsb1() and check each result by reading the FPSCR back.
+   A failure aborts, or prints a message when built with -DDEBUG.  */
+int main ()
+{
+  int val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, builtin_val;
+
+  unsigned long long ll_value;
+  register double  f14;
+
+  /* __builtin_set_fpscr_rn() builtin can take a const or a variable
+     value between 0 and 3 as the argument.
+     __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant
+     between 0 and 31.
+  */
+
+  /* Test reading the FPSCR register.  Compare the raw bit patterns so the
+     values can also be printed with an integer format.  */
+  __asm __volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+  builtin_val.d = __builtin_mffs();
+
+  if (conv_val.ll != builtin_val.ll)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n",
+	      builtin_val.ll, conv_val.ll);
+#else
+       abort();
+#endif
+    }
+
+  /* Test float rounding mode builtin with const value argument.  */
+  __builtin_set_fpscr_rn(3);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != 3)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n",
+	      ll_value, 3);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n",
+	      val, ll_value, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Reset to 0 for testing */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+
+  __builtin_mtfsb1(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 1)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 2)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(30) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(30) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != (0x1LL << (31-0)))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+
+  /* Test builtin float rounding mode with variable as argument.  */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 3;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+	      val, val);
+#else
+       abort();
+#endif
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
new file mode 100644
index 0000000..4835dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test builtins with out of range arguments.  Can only test for constant
+     int arguments.  The builtin __builtin_set_fpscr_rn() also supports a
+     variable as an argument, but the value of a variable can't be range
+     checked at compile time.  */
+
+  __builtin_mtfsb0(-1);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+  __builtin_mtfsb0(32);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+
+  __builtin_mtfsb1(-1);  /* { dg-error "Argument must be a constant between 0 and 31" } */
+  __builtin_mtfsb1(32);  /* { dg-error "Argument must be a constant between 0 and 31" } */ 
+
+  __builtin_set_fpscr_rn(-1);  /* { dg-error "Argument must be a value between 0 and 3" } */ 
+  __builtin_set_fpscr_rn(4);   /* { dg-error "Argument must be a value between 0 and 3" } */ 
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
new file mode 100644
index 0000000..9a4d86b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-options "-std=c99" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+/* Check that __builtin_mffsl() returns the same FPSCR fields as a plain
+   mffs restricted to the bits mffsl reads.  A failure aborts, or prints a
+   message when built with -DDEBUG.  */
+int main ()
+{
+
+  register double  f14;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, mffsl_val;
+
+  /* Read the whole FPSCR with mffs and keep only the fields that mffsl
+     returns: bits [29:31], [45:51] and [56:63].  */
+  __asm __volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+  conv_val.ll &= 0x70007f0ffULL;
+
+  mffsl_val.d = __builtin_mffsl();
+
+  /* Compare the raw bit patterns rather than the values as doubles.  */
+  if (conv_val.ll != mffsl_val.ll)
+    {
+#ifdef DEBUG
+      printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n",
+	     mffsl_val.ll, conv_val.ll);
+#else
+      abort();
+#endif
+    }
+}
Segher Boessenkool Sept. 28, 2018, 12:36 a.m. UTC | #6
Hi!

On Thu, Sep 27, 2018 at 04:17:57PM -0700, Carl Love wrote:
> +  if (icode == CODE_FOR_rs6000_mffsl
> +      && rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
> +    fatal_error (input_location,
> +		 "__builtin_mffsl() not supported with -msoft-float");

Please use plain "error ()" instead.  To keep whatever else here from
wreaking havoc, also immediately after the error() do "return const0_rtx"?

(Same for all other fatal_error, of course.  fatal_error is for when the
compiler needs to go down ungracefully, _now_.  It is nicer to still try
to continue for a little while).

> +  /* If the argument is a constant, check the range. Argument can only be a
> +     2-bit value.  Unfortunately, can't check the range of the value at
> +     compile time if the argument is a variable.  The least significant two
> +     bits of the argument, regardless of type, are used to set the rounding
> +     mode.  All other bits are ignored.  */
> +  if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode))
> +    {
> +       error ("Argument must be a value between 0 and 3.");
> +       return const0_rtx;
> +    }

These are indented a char too many.

> +  if (TARGET_P9_MISC)
> +    {
> +       rtx src_df = gen_reg_rtx (DImode);
> +
> +       src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0);
> +       emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
> +    }
> +  else

This is easier if you write it like:

  if (...)
    {
      emit this;
      emit that;
      DONE;
    }

  if (...)
    {
      emit this;
      emit that;
      DONE;
    }

etc.
With that style, code that is semantically at the same level has the same
indent, instead of wandering further and further to the right.

> +	{
> +	  rtx tmp_rn = gen_reg_rtx (DImode);
> +	  rtx tmp_di = gen_reg_rtx (DImode);
> +
> +	  /* Extract new RN mode from operand.  */
> +	  emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3)));
> +
> +	  /* Insert new RN mode into FSCPR.  */
> +	  emit_insn (gen_rs6000_mffs (tmp_df));
> +	  tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
> +	  emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC)));

This loses bits 0..31 (the top half of the register).  Maybe use
GEN_INT (-4) ?

> +	  emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
> +
> +	  /* Need to write to field k=15.  The fields are [0:15].  Hence with
> +	     L=0, W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W).  FLM is an
> +	     8-bit field[0:7]. Need to set the bit that corresponds to the
> +	     value of i that you want [0:7].  */
> +	  tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
> +	  emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));
> +    }

:-)

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c
> @@ -0,0 +1,116 @@
> +/* { dg-do run { target { powerpc*-*-* &&  lp64 } } } */
> +/* { dg-options "-std=c99" } */

You need to require a system that implements the DRN bits...  I think
you'll need the "dfp_hw" selector.  (That's power6 and later, may not
be so easy to test this ;-) )

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
> @@ -0,0 +1,34 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-options "-std=c99" } */

Maybe you should do the run tests with -O2?  Maybe compile tests, too,
come to think of it.


With those details fixed, okay for trunk.  Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f79968154..a50236e77 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2486,11 +2486,34 @@  BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
+	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
 	          RS6000_BTM_ALWAYS,
 	          RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
 		  CODE_FOR_rs6000_mtfsf)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0_SI, "__builtin_mtfsb0",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb0_si)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1_SI, "__builtin_mtfsb1",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_mtfsb1_si)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_rn)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn",
+		  RS6000_BTM_ALWAYS,
+		  RS6000_BTC_MISC | RS6000_BTC_UNARY,
+		  CODE_FOR_rs6000_set_fpscr_drn)
+
 BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init",
 	      RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index aa707b255..7db9c10a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -13356,6 +13356,113 @@  rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
   return NULL_RTX;
 }
 
+/* Expand a call EXP to __builtin_mtfsb0 or __builtin_mtfsb1 using the insn
+   ICODE.  The single argument must be an integer constant in the range
+   0 to 31; otherwise an error is reported and const0_rtx is returned.
+   On success the insn is emitted and NULL_RTX is returned.  */
+static rtx
+rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* Only allow bit numbers 0 to 31.  */
+  if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31)
+    {
+       error ("Argument must be a constant between 0 and 31.");
+       return const0_rtx;
+     }
+
+  /* Force the operand into a form the insn's operand predicate accepts.  */
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
+/* Expand a call EXP to __builtin_set_fpscr_rn using the insn ICODE.  A
+   constant argument outside 0 to 3 is reported as an error and const0_rtx
+   is returned; a non-constant argument is passed through unchecked.  On
+   success the insn is emitted and NULL_RTX is returned.  */
+static rtx
+rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range.  Argument can only be a
+     2-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.
+  */
+  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3))
+    {
+       error ("Argument must be a value between 0 and 3.");
+       return const0_rtx;
+    }
+
+  /* Force the operand into a form the insn's operand predicate accepts.  */
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+/* Expand a call EXP to __builtin_set_fpscr_drn using the insn ICODE.  A
+   constant argument outside 0 to 7 is reported as an error and const0_rtx
+   is returned; a non-constant argument is passed through unchecked.  On
+   success the insn is emitted and NULL_RTX is returned.  */
+static rtx
+rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* If the argument is a constant, check the range.  Argument can only be a
+     3-bit value.  Unfortunately, can't check the range of the value at
+     compile time if the argument is a variable.
+  */
+  if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7))
+   {
+       error ("Argument must be a value between 0 and 7.");
+       return const0_rtx;
+    }
+
+  /* Force the operand into a form the insn's operand predicate accepts.  */
+  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+
+  pat = GEN_FCN (icode) (op0);
+  if (! pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  return NULL_RTX;
+}
+
 static rtx
 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
 {
@@ -15987,6 +16094,26 @@  rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case RS6000_BUILTIN_MFFS:
       return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
 
+    case RS6000_BUILTIN_MTFSB0_SI:
+      return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb0_si,
+						  exp);
+
+    case RS6000_BUILTIN_MTFSB1_SI:
+      return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb1_si,
+						  exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_RN:
+      return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
+						 exp);
+
+    case RS6000_BUILTIN_SET_FPSCR_DRN:
+      return
+        rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+					     exp);
+
+    case RS6000_BUILTIN_MFFSL:
+      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
     case RS6000_BUILTIN_MTFSF:
       return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
 
@@ -16370,6 +16497,30 @@  rs6000_init_builtins (void)
   ftype = build_function_type_list (double_type_node, NULL_TREE);
   def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
 
+  ftype = build_function_type_list (double_type_node, NULL_TREE);
+  def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+
+  def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI);
+
+  ftype = build_function_type_list (void_type_node,
+				    intSI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1_SI);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
+
+  ftype = build_function_type_list (void_type_node,
+				    intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
+
   ftype = build_function_type_list (void_type_node,
 				    intSI_type_node, double_type_node,
 				    NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d70b01b8c..7714aacd8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -163,7 +163,13 @@ 
    UNSPECV_MFTB			; move from time base
    UNSPECV_NLGR			; non-local goto receiver
    UNSPECV_MFFS			; Move from FPSCR
-   UNSPECV_MTFSF		; Move to FPSCR Fields
+   UNSPECV_MFFSL		; Move from FPSCR light instruction version
+   UNSPECV_MFFSCRN		; Move from FPSCR float rounding mode
+   UNSPECV_MFFSCDRN		; Move from FPSCR decimal float rounding mode
+   UNSPECV_MTFSF		; Move to FPSCR Fields 8 to 15
+   UNSPECV_MTFSF_L0W1		; Move to FPSCR Fields 0 to 7
+   UNSPECV_MTFSFB0		; Set FPSCR Field bit to 0
+   UNSPECV_MTFSFB1		; Set FPSCR Field bit to 1
    UNSPECV_SPLIT_STACK_RETURN   ; A camouflaged return
    UNSPECV_SPEC_BARRIER         ; Speculation barrier
   ])
@@ -5823,6 +5829,115 @@ 
    xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+;; Emits "mtfsb0" with the constant in operand 0 as its only argument.
+(define_insn "rs6000_mtfsb0_si"
+ [(use (match_operand:SI 0 "short_cint_operand" "n"))
+  (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)]
+  "TARGET_HARD_FLOAT"
+ "mtfsb0 %0")
+
+;; Emits "mtfsb1" with the constant in operand 0 as its only argument.
+(define_insn "rs6000_mtfsb1_si"
+  [(use (match_operand:SI 0 "short_cint_operand" "n"))
+   (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB1)]
+   "TARGET_HARD_FLOAT"
+   "mtfsb1 %0")
+
+;; Emits "mffscrn"; operand 0 is the destination FPR and operand 1 the FPR
+;; supplying the new rounding mode.
+;; NOTE(review): the condition only checks TARGET_HARD_FLOAT, while the
+;; rs6000_set_fpscr_rn expander uses this pattern only under
+;; TARGET_P9_VECTOR -- confirm whether the insn should test for ISA 3.0.
+(define_insn "rs6000_mffscrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+   "TARGET_HARD_FLOAT"
+   "mffscrn %0,%1")
+
+;; Emits "mffscdrn"; operand 0 is the destination FPR and operand 1 the FPR
+;; supplying the new decimal rounding mode.
+;; NOTE(review): the condition only checks TARGET_HARD_FLOAT, while the
+;; rs6000_set_fpscr_drn expander uses this pattern only under
+;; TARGET_P9_VECTOR -- confirm whether the insn should test for ISA 3.0.
+(define_insn "rs6000_mffscdrn"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+   (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN))
+   (use (match_operand:DF 1 "gpc_reg_operand" "d"))]
+   "TARGET_HARD_FLOAT"
+   "mffscdrn %0,%1")
+
+;; Expander for __builtin_set_fpscr_rn.  Operand 0 is a DImode register
+;; whose low two bits are the new binary floating point rounding mode.
+(define_expand "rs6000_set_fpscr_rn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+
+  /* The floating point rounding control bits are FPSCR[62:63].  Put the
+     new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
+  if (TARGET_P9_VECTOR)
+    {
+      rtx src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0);
+      emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
+      DONE;
+    }
+
+  /* Without mffscrn, do a read-modify-write of the FPSCR.  */
+  rtx tmp_rn = gen_reg_rtx (DImode);
+
+  /* Extract new RN mode from operand.  */
+  emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3)));
+
+  /* Insert new RN mode into the FPSCR image read by mffs.  The mask -4
+     clears only the two RN bits; 0xFFFFFFFC would also clear the upper
+     32 bits of the image.  */
+  emit_insn (gen_rs6000_mffs (tmp_df));
+  rtx tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+  emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (-4)));
+  emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+  /* Need to write to field k=15.  The fields are [0:15].  The field is
+     selected by k = i + 8*(1-W), so with L=0 and W=0 bit i=7 of the 8-bit
+     FLM field [0:7] must be set, which is the mask 0x01.  */
+  tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+  emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df));
+  DONE;
+})
+
+;; Expander for __builtin_set_fpscr_drn.  Operand 0 is a DImode register
+;; whose low three bits are the new decimal floating point rounding mode.
+(define_expand "rs6000_set_fpscr_drn"
+  [(match_operand:DI 0  "gpc_reg_operand")]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp_df = gen_reg_rtx (DFmode);
+  rtx tmp_rn = gen_reg_rtx (DImode);
+
+  /* The decimal floating point rounding control bits are FPSCR[29:31].  Put
+     the new rounding mode bits from operands[0][61:63] into FPSCR[29:31].  */
+
+  if (TARGET_P9_VECTOR)
+    {
+      /* Shift into a fresh pseudo instead of into operands[0], so the
+         register holding the caller's argument is left unmodified.  */
+      emit_insn (gen_ashldi3 (tmp_rn, operands[0], GEN_INT (32)));
+      rtx src_df = simplify_gen_subreg (DFmode, tmp_rn, DImode, 0);
+      emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df));
+      DONE;
+    }
+
+  /* Without mffscdrn, do a read-modify-write of the FPSCR.  */
+
+  /* Extract new DRN mode from operand and line it up with FPSCR[29:31].  */
+  emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x7)));
+  emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32)));
+
+  /* Insert new DRN mode into the FPSCR image read by mffs.  The mask
+     clears only the three DRN bits and keeps all other 61 bits.  */
+  emit_insn (gen_rs6000_mffs (tmp_df));
+  rtx tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+  emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFF)));
+  emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn));
+
+  /* Need to write to field k=7.  The fields are [0:15].  The field is
+     selected by k = i + 8*(1-W), so with L=0 and W=1 bit i=7 of the 8-bit
+     FLM field [0:7] must be set, which is the mask 0x01.  */
+  tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0);
+  emit_insn (gen_rs6000_mtfsf_L0W1 (GEN_INT (0x01), tmp_df));
+  DONE;
+})
+
 ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
 ;; rather than (set (subreg:SI (reg)) (fix:SI ...))
 ;; because the first makes it clear that operand 0 is not live
@@ -13602,6 +13717,31 @@ 
 })
 
 
+;; The ISA 3.0 mffsl instruction is a lower latency instruction
+;; for reading the FPSCR.  Operand 0 is the floating point register that
+;; receives the value.  The read is modelled as an unspec_volatile
+;; (UNSPECV_MFFSL) with no inputs.
+(define_insn "rs6000_mffsl0"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+        (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT && TARGET_P9_MISC"
+  "mffsl %0")
+
+;; Expander behind __builtin_mffsl.  It is available whenever hard float is
+;; enabled so that the mffs fallback below is actually reachable; the choice
+;; of instruction uses the same TARGET_P9_MISC test as rs6000_mffsl0.
+(define_expand "rs6000_mffsl"
+  [(set (match_operand:DF 0 "gpc_reg_operand")
+	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))]
+  "TARGET_HARD_FLOAT"
+{
+  /* If the low latency mffsl instruction (ISA 3.0) is available use it,
+     otherwise fall back to the older mffs instruction, which also reads
+     the FPSCR but with a little more latency.  */
+
+  if (TARGET_P9_MISC)
+    emit_insn (gen_rs6000_mffsl0 (operands[0]));
+  else
+    emit_insn (gen_rs6000_mffs (operands[0]));
+
+  DONE;
+})
+
 (define_insn "rs6000_mffs"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))]
@@ -13615,6 +13755,13 @@ 
   "TARGET_HARD_FLOAT"
   "mtfsf %0,%1")
 
+;; Variant of mtfsf with the two trailing assembler operands (L and W)
+;; hard-coded to 0 and 1.  Operand 0 is the constant FLM field mask and
+;; operand 1 is the floating point register holding the new FPSCR image.
+(define_insn "rs6000_mtfsf_L0W1"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")
+		     (match_operand:DF 1 "gpc_reg_operand" "d")]
+		    UNSPECV_MTFSF_L0W1)]
+  "TARGET_HARD_FLOAT"
+  "mtfsf %0,%1,0,1")
+
 
 ;; Power8 fusion support for fusing an addis instruction with a D-form load of
 ;; a GPR.  The addis instruction must be adjacent to the load, and use the same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 130f6a671..7c6279ec2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15745,6 +15745,16 @@  uint64_t __builtin_ppc_get_timebase ();
 unsigned long __builtin_ppc_mftb ();
 __ibm128 __builtin_unpack_ibm128 (__ibm128, int);
 __ibm128 __builtin_pack_ibm128 (double, double);
+double __builtin_mffs(void);       /* Return value of the FPSCR register.
+                                      Note, ISA 3.0 supports __builtin_mffsl() 
+                                      which is a lower latency version of this
+                                      builtin.  */
+void __builtin_mtfsb0(const int);  /* Argument can be 0 to 31.  Sets
+                                      the specified bit in the FPSCR to 0.  */
+void __builtin_mtfsb1(const int);  /* Argument can be 0 to 31.  Sets
+                                      the specified bit in the FPSCR to 1.  */
+void __builtin_set_fpscr_rn(int);  /* The argument is the 2-bit value to set
+                                      the floating point rounding mode to.  */
 @end smallexample
 
 The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb}
@@ -15803,6 +15813,10 @@  long long __builtin_dxex (_Decimal64);
 long long __builtin_dxexq (_Decimal128);
 _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
 unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
+void __builtin_set_fpscr_drn(int);  /* The argument is the 3-bit value to
+                                       set the decimal floating point rounding
+                                       mode to.  */
+
 @end smallexample
 
 The following functions require @option{-mhard-float},
@@ -16004,6 +16018,12 @@  int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value);
 int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value);
 int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value);
+
+double __builtin_mffsl(void);   /* Return value of the FPSCR register.  Uses
+                                   lower latency version of the mffs
+                                   instruction. Defaults to mffs for pre
+                                   ISA 3.0.  */
+
 @end smallexample
 The @code{__builtin_byte_in_set} function requires a
 64-bit environment supporting ISA 3.0 or later.  This function returns
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
new file mode 100644
index 000000000..4f77078c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c
@@ -0,0 +1,282 @@ 
+/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-pedantic" } */
+
+#include <altivec.h>
+
+/* Compile with -DDEBUG to print a diagnostic for each failing check instead
+   of aborting.  DEBUG must stay undefined by default, otherwise no check
+   can ever make the test fail.  */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define RN_MASK  0x3LL             /* RN field mask */
+#define DRN_MASK 0x700000000LL     /* DRN field mask */
+
+void abort (void);
+
+/* Exercise the FPSCR builtins and check their effect by reading the FPSCR
+   back with __builtin_mffs().  FPSCR images are always compared as 64-bit
+   integers through a union, never as doubles.  */
+int main ()
+{
+  int val;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, builtin_val;
+
+  unsigned long long ll_value;
+  register double  f14;
+
+  /* __builtin_set_fpscr_rn() builtin can take a const or a variable
+     value between 0 and 3 as the argument.
+     __builtin_set_fpscr_drn() builtin can take a const or a variable
+     value between 0 and 7 as the argument.
+     __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant
+     between 0 and 31.
+  */
+
+  /* Test reading the FPSCR register */
+  asm volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+  builtin_val.d = __builtin_mffs();
+
+  if (conv_val.ll != builtin_val.ll)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mffs() returned 0x%llx, not the expected value 0x%llx\n",
+              builtin_val.ll, conv_val.ll);
+#else
+       abort();
+#endif
+    }
+
+  /* Test float rounding mode builtin with const value argument.  */
+  __builtin_set_fpscr_rn(3);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != 3)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expected value 0x%x\n",
+              ll_value, 3);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != (unsigned long long)val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expected value 0x%x\n",
+              val, ll_value, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Reset to 0 for testing */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+
+  /* FPSCR bit 31 is the least significant bit of the upper word's
+     neighbour, i.e. bit value 0x1 in the 64-bit image read by mffs.  */
+  __builtin_mtfsb1(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 1)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(31);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x1LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 2)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(30) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(30);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & 0x2LL;
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(30) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb1(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != (0x1LL << (31-0)))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_mtfsb0(0);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & (0x1LL << (31-0));
+
+  if (ll_value != 0)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n");
+#else
+       abort();
+#endif
+    }
+
+
+  /* Test builtin float rounding mode with variable as argument.  */
+  val = 0;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != (unsigned long long)val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 3;
+  __builtin_set_fpscr_rn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & RN_MASK;
+
+  if (ll_value != (unsigned long long)val)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  /* Test builtin decimal float rounding mode with const argument.  */
+  __builtin_set_fpscr_drn(7);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x700000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(2);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x200000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n");
+#else
+       abort();
+#endif
+    }
+
+  __builtin_set_fpscr_drn(5);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != 0x500000000)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n");
+#else
+       abort();
+#endif
+    }
+
+  /* Test builtin decimal float rounding mode with variable as argument.  */
+  val = 7;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 0;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  val = 2;
+  __builtin_set_fpscr_drn(val);
+  conv_val.d = __builtin_mffs();
+  ll_value = conv_val.ll & DRN_MASK;
+
+  if (ll_value != ((unsigned long long)val << 32))
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n",
+              val, val);
+#else
+       abort();
+#endif
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
new file mode 100644
index 000000000..10de0be44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile { target powerpc*-*-* } } */
+
+#include <altivec.h>
+
+int main ()
+{
+
+  /* Test the builtins with out of range arguments.  Only constant int
+     arguments can be tested.  The builtins __builtin_set_fpscr_rn() and
+     __builtin_set_fpscr_drn() also accept a variable as an argument, but
+     the value of a variable cannot be checked at compile time.  */
+
+  __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+  __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+
+  __builtin_mtfsb1(-1);  /* { dg-error "Argument must be a constant between 0 and 31." } */
+  __builtin_mtfsb1(32);  /* { dg-error "Argument must be a constant between 0 and 31." } */ 
+
+  __builtin_set_fpscr_rn(-1);  /* { dg-error "Argument must be a value between 0 and 3." } */ 
+  __builtin_set_fpscr_rn(4);   /* { dg-error "Argument must be a value between 0 and 3." } */ 
+
+  __builtin_set_fpscr_drn(-1);  /* { dg-error "Argument must be a value between 0 and 7." } */ 
+  __builtin_set_fpscr_drn(8);   /* { dg-error "Argument must be a value between 0 and 7." } */ 
+
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c
new file mode 100644
index 000000000..dc4f863ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c
@@ -0,0 +1,36 @@ 
+/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-pedantic -mcpu=power9" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+/* Check that __builtin_mffsl() returns the same FPSCR image as an explicit
+   mffs instruction.  The two values are compared and printed as 64-bit
+   integers through a union, never as doubles.  */
+int main ()
+{
+
+  register double  f14;
+  union blah {
+    double d;
+    unsigned long long ll;
+  } conv_val, builtin_val;
+
+  /* Test reading the FPSCR register.  */
+  /* NOTE(review): this assumes mffs and mffsl return identical images at
+     this point; confirm against the ISA whether mffsl omits any status
+     bits that mffs reports.  */
+  asm volatile ("mffs %0" : "=f"(f14));
+  conv_val.d = f14;
+  builtin_val.d = __builtin_mffsl();
+
+  if (conv_val.ll != builtin_val.ll)
+    {
+#ifdef DEBUG
+       printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expected value 0x%llx\n",
+              builtin_val.ll, conv_val.ll);
+#else
+       abort();
+#endif
+    }
+
+  return 0;
+}