diff mbox

[fortran] Bug 64432 - [5 Regression] SYSTEM_CLOCK(COUNT_RATE=rate) wrong result for integer(4)::rate

Message ID 54FBD37D.1070208@charter.net
State New
Headers show

Commit Message

Jerry DeLisle March 8, 2015, 4:43 a.m. UTC
The attached patch fixes this regression and implements KIND=1 and KIND=2 
compatible calls.  The smallest KIND found of the arguments given by the caller 
is used to determine the results returned.  New versions of the intrinsics with 
the KIND argument are provided.  The previous versions are retained and these 
call the new with KIND= the respective call, 4 or 8.

The test case given here is provided for others to see the results and is not 
yet dejagnu=ized. It provides all possible combinations of kinds and types to test.

I did performance test this with various combinations of the timeit.f08 program 
to show the overhead is very very low. (also attached)

Regression tested on x86-64 and PowerPC. I am not set up to test on 
Cygwin/Windows platforms. If someone can do this, please do.

OK for trunk?


Regards,

Jerry

2015-03-07  Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR fortran/64432
	* trans-decl (gfc_build_intrinsic_function_decls): Add a fourth
	argument to system_clock declarations.
	*trans-intrinisic.c (conv_intrinsic_system_clock): Build calls
	to include the smallest kind used as the fourth argument to be
	used by the runtime system_clock functions.

2015-03-07 Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR libgfortran/64432
	* gfortran.map: Add new section for new versions of
	system_clock intrinsics.
	* intrinsics/system_clock.c (system_clock4K, system_clock8K):
	New/revised functions to include a KIND argument to handle
	KIND=1 and KIND=2 cases. (system_clock4, system_clock8): Kept
	for backward compatibility. These call the new versions above.
program test
  implicit none
  real(4) :: real_rate4
  real(8) :: real_rate8
  real(10) :: real_rate10
  real(16) :: real_rate16
  integer(1) :: int_count1, int_max1, int_rate1
  integer(2) :: int_count2, int_max2, int_rate2
  integer(4) :: int_count4, int_max4, int_rate4
  integer(8) :: int_count8, int_max8, int_rate8
  integer(16) :: int_count16, int_max16, int_rate16

  call system_clock (int_count1, real_rate4, int_max1)
  print *, int_count1, real_rate4, int_max1
  call system_clock (int_count1, real_rate4, int_max2)
  print *, int_count1, real_rate4, int_max2
  call system_clock (int_count1, real_rate4, int_max4)
  print *, int_count1, real_rate4, int_max4
  call system_clock (int_count1, real_rate4, int_max8)
  print *, int_count1, real_rate4, int_max8
  call system_clock (int_count1, real_rate4, int_max16)
  print *, int_count1, real_rate4, int_max16
  call system_clock (int_count1, real_rate8, int_max1)
  print *, int_count1, real_rate8, int_max1
  call system_clock (int_count1, real_rate8, int_max2)
  print *, int_count1, real_rate8, int_max2
  call system_clock (int_count1, real_rate8, int_max4)
  print *, int_count1, real_rate8, int_max4
  call system_clock (int_count1, real_rate8, int_max8)
  print *, int_count1, real_rate8, int_max8
  call system_clock (int_count1, real_rate8, int_max16)
  print *, int_count1, real_rate8, int_max16
  call system_clock (int_count1, real_rate10, int_max1)
  print *, int_count1, real_rate10, int_max1
  call system_clock (int_count1, real_rate10, int_max2)
  print *, int_count1, real_rate10, int_max2
  call system_clock (int_count1, real_rate10, int_max4)
  print *, int_count1, real_rate10, int_max4
  call system_clock (int_count1, real_rate10, int_max8)
  print *, int_count1, real_rate10, int_max8
  call system_clock (int_count1, real_rate10, int_max16)
  print *, int_count1, real_rate10, int_max16
  call system_clock (int_count1, real_rate16, int_max1)
  print *, int_count1, real_rate16, int_max1
  call system_clock (int_count1, real_rate16, int_max2)
  print *, int_count1, real_rate16, int_max2
  call system_clock (int_count1, real_rate16, int_max4)
  print *, int_count1, real_rate16, int_max4
  call system_clock (int_count1, real_rate16, int_max8)
  print *, int_count1, real_rate16, int_max8
  call system_clock (int_count1, real_rate16, int_max16)
  print *, int_count1, real_rate16, int_max16
  call system_clock (int_count2, real_rate4, int_max1)
  print *, int_count2, real_rate4, int_max1
  call system_clock (int_count2, real_rate4, int_max2)
  print *, int_count2, real_rate4, int_max2
  call system_clock (int_count2, real_rate4, int_max4)
  print *, int_count2, real_rate4, int_max4
  call system_clock (int_count2, real_rate4, int_max8)
  print *, int_count2, real_rate4, int_max8
  call system_clock (int_count2, real_rate4, int_max16)
  print *, int_count2, real_rate4, int_max16
  call system_clock (int_count2, real_rate8, int_max1)
  print *, int_count2, real_rate8, int_max1
  call system_clock (int_count2, real_rate8, int_max2)
  print *, int_count2, real_rate8, int_max2
  call system_clock (int_count2, real_rate8, int_max4)
  print *, int_count2, real_rate8, int_max4
  call system_clock (int_count2, real_rate8, int_max8)
  print *, int_count2, real_rate8, int_max8
  call system_clock (int_count2, real_rate8, int_max16)
  print *, int_count2, real_rate8, int_max16
  call system_clock (int_count2, real_rate10, int_max1)
  print *, int_count2, real_rate10, int_max1
  call system_clock (int_count2, real_rate10, int_max2)
  print *, int_count2, real_rate10, int_max2
  call system_clock (int_count2, real_rate10, int_max4)
  print *, int_count2, real_rate10, int_max4
  call system_clock (int_count2, real_rate10, int_max8)
  print *, int_count2, real_rate10, int_max8
  call system_clock (int_count2, real_rate10, int_max16)
  print *, int_count2, real_rate10, int_max16
  call system_clock (int_count2, real_rate16, int_max1)
  print *, int_count2, real_rate16, int_max1
  call system_clock (int_count2, real_rate16, int_max2)
  print *, int_count2, real_rate16, int_max2
  call system_clock (int_count2, real_rate16, int_max4)
  print *, int_count2, real_rate16, int_max4
  call system_clock (int_count2, real_rate16, int_max8)
  print *, int_count2, real_rate16, int_max8
  call system_clock (int_count2, real_rate16, int_max16)
  print *, int_count2, real_rate16, int_max16
  call system_clock (int_count4, real_rate4, int_max1)
  print *, int_count4, real_rate4, int_max1
  call system_clock (int_count4, real_rate4, int_max2)
  print *, int_count4, real_rate4, int_max2
  call system_clock (int_count4, real_rate4, int_max4)
  print *, int_count4, real_rate4, int_max4
  call system_clock (int_count4, real_rate4, int_max8)
  print *, int_count4, real_rate4, int_max8
  call system_clock (int_count4, real_rate4, int_max16)
  print *, int_count4, real_rate4, int_max16
  call system_clock (int_count4, real_rate8, int_max1)
  print *, int_count4, real_rate8, int_max1
  call system_clock (int_count4, real_rate8, int_max2)
  print *, int_count4, real_rate8, int_max2
  call system_clock (int_count4, real_rate8, int_max4)
  print *, int_count4, real_rate8, int_max4
  call system_clock (int_count4, real_rate8, int_max8)
  print *, int_count4, real_rate8, int_max8
  call system_clock (int_count4, real_rate8, int_max16)
  print *, int_count4, real_rate8, int_max16
  call system_clock (int_count4, real_rate10, int_max1)
  print *, int_count4, real_rate10, int_max1
  call system_clock (int_count4, real_rate10, int_max2)
  print *, int_count4, real_rate10, int_max2
  call system_clock (int_count4, real_rate10, int_max4)
  print *, int_count4, real_rate10, int_max4
  call system_clock (int_count4, real_rate10, int_max8)
  print *, int_count4, real_rate10, int_max8
  call system_clock (int_count4, real_rate10, int_max16)
  print *, int_count4, real_rate10, int_max16
  call system_clock (int_count4, real_rate16, int_max1)
  print *, int_count4, real_rate16, int_max1
  call system_clock (int_count4, real_rate16, int_max2)
  print *, int_count4, real_rate16, int_max2
  call system_clock (int_count4, real_rate16, int_max4)
  print *, int_count4, real_rate16, int_max4
  call system_clock (int_count4, real_rate16, int_max8)
  print *, int_count4, real_rate16, int_max8
  call system_clock (int_count4, real_rate16, int_max16)
  print *, int_count4, real_rate16, int_max16
  call system_clock (int_count8, real_rate4, int_max1)
  print *, int_count8, real_rate4, int_max1
  call system_clock (int_count8, real_rate4, int_max2)
  print *, int_count8, real_rate4, int_max2
  call system_clock (int_count8, real_rate4, int_max4)
  print *, int_count8, real_rate4, int_max4
  call system_clock (int_count8, real_rate4, int_max8)
  print *, int_count8, real_rate4, int_max8
  call system_clock (int_count8, real_rate4, int_max16)
  print *, int_count8, real_rate4, int_max16
  call system_clock (int_count8, real_rate8, int_max1)
  print *, int_count8, real_rate8, int_max1
  call system_clock (int_count8, real_rate8, int_max2)
  print *, int_count8, real_rate8, int_max2
  call system_clock (int_count8, real_rate8, int_max4)
  print *, int_count8, real_rate8, int_max4
  call system_clock (int_count8, real_rate8, int_max8)
  print *, int_count8, real_rate8, int_max8
  call system_clock (int_count8, real_rate8, int_max16)
  print *, int_count8, real_rate8, int_max16
  call system_clock (int_count8, real_rate10, int_max1)
  print *, int_count8, real_rate10, int_max1
  call system_clock (int_count8, real_rate10, int_max2)
  print *, int_count8, real_rate10, int_max2
  call system_clock (int_count8, real_rate10, int_max4)
  print *, int_count8, real_rate10, int_max4
  call system_clock (int_count8, real_rate10, int_max8)
  print *, int_count8, real_rate10, int_max8
  call system_clock (int_count8, real_rate10, int_max16)
  print *, int_count8, real_rate10, int_max16
  call system_clock (int_count8, real_rate16, int_max1)
  print *, int_count8, real_rate16, int_max1
  call system_clock (int_count8, real_rate16, int_max2)
  print *, int_count8, real_rate16, int_max2
  call system_clock (int_count8, real_rate16, int_max4)
  print *, int_count8, real_rate16, int_max4
  call system_clock (int_count8, real_rate16, int_max8)
  print *, int_count8, real_rate16, int_max8
  call system_clock (int_count8, real_rate16, int_max16)
  print *, int_count8, real_rate16, int_max16
  call system_clock (int_count16, real_rate4, int_max1)
  print *, int_count16, real_rate4, int_max1
  call system_clock (int_count16, real_rate4, int_max2)
  print *, int_count16, real_rate4, int_max2
  call system_clock (int_count16, real_rate4, int_max4)
  print *, int_count16, real_rate4, int_max4
  call system_clock (int_count16, real_rate4, int_max8)
  print *, int_count16, real_rate4, int_max8
  call system_clock (int_count16, real_rate4, int_max16)
  print *, int_count16, real_rate4, int_max16
  call system_clock (int_count16, real_rate8, int_max1)
  print *, int_count16, real_rate8, int_max1
  call system_clock (int_count16, real_rate8, int_max2)
  print *, int_count16, real_rate8, int_max2
  call system_clock (int_count16, real_rate8, int_max4)
  print *, int_count16, real_rate8, int_max4
  call system_clock (int_count16, real_rate8, int_max8)
  print *, int_count16, real_rate8, int_max8
  call system_clock (int_count16, real_rate8, int_max16)
  print *, int_count16, real_rate8, int_max16
  call system_clock (int_count16, real_rate10, int_max1)
  print *, int_count16, real_rate10, int_max1
  call system_clock (int_count16, real_rate10, int_max2)
  print *, int_count16, real_rate10, int_max2
  call system_clock (int_count16, real_rate10, int_max4)
  print *, int_count16, real_rate10, int_max4
  call system_clock (int_count16, real_rate10, int_max8)
  print *, int_count16, real_rate10, int_max8
  call system_clock (int_count16, real_rate10, int_max16)
  print *, int_count16, real_rate10, int_max16
  call system_clock (int_count16, real_rate16, int_max1)
  print *, int_count16, real_rate16, int_max1
  call system_clock (int_count16, real_rate16, int_max2)
  print *, int_count16, real_rate16, int_max2
  call system_clock (int_count16, real_rate16, int_max4)
  print *, int_count16, real_rate16, int_max4
  call system_clock (int_count16, real_rate16, int_max8)
  print *, int_count16, real_rate16, int_max8
  call system_clock (int_count16, real_rate16, int_max16)
  print *, int_count16, real_rate16, int_max16
end program test
program test
  implicit none
  integer, parameter  :: np = 16
  integer(np) :: int_count, int_max, int_rate
  integer(np) :: start, finish
  integer(4) :: i,j,k
  real(8) :: real_rate
  k = 1000000000
  call system_clock (start, count_rate=int_rate)
  do i = 1, k
    call system_clock (int_count)
  end do
  call system_clock (finish, count_rate=int_rate)
  print *, finish-start, int_rate
  real_rate = real(finish-start, 8) / real(int_rate, 8)
  print *, real_rate, " seconds"
  print *, real_rate/real(k,8), " seconds/call"
  print *, int_rate
end program test

Comments

Janne Blomqvist March 8, 2015, 11:07 p.m. UTC | #1
On Sun, Mar 8, 2015 at 6:43 AM, Jerry DeLisle <jvdelisle@charter.net> wrote:
> The attached patch fixes this regression

Nice, thanks for working on this! Just out of curiosity, what was the
reason for the powerpc failures you had with the previous version of
the patch?

> and implements KIND=1 and KIND=2
> compatible calls.

... but I'm not convinced this is worth it. As far as I can see, the
reason you needed to add the *K symbols to the library is solely to
correctly handle the kind=1 and 2 cases, right? However, IMHO kind=1
and 2 integers have too little room for decent precision and for
avoiding wraparound anyway. I can see people using the kind=4 version,
as that is the default that you get if you don't play around with
non-default kinds. I can also see people using the kind=8 version, in
order to get better resolution and avoid wraparound. But explicitly
using kind=1 or 2, why on earth would anyone do that? So the only
reason why we need to support it is because the standard says so.

So I would prefer if we just hardcode the error values in the frontend
(-HUGE, 0, 0), in case somebody tries to use the kind=1,2 versions,
thus also removing the need for the new library functions, keeping the
existing simpler ones instead. AFAICT this would be standards
conforming. Any other opinions on this?

Cheers,
Steve Kargl March 8, 2015, 11:58 p.m. UTC | #2
On Mon, Mar 09, 2015 at 01:07:25AM +0200, Janne Blomqvist wrote:
> So I would prefer if we just hardcode the error values in the frontend
> (-HUGE, 0, 0), in case somebody tries to use the kind=1,2 versions,
> thus also removing the need for the new library functions, keeping the
> existing simpler ones instead. AFAICT this would be standards
> conforming. Any other opinions on this?
> 

+1
Jerry DeLisle March 9, 2015, 12:37 a.m. UTC | #3
On 03/08/2015 04:58 PM, Steve Kargl wrote:
> On Mon, Mar 09, 2015 at 01:07:25AM +0200, Janne Blomqvist wrote:
>> So I would prefer if we just hardcode the error values in the frontend
>> (-HUGE, 0, 0), in case somebody tries to use the kind=1,2 versions,
>> thus also removing the need for the new library functions, keeping the
>> existing simpler ones instead. AFAICT this would be standards
>> conforming. Any other opinions on this?
>>
>
> +1
>

No problem, will do.  It was a learning exercise for me regardless.

Jerry
Jerry DeLisle March 14, 2015, 2:22 p.m. UTC | #4
On 03/08/2015 04:58 PM, Steve Kargl wrote:
> On Mon, Mar 09, 2015 at 01:07:25AM +0200, Janne Blomqvist wrote:
>> So I would prefer if we just hardcode the error values in the frontend
>> (-HUGE, 0, 0), in case somebody tries to use the kind=1,2 versions,
>> thus also removing the need for the new library functions, keeping the
>> existing simpler ones instead. AFAICT this would be standards
>> conforming. Any other opinions on this?
>>

Revised patch attached as requested. Regression tested on x86_64 linux. Typical 
results are shown below. I will provide a test case for the test-suite.

$ ./a.out
KIND=1: -127  0  0
KIND=1: -127  0  0
KIND=1: -127  .00000000  0
  -----------------------------------------------------------
KIND=2: -32767  0  0
KIND=2: -32767  .00000000  0
  -----------------------------------------------------------
KIND=4: 57496123  1000  2147483647
KIND=4: 57496123  1000.00000  2147483647
  -----------------------------------------------------------
KIND=8: 57496123484138  1000000000  9223372036854775807
KIND=8: 57496123522116  1000000000.0000000  9223372036854775807
  -----------------------------------------------------------
KIND=10: 57496123575504  1000000000  9223372036854775807
KIND=10: 57496123612377  1000000000.00000000000  9223372036854775807
  -----------------------------------------------------------
KIND=16: 57496123669210  1000000000  9223372036854775807
KIND=16: 57496123698413  1000000000.00000000000000000000000000  9223372036854775807


OK for trunk?

Regards,

Jerry

2015-03-14  Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR fortran/64432
	*trans-intrinisic.c (conv_intrinsic_system_clock): Check the
	smallest kind passed in user arguments and hard-code results for
	KIND=1 or KIND=2 to indicate no clock available.

2015-03-14 Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR libgfortran/64432
	* intrinsics/system_clock.c (system_clock4, system_clock8):
	Cleanup some whitespace.
diff mbox

Patch

Index: gcc/fortran/trans-decl.c
===================================================================
--- gcc/fortran/trans-decl.c	(revision 221248)
+++ gcc/fortran/trans-decl.c	(working copy)
@@ -3122,15 +3122,16 @@  gfc_build_intrinsic_function_decls (void)
   DECL_PURE_P (gfor_fndecl_sr_kind) = 1;
   TREE_NOTHROW (gfor_fndecl_sr_kind) = 1;
 
+
   gfor_fndecl_system_clock4 = gfc_build_library_function_decl (
-	get_identifier (PREFIX("system_clock_4")),
-	void_type_node, 3, gfc_pint4_type_node, gfc_pint4_type_node,
-	gfc_pint4_type_node);
+	get_identifier (PREFIX("system_clock_4K")),
+	void_type_node, 4, gfc_pint4_type_node, gfc_pint4_type_node,
+	gfc_pint4_type_node, gfc_pint4_type_node);
 
   gfor_fndecl_system_clock8 = gfc_build_library_function_decl (
-	get_identifier (PREFIX("system_clock_8")),
+	get_identifier (PREFIX("system_clock_8K")),
 	void_type_node, 3, gfc_pint8_type_node, gfc_pint8_type_node,
-	gfc_pint8_type_node);
+	gfc_pint8_type_node, gfc_pint4_type_node);
 
   /* Power functions.  */
   {
Index: gcc/fortran/trans-intrinsic.c
===================================================================
--- gcc/fortran/trans-intrinsic.c	(revision 221248)
+++ gcc/fortran/trans-intrinsic.c	(working copy)
@@ -2670,23 +2670,15 @@  conv_intrinsic_system_clock (gfc_code *code)
 {
   stmtblock_t block;
   gfc_se count_se, count_rate_se, count_max_se;
-  tree arg1 = NULL_TREE, arg2 = NULL_TREE, arg3 = NULL_TREE;
-  tree type, tmp;
-  int kind;
+  tree arg1 = NULL_TREE, arg2 = NULL_TREE, arg3 = NULL_TREE,
+	      arg4 = NULL_TREE;
+  tree tmp;
+  int least, most;
 
   gfc_expr *count = code->ext.actual->expr;
   gfc_expr *count_rate = code->ext.actual->next->expr;
   gfc_expr *count_max = code->ext.actual->next->next->expr;
 
-  /* The INTEGER(8) version has higher precision, it is used if both COUNT
-     and COUNT_MAX can hold 64-bit values, or are absent.  */
-  if ((!count || count->ts.kind >= 8)
-      && (!count_max || count_max->ts.kind >= 8))
-    kind = 8;
-  else
-    kind = gfc_default_integer_kind;
-  type = gfc_get_int_type (kind);
-
   /* Evaluate our arguments.  */
   if (count)
     {
@@ -2706,37 +2698,84 @@  conv_intrinsic_system_clock (gfc_code *code)
       gfc_conv_expr (&count_max_se, count_max);
     }
 
+  /* Find the smallest kind found of the arguments. We will pass this to
+     the runtime library.  */
+  least = 16;
+  least = (count && count->ts.kind < least) ? count->ts.kind : least;
+  least = (count_rate && count_rate->ts.kind < least) ? count_rate->ts.kind
+						      : least;
+  least = (count_max && count_max->ts.kind < least) ? count_max->ts.kind
+						    : least;
+  arg4 = build_int_cst (gfc_get_int_type (4), least);
+
+					       
+  /* Find the largest kind.  This is used to decide which runtime call
+     to build.  */
+  most = 1;
+  most = (count && count->ts.kind > most) ? count->ts.kind : most;
+  most = (count_rate && count_rate->ts.kind > most) ? count_rate->ts.kind
+						    : most;
+  most = (count_max && count_max->ts.kind > most) ? count_max->ts.kind
+						  : most;
+
   /* Prepare temporary variables if we need them.  */
-  if (count && count->ts.kind != kind)
-    arg1 = gfc_create_var (type, "count");
-  else if (count)
-    arg1 = count_se.expr;
 
-  if (count_rate && (count_rate->ts.kind != kind
-		     || count_rate->ts.type != BT_INTEGER))
-    arg2 = gfc_create_var (type, "count_rate");
-  else if (count_rate)
-    arg2 = count_rate_se.expr;
+  if (count)
+    {
+      if (most >= 8)
+	arg1 = gfc_create_var (gfc_get_int_type (8), "count");
+      else
+	arg1 = gfc_create_var (gfc_get_int_type (4), "count");
+    }
 
-  if (count_max && count_max->ts.kind != kind)
-    arg3 = gfc_create_var (type, "count_max");
-  else if (count_max)
-    arg3 = count_max_se.expr;
+  if (count_rate)
+    {
+      if (most >= 8)
+	arg2 = gfc_create_var (gfc_get_int_type (8), "count_rate");
+      else
+	arg2 = gfc_create_var (gfc_get_int_type (4), "count_rate");
+    }
 
+  if (count_max)
+    {
+      if (most >= 8)
+	arg3 = gfc_create_var (gfc_get_int_type (8), "count_max");
+      else
+	arg3 = gfc_create_var (gfc_get_int_type (4), "count_max");
+    }
+
   /* Make the function call.  */
   gfc_init_block (&block);
-  tmp = build_call_expr_loc (input_location,
-			     kind == 4 ? gfor_fndecl_system_clock4
-				       : gfor_fndecl_system_clock8,
-                             3,
-			     arg1 ? gfc_build_addr_expr (NULL_TREE, arg1)
-				  : null_pointer_node,
-			     arg2 ? gfc_build_addr_expr (NULL_TREE, arg2)
-				  : null_pointer_node,
-			     arg3 ? gfc_build_addr_expr (NULL_TREE, arg3)
-				  : null_pointer_node);
-  gfc_add_expr_to_block (&block, tmp);
 
+  if (most <= 4)
+    {
+      tmp = build_call_expr_loc (input_location,
+	      gfor_fndecl_system_clock4, 4,
+	      arg1 ? gfc_build_addr_expr (NULL_TREE, arg1)
+		     : null_pointer_node,
+	      arg2 ? gfc_build_addr_expr (NULL_TREE, arg2)
+		     : null_pointer_node,
+	      arg3 ? gfc_build_addr_expr (NULL_TREE, arg3)
+		     : null_pointer_node,
+	      arg4 ? gfc_build_addr_expr (NULL_TREE, arg4)
+		     : null_pointer_node);
+      gfc_add_expr_to_block (&block, tmp);
+    }
+  /* Handle kind>=8, 10, o4 16 arguments */
+  if (most >= 8)
+    {
+      tmp = build_call_expr_loc (input_location,
+	      gfor_fndecl_system_clock8, 4,
+	      arg1 ? gfc_build_addr_expr (NULL_TREE, arg1)
+		     : null_pointer_node,
+	      arg2 ? gfc_build_addr_expr (NULL_TREE, arg2)
+		     : null_pointer_node,
+	      arg3 ? gfc_build_addr_expr (NULL_TREE, arg3)
+		     : null_pointer_node,
+	      arg4 ? gfc_build_addr_expr (NULL_TREE, arg4)
+		     : null_pointer_node);
+      gfc_add_expr_to_block (&block, tmp);
+    }
   /* And store values back if needed.  */
   if (arg1 && arg1 != count_se.expr)
     gfc_add_modify (&block, count_se.expr,
Index: libgfortran/gfortran.map
===================================================================
--- libgfortran/gfortran.map	(revision 221248)
+++ libgfortran/gfortran.map	(working copy)
@@ -1274,8 +1274,14 @@  GFORTRAN_1.6 {
     __ieee_exceptions_MOD_ieee_support_flag_noarg;
     __ieee_exceptions_MOD_ieee_support_halting;
     __ieee_exceptions_MOD_ieee_usual;
-} GFORTRAN_1.5; 
+} GFORTRAN_1.5;
 
+GFORTRAN_1.7 {
+  global:
+    _gfortran_system_clock_4K;
+    _gfortran_system_clock_8K;
+} GFORTRAN_1.6; 
+
 F2C_1.0 {
   global:
     _gfortran_f2c_specific__abs_c4;
Index: libgfortran/intrinsics/system_clock.c
===================================================================
--- libgfortran/intrinsics/system_clock.c	(revision 221248)
+++ libgfortran/intrinsics/system_clock.c	(working copy)
@@ -109,37 +109,77 @@  gf_gettime_mono (time_t * secs, long * fracsecs, l
 
 #endif /* !__MINGW32 && !__CYGWIN__  */
 
-extern void system_clock_4 (GFC_INTEGER_4 *, GFC_INTEGER_4 *, GFC_INTEGER_4 *);
+extern void
+system_clock_4 (GFC_INTEGER_4 *count, GFC_INTEGER_4 *count_rate,
+		GFC_INTEGER_4 *count_max);
 export_proto(system_clock_4);
 
-extern void system_clock_8 (GFC_INTEGER_8 *, GFC_INTEGER_8 *, GFC_INTEGER_8 *);
+extern void
+system_clock_8 (GFC_INTEGER_8 *count, GFC_INTEGER_8 *count_rate,
+		GFC_INTEGER_8 *count_max);
 export_proto(system_clock_8);
 
+extern void
+system_clock_4K (GFC_INTEGER_4 *, GFC_INTEGER_4 *,
+		 GFC_INTEGER_4 *, GFC_INTEGER_4 * );
+export_proto(system_clock_4K);
 
-/* prefix(system_clock_4) is the INTEGER(4) version of the SYSTEM_CLOCK
+extern void
+system_clock_8K (GFC_INTEGER_8 *, GFC_INTEGER_8 *,
+		 GFC_INTEGER_8 *, GFC_INTEGER_4 *);
+export_proto(system_clock_8K);
+
+
+
+/* prefix(system_clock_4K) is the INTEGER(4) version of the SYSTEM_CLOCK
    intrinsic subroutine.  It returns the number of clock ticks for the current
    system time, the number of ticks per second, and the maximum possible value
    for COUNT.  */
 
 void
-system_clock_4(GFC_INTEGER_4 *count, GFC_INTEGER_4 *count_rate,
-	       GFC_INTEGER_4 *count_max)
+system_clock_4K (GFC_INTEGER_4 *count, GFC_INTEGER_4 *count_rate,
+	       GFC_INTEGER_4 *count_max, GFC_INTEGER_4 *kind)
 {
-#if defined(__MINGW32__) || defined(__CYGWIN__) 
-  if (count)
+#if defined(__MINGW32__) || defined(__CYGWIN__)
+  /* Use GetTickCount here as the resolution and range is
+     sufficient for the INTEGER(kind=4) version, and
+     QueryPerformanceCounter has potential issues.  */
+  if (likely(*kind == 4))
     {
-      /* Use GetTickCount here as the resolution and range is
-	 sufficient for the INTEGER(kind=4) version, and
-	 QueryPerformanceCounter has potential issues.  */
-      uint32_t cnt = GetTickCount ();
-      if (cnt > GFC_INTEGER_4_HUGE)
-	cnt = cnt - GFC_INTEGER_4_HUGE - 1;
-      *count = cnt;
+      if (count)
+	{
+	  uint32_t cnt = GetTickCount ();
+	  cnt = cnt > GFC_INTEGER_4_HUGE ? cnt
+		      : cnt - GFC_INTEGER_4_HUGE - 1;
+	  *count = cnt;
+	}
+      if (count_rate)
+	*count_rate = 1000;
+      if (count_max)
+	*count_max = GFC_INTEGER_4_HUGE;
     }
-  if (count_rate)
-    *count_rate = 1000;
-  if (count_max)
-    *count_max = GFC_INTEGER_4_HUGE;
+  else
+    {
+      if (count)
+	{
+	  uint32_t cnt = GetTickCount ();
+	  if (*kind == 1)
+	    {
+	      cnt /= 1000;
+	      cnt = cnt > GFC_INTEGER_1_HUGE ? cnt
+			  : cnt - GFC_INTEGER_1_HUGE - 1;
+	    }
+	  else
+	    cnt = cnt > GFC_INTEGER_2_HUGE ? cnt
+			  : cnt - GFC_INTEGER_2_HUGE - 1;
+	  *count = cnt;
+	}
+      if (count_rate)
+	* count_rate = *kind == 1 ? 1 : 1000
+      if (count_max)
+	*count_max =  *kind == 1 ? GFC_INTEGER_1_HUGE;
+				 : GFC_INTEGER_2_HUGE;
+    }
 #else
   time_t secs;
   long fracsecs, tck;
@@ -146,18 +186,54 @@  void
 
   if (gf_gettime_mono (&secs, &fracsecs, &tck) == 0)
     {
-      long tck_out = tck > 1000 ? 1000 : tck;
-      long tck_r = tck / tck_out;
-      GFC_UINTEGER_4 ucnt = (GFC_UINTEGER_4) secs * tck_out;
-      ucnt += fracsecs / tck_r;
-      if (ucnt > GFC_INTEGER_4_HUGE)
-	ucnt = ucnt - GFC_INTEGER_4_HUGE - 1;
-      if (count)
-	*count = ucnt;
-      if (count_rate)
-	*count_rate = tck_out;
-      if (count_max)
-	*count_max = GFC_INTEGER_4_HUGE;
+      if (likely(*kind == 4))
+	{
+	  long tck_out = tck > 1000 ? 1000 : tck;
+	  long tck_r = tck / tck_out;
+	  GFC_UINTEGER_4 ucnt = (GFC_UINTEGER_4) secs * tck_out;
+	  ucnt += fracsecs / tck_r;
+	  if (ucnt > GFC_INTEGER_4_HUGE)
+	    ucnt = ucnt - GFC_INTEGER_4_HUGE - 1;
+	  if (count)
+	    *count = ucnt;
+	  if (count_rate)
+	    *count_rate = tck_out;
+	  if (count_max)
+	    *count_max = GFC_INTEGER_4_HUGE;
+	}
+      else
+	{
+	  long tck_out = tck > 1000 ? 1000 : tck;
+	  long tck_r = tck / tck_out;
+	  GFC_UINTEGER_2 ucnt = (GFC_UINTEGER_2) secs * tck_out;
+	  ucnt += fracsecs / tck_r;
+	  if (count)
+	    {
+	      if (*kind == 1)
+		{
+		  ucnt = tck_out < 1000 ? 0 : ucnt / 512;
+		  if (ucnt > GFC_INTEGER_1_HUGE)
+		    ucnt = ucnt - GFC_INTEGER_1_HUGE - 1;
+		}
+	      else
+		{
+		  if (ucnt > GFC_INTEGER_2_HUGE)
+		    ucnt = ucnt - GFC_INTEGER_2_HUGE - 1;
+		}
+	      *count = ucnt;
+	    }
+	  if (count_rate)
+	    {
+	      if (*kind == 1)
+		*count_rate = tck_out < 1000 ? 0 : 1;
+	      else
+		*count_rate = tck_out;
+	    }
+	  if (count_max)
+	    *count_max = (*kind == 1) ? GFC_INTEGER_1_HUGE
+				      : GFC_INTEGER_2_HUGE;
+	    
+	}
     }
   else
     {
@@ -175,8 +251,8 @@  void
 /* INTEGER(8) version of the above routine.  */
 
 void
-system_clock_8 (GFC_INTEGER_8 *count, GFC_INTEGER_8 *count_rate,
-		GFC_INTEGER_8 *count_max)
+system_clock_8K (GFC_INTEGER_8 *count, GFC_INTEGER_8 *count_rate,
+		 GFC_INTEGER_8 *count_max, GFC_INTEGER_4 *kind)
 {
 #if defined(__MINGW32__) || defined(__CYGWIN__) 
   LARGE_INTEGER cnt;
@@ -186,40 +262,134 @@  void
     fail = true;
   if (count_rate && !QueryPerformanceFrequency (&freq))
     fail = true;
-  if (fail)
+  if (likely(kind >= 8))
     {
+      if (fail)
+	{
+	  if (count)
+	    *count = - GFC_INTEGER_8_HUGE;
+	  if (count_rate)
+	    *count_rate = 0;
+	  if (count_max)
+	    *count_max = 0;
+	}
+      else
+	{
+	  if (count)
+	    *count = cnt.QuadPart;
+	  if (count_rate)
+	    *count_rate = freq.QuadPart;
+	  if (count_max)
+	    *count_max = GFC_INTEGER_8_HUGE;
+	}
+    }
+  else if (*kind == 4)
+    {
       if (count)
-	*count = - GFC_INTEGER_8_HUGE;
+	{
+	  /* Use GetTickCount here as the resolution and range is
+	     sufficient for the INTEGER(kind=4) version, and
+	     QueryPerformanceCounter has potential issues.  */
+	  uint32_t cnt = GetTickCount ();
+	  if (cnt > GFC_INTEGER_4_HUGE)
+	    cnt = cnt - GFC_INTEGER_4_HUGE - 1;
+	  *count = cnt;
+	}
       if (count_rate)
-	*count_rate = 0;
+	*count_rate = 1000;
       if (count_max)
-	*count_max = 0;
+	*count_max = GFC_INTEGER_4_HUGE;
     }
   else
     {
       if (count)
-	*count = cnt.QuadPart;
+	{
+	  /* Use GetTickCount here as the resolution and range is
+	     sufficient for the INTEGER(kind=4) version, and
+	     QueryPerformanceCounter has potential issues.  */
+	  uint32_t cnt = GetTickCount ();
+	  if (*kind == 1)
+	    {
+	      cnt /= 1000;
+	      cnt = cnt > GFC_INTEGER_1_HUGE ? cnt
+			  : cnt - GFC_INTEGER_1_HUGE - 1;
+	    }
+	  else
+	      cnt = cnt > GFC_INTEGER_2_HUGE ? cnt
+			  : cnt - GFC_INTEGER_2_HUGE - 1;
+	  *count = cnt;
+	}
       if (count_rate)
-	*count_rate = freq.QuadPart;
+	* count_rate = *kind == 1 ? 1 : 1000
       if (count_max)
-	*count_max = GFC_INTEGER_8_HUGE;
+	*count_max =  *kind == 1 ? GFC_INTEGER_1_HUGE;
+				 : GFC_INTEGER_2_HUGE;
     }
 #else
   time_t secs;
   long fracsecs, tck;
-
   if (gf_gettime_mono (&secs, &fracsecs, &tck) == 0)
     {
-      GFC_UINTEGER_8 ucnt = (GFC_UINTEGER_8) secs * tck;
-      ucnt += fracsecs;
-      if (ucnt > GFC_INTEGER_8_HUGE)
-	ucnt = ucnt - GFC_INTEGER_8_HUGE - 1;
-      if (count)
-	*count = ucnt;
-      if (count_rate)
-	*count_rate = tck;
-      if (count_max)
-	*count_max = GFC_INTEGER_8_HUGE;
+      if (likely(*kind >= 8))
+	{
+	  GFC_UINTEGER_8 ucnt = (GFC_UINTEGER_8) secs * tck;
+	  ucnt += fracsecs;
+	  if (ucnt > GFC_INTEGER_8_HUGE)
+	    ucnt = ucnt - GFC_INTEGER_8_HUGE - 1;
+	  if (count)
+	    *count = ucnt;
+	  if (count_rate)
+	    *count_rate = tck;
+	  if (count_max)
+	    *count_max = GFC_INTEGER_8_HUGE;
+	}
+      else if (*kind == 4)
+	{
+	  long tck_out = tck > 1000 ? 1000 : tck;
+	  long tck_r = tck / tck_out;
+	  GFC_UINTEGER_4 ucnt = (GFC_UINTEGER_4) secs * tck_out;
+	  ucnt += fracsecs / tck_r;
+	  if (ucnt > GFC_INTEGER_4_HUGE)
+	    ucnt = ucnt - GFC_INTEGER_4_HUGE - 1;
+	  if (count)
+	    *count = ucnt;
+	  if (count_rate)
+	    *count_rate = tck_out;
+	  if (count_max)
+	    *count_max = GFC_INTEGER_4_HUGE;
+	}
+      else
+	{
+	  long tck_out = tck > 1000 ? 1000 : tck;
+	  long tck_r = tck / tck_out;
+	  GFC_UINTEGER_2 ucnt = (GFC_UINTEGER_2) secs * tck_out;
+	  ucnt += fracsecs / tck_r;
+	  if (count)
+	    {
+	      if (*kind == 1)
+		{
+		  ucnt = tck_out < 1000 ? 0 : ucnt / 512;
+		  if (ucnt > GFC_INTEGER_1_HUGE)
+		    ucnt = ucnt - GFC_INTEGER_1_HUGE - 1;
+		}
+	      else
+		{
+		  if (ucnt > GFC_INTEGER_2_HUGE)
+		    ucnt = ucnt - GFC_INTEGER_2_HUGE - 1;
+		}
+	      *count = ucnt;
+	    }
+	  if (count_rate)
+	    {
+	      if (*kind == 1)
+		*count_rate = tck_out < 1000 ? 0 : 1;
+	      else
+		*count_rate = tck_out;
+	    }
+	  if (count_max)
+	    *count_max = *kind == 1 ? GFC_INTEGER_1_HUGE
+				    : GFC_INTEGER_2_HUGE;
+	}
     }
   else
     {
@@ -232,3 +402,21 @@  void
     }
 #endif
 }
+
+/* For backward compatibility with previous library ABI.  */
+void
+system_clock_4 (GFC_INTEGER_4 *count, GFC_INTEGER_4 *count_rate,
+		GFC_INTEGER_4 *count_max)
+{
+  GFC_INTEGER_4 kind = 4;
+  return system_clock_4K (count, count_rate, count_max, &kind);
+}
+
+void
+system_clock_8 (GFC_INTEGER_8 *count, GFC_INTEGER_8 *count_rate,
+		GFC_INTEGER_8 *count_max)
+{
+  GFC_INTEGER_4 kind = 8;
+  return system_clock_8K (count, count_rate, count_max, &kind);
+}
+