diff mbox series

[3/3] realtime/matrix_mult: Fix test optimization

Message ID 20211115081526.384856-4-lkml@jv-coder.de
State Changes Requested
Headers show
Series [1/3] realtime/librttest: Fix functions with empty parameters | expand

Commit Message

Joerg Vehlow Nov. 15, 2021, 8:15 a.m. UTC
From: Joerg Vehlow <joerg.vehlow@aox-tech.de>

The actual load of the test was optimized away,
because there was no way to reach the memory
used for calculation from the outside of the function.

To fix this, the memory used for the matrices is now
allocated in the main thread and passed to the workload
function as a pointer. This should prevent optimization and
also allows for the matrices to be bigger, without overflowing
the stack.

Signed-off-by: Joerg Vehlow <joerg.vehlow@aox-tech.de>
---
 .../realtime/func/matrix_mult/matrix_mult.c   | 115 ++++++++----------
 1 file changed, 52 insertions(+), 63 deletions(-)

Comments

Cyril Hrubis Nov. 15, 2021, 3:22 p.m. UTC | #1
Hi!
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) International Business Machines  Corp., 2007, 2008
> + * 
> + * Authors: Darren Hart <dvhltc@us.ibm.com>
> + *          Dinakar Guniguntala <dino@in.ibm.com>
> + */
> +/*\
> + * [Description]
>   *
> - *   Copyright ?? International Business Machines  Corp., 2007, 2008
> - *
> - *   This program is free software;  you can redistribute it and/or modify
> - *   it under the terms of the GNU General Public License as published by
> - *   the Free Software Foundation; either version 2 of the License, or
> - *   (at your option) any later version.
> - *
> - *   This program is distributed in the hope that it will be useful,
> - *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
> - *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
> - *   the GNU General Public License for more details.
> - *
> - *   You should have received a copy of the GNU General Public License
> - *   along with this program;  if not, write to the Free Software
> - *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - *
> - * NAME
> - *      matrix_mult.c
> - *
> - * DESCRIPTION
> - *      Compare running sequential matrix multiplication routines
> - *      to running them in parallel to judge mutliprocessor
> - *      performance
> - *
> - * USAGE:
> - *      Use run_auto.sh script in current directory to build and run test.
> - *
> - * AUTHOR
> - *      Darren Hart <dvhltc@us.ibm.com>
> - *
> - * HISTORY
> - *      2007-Mar-09:  Initial version by Darren Hart <dvhltc@us.ibm.com>
> - *      2008-Feb-26:  Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
> - *
> - *****************************************************************************/
> + * Compare running sequential matrix multiplication routines
> + * to running them in parallel to judge multiprocessor
> + * performance
> + */

Ideally the comment changes should be in a separate patch...

>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -69,9 +46,14 @@ static int iterations_percpu;
>  stats_container_t sdat, cdat, *curdat;
>  stats_container_t shist, chist;
>  static pthread_barrier_t mult_start;
> -static pthread_mutex_t mutex_cpu;
>  
> -void usage(void)
> +struct matrices {
> +	double A[MATRIX_SIZE][MATRIX_SIZE];
> +	double B[MATRIX_SIZE][MATRIX_SIZE];
> +	double C[MATRIX_SIZE][MATRIX_SIZE];
> +};
> +
> +static void usage(void)
>  {
>  	rt_help();
>  	printf("matrix_mult specific options:\n");
> @@ -80,7 +62,7 @@ void usage(void)
>  	printf("  -i#	   #: number of iterations\n");
>  }
>  
> -int parse_args(int c, char *v)
> +static int parse_args(int c, char *v)
>  {
>  	int handled = 1;
>  	switch (c) {
> @@ -100,7 +82,7 @@ int parse_args(int c, char *v)
>  	return handled;
>  }
>  
> -void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
> +static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
>  		 double B[MATRIX_SIZE][MATRIX_SIZE])

These static function conversions should ideally be in a separate patch
as well. At least the patch description does not mention either of these
changes.

>  {
>  	int i, j;
> @@ -112,41 +94,39 @@ void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
>  	}
>  }
>  
> -void matrix_mult(int m_size)
> +static void matrix_mult(struct matrices *matrices)
>  {
> -	double A[m_size][m_size];
> -	double B[m_size][m_size];
> -	double C[m_size][m_size];
>  	int i, j, k;
>  
> -	matrix_init(A, B);
> -	for (i = 0; i < m_size; i++) {
> -		int i_m = m_size - i;
> -		for (j = 0; j < m_size; j++) {
> -			double sum = A[i_m][j] * B[j][i];
> -			for (k = 0; k < m_size; k++)
> -				sum += A[i_m][k] * B[k][j];
> -			C[i][j] = sum;
> +	matrix_init(matrices->A, matrices->B);
> +	for (i = 0; i < MATRIX_SIZE; i++) {
> +		int i_m = MATRIX_SIZE - i;
> +		for (j = 0; j < MATRIX_SIZE; j++) {
> +			double sum = matrices->A[i_m][j] *  matrices->B[j][i];
> +			for (k = 0; k < MATRIX_SIZE; k++)
> +				sum +=  matrices->A[i_m][k] *  matrices->B[k][j];
> +			 matrices->C[i][j] = sum;
>  		}
>  	}
>  }
>  
> -void matrix_mult_record(int m_size, int index)
> +static void matrix_mult_record(struct matrices *matrices, int index)
>  {
>  	nsec_t start, end, delta;
>  	int i;
>  
>  	start = rt_gettime();
>  	for (i = 0; i < ops; i++)
> -		matrix_mult(MATRIX_SIZE);
> +		matrix_mult(matrices);
>  	end = rt_gettime();
>  	delta = (long)((end - start) / NS_PER_US);
>  	curdat->records[index].x = index;
>  	curdat->records[index].y = delta;
>  }
>  
> -int set_affinity(void)
> +static int set_affinity(void)
>  {
> +	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;

As well as this change, it looks okay, but it's not listed in the patch
description.

>  	cpu_set_t mask;
>  	int cpuid;
>  
> @@ -166,9 +146,10 @@ int set_affinity(void)
>  	return -1;
>  }
>  
> -void *concurrent_thread(void *thread)
> +static void *concurrent_thread(void *thread)
>  {
>  	struct thread *t = (struct thread *)thread;
> +	struct matrices *matrices = (struct matrices *) t->arg;
>  	int thread_id = (intptr_t) t->id;
>  	int cpuid;
>  	int i;
> @@ -183,18 +164,23 @@ void *concurrent_thread(void *thread)
>  	index = iterations_percpu * thread_id;	/* To avoid stats overlapping */
>  	pthread_barrier_wait(&mult_start);
>  	for (i = 0; i < iterations_percpu; i++)
> -		matrix_mult_record(MATRIX_SIZE, index++);
> +		matrix_mult_record(matrices, index++);
>  
>  	return NULL;
>  }
>  
> -int main_thread(void)
> +static int main_thread(void)
>  {
>  	int ret, i, j;
>  	nsec_t start, end;
>  	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
>  	float savg, cavg;
>  	int cpuid;
> +	struct matrices *matrices[numcpus];
> +
> +	for (i = 0; i < numcpus; ++i) {
> +		matrices[i] = malloc(sizeof(struct matrices));
> +	}
>  
>  	if (stats_container_init(&sdat, iterations) ||
>  	    stats_container_init(&shist, HIST_BUCKETS) ||
> @@ -205,12 +191,11 @@ int main_thread(void)
>  		exit(1);
>  	}
>  
> -	tids = malloc(sizeof(int) * numcpus);
> +	tids = calloc(numcpus, sizeof(int));
>  	if (!tids) {
>  		perror("malloc");
>  		exit(1);
>  	}
> -	memset(tids, 0, numcpus);
>  
>  	cpuid = set_affinity();
>  	if (cpuid == -1) {
> @@ -223,8 +208,9 @@ int main_thread(void)
>  	curdat->index = iterations - 1;
>  	printf("\nRunning sequential operations\n");
>  	start = rt_gettime();
> -	for (i = 0; i < iterations; i++)
> -		matrix_mult_record(MATRIX_SIZE, i);
> +	for (i = 0; i < iterations; i++) {
> +		matrix_mult_record(matrices[0], i);
> +	}

And here LMKL coding style prefers to avoid the curly braces, but that
is very minor.

>  	end = rt_gettime();
>  	delta = (long)((end - start) / NS_PER_US);
>  
> @@ -256,7 +242,7 @@ int main_thread(void)
>  	online_cpu_id = -1;	/* Redispatch cpus */
>  	/* Create numcpus-1 concurrent threads */
>  	for (j = 0; j < numcpus; j++) {
> -		tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
> +		tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO);
>  		if (tids[j] == -1) {
>  			printf
>  			    ("Thread creation failed (max threads exceeded?)\n");
> @@ -308,6 +294,9 @@ int main_thread(void)
>  	     criteria);
>  	printf("Result: %s\n", ret ? "FAIL" : "PASS");
>  
> +	for (i = 0; i < numcpus; i++)
> +		free(matrices[i]);
> +
>  	return ret;
>  }
>  
> -- 
> 2.25.1
> 
> 
> -- 
> Mailing list info: https://lists.linux.it/listinfo/ltp
Joerg Vehlow Nov. 15, 2021, 3:28 p.m. UTC | #2
Hi Cyril,
> Ideally the comment changes should be in a separate patch...
>
>
>   
> These static function conversions should ideally be in a separate patch
> as well. At least the patch description does not mention either of these
> changes.
I just did some cleanup here. Would it be ok, to do all cleanup in one 
commit and the fix in a second one?

>>   
>> -int set_affinity(void)
>> +static int set_affinity(void)
>>   {
>> +	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;
> As well as this change, it looks okay, but it's not listed in the patch
> description.
Right, that is also just cleanup. The mutex is only used inside of this 
function, and I figured
using the static initializer is  better, than 0-initialization.
>> +	for (i = 0; i < iterations; i++) {
>> +		matrix_mult_record(matrices[0], i);
>> +	}
> And here LMKL coding style prefers to avoid the curly braces, but that
> is very minor.
Yes sorry, I had this without curly braces, than added some debug code 
and forgot to remove them again.
This btw. is the reason, why I don't like this rule (but I will not try 
to argue about it here)


Joerg
Cyril Hrubis Nov. 15, 2021, 3:40 p.m. UTC | #3
Hi!
> > These static function conversions should ideally be in a separate patch
> > as well. At least the patch description does not mention either of these
> > changes.
> I just did some cleanup here. Would it be ok, to do all cleanup in one 
> commit and the fix in a second one?

Yes, that would be perfect.
diff mbox series

Patch

diff --git a/testcases/realtime/func/matrix_mult/matrix_mult.c b/testcases/realtime/func/matrix_mult/matrix_mult.c
index e702c0ff9..6d4da89bd 100644
--- a/testcases/realtime/func/matrix_mult/matrix_mult.c
+++ b/testcases/realtime/func/matrix_mult/matrix_mult.c
@@ -1,40 +1,17 @@ 
-/******************************************************************************
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) International Business Machines  Corp., 2007, 2008
+ * 
+ * Authors: Darren Hart <dvhltc@us.ibm.com>
+ *          Dinakar Guniguntala <dino@in.ibm.com>
+ */
+/*\
+ * [Description]
  *
- *   Copyright © International Business Machines  Corp., 2007, 2008
- *
- *   This program is free software;  you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program;  if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * NAME
- *      matrix_mult.c
- *
- * DESCRIPTION
- *      Compare running sequential matrix multiplication routines
- *      to running them in parallel to judge mutliprocessor
- *      performance
- *
- * USAGE:
- *      Use run_auto.sh script in current directory to build and run test.
- *
- * AUTHOR
- *      Darren Hart <dvhltc@us.ibm.com>
- *
- * HISTORY
- *      2007-Mar-09:  Initial version by Darren Hart <dvhltc@us.ibm.com>
- *      2008-Feb-26:  Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
- *
- *****************************************************************************/
+ * Compare running sequential matrix multiplication routines
+ * to running them in parallel to judge multiprocessor
+ * performance
+ */
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -69,9 +46,14 @@  static int iterations_percpu;
 stats_container_t sdat, cdat, *curdat;
 stats_container_t shist, chist;
 static pthread_barrier_t mult_start;
-static pthread_mutex_t mutex_cpu;
 
-void usage(void)
+struct matrices {
+	double A[MATRIX_SIZE][MATRIX_SIZE];
+	double B[MATRIX_SIZE][MATRIX_SIZE];
+	double C[MATRIX_SIZE][MATRIX_SIZE];
+};
+
+static void usage(void)
 {
 	rt_help();
 	printf("matrix_mult specific options:\n");
@@ -80,7 +62,7 @@  void usage(void)
 	printf("  -i#	   #: number of iterations\n");
 }
 
-int parse_args(int c, char *v)
+static int parse_args(int c, char *v)
 {
 	int handled = 1;
 	switch (c) {
@@ -100,7 +82,7 @@  int parse_args(int c, char *v)
 	return handled;
 }
 
-void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
+static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
 		 double B[MATRIX_SIZE][MATRIX_SIZE])
 {
 	int i, j;
@@ -112,41 +94,39 @@  void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
 	}
 }
 
-void matrix_mult(int m_size)
+static void matrix_mult(struct matrices *matrices)
 {
-	double A[m_size][m_size];
-	double B[m_size][m_size];
-	double C[m_size][m_size];
 	int i, j, k;
 
-	matrix_init(A, B);
-	for (i = 0; i < m_size; i++) {
-		int i_m = m_size - i;
-		for (j = 0; j < m_size; j++) {
-			double sum = A[i_m][j] * B[j][i];
-			for (k = 0; k < m_size; k++)
-				sum += A[i_m][k] * B[k][j];
-			C[i][j] = sum;
+	matrix_init(matrices->A, matrices->B);
+	for (i = 0; i < MATRIX_SIZE; i++) {
+		int i_m = MATRIX_SIZE - i;
+		for (j = 0; j < MATRIX_SIZE; j++) {
+			double sum = matrices->A[i_m][j] *  matrices->B[j][i];
+			for (k = 0; k < MATRIX_SIZE; k++)
+				sum +=  matrices->A[i_m][k] *  matrices->B[k][j];
+			 matrices->C[i][j] = sum;
 		}
 	}
 }
 
-void matrix_mult_record(int m_size, int index)
+static void matrix_mult_record(struct matrices *matrices, int index)
 {
 	nsec_t start, end, delta;
 	int i;
 
 	start = rt_gettime();
 	for (i = 0; i < ops; i++)
-		matrix_mult(MATRIX_SIZE);
+		matrix_mult(matrices);
 	end = rt_gettime();
 	delta = (long)((end - start) / NS_PER_US);
 	curdat->records[index].x = index;
 	curdat->records[index].y = delta;
 }
 
-int set_affinity(void)
+static int set_affinity(void)
 {
+	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;
 	cpu_set_t mask;
 	int cpuid;
 
@@ -166,9 +146,10 @@  int set_affinity(void)
 	return -1;
 }
 
-void *concurrent_thread(void *thread)
+static void *concurrent_thread(void *thread)
 {
 	struct thread *t = (struct thread *)thread;
+	struct matrices *matrices = (struct matrices *) t->arg;
 	int thread_id = (intptr_t) t->id;
 	int cpuid;
 	int i;
@@ -183,18 +164,23 @@  void *concurrent_thread(void *thread)
 	index = iterations_percpu * thread_id;	/* To avoid stats overlapping */
 	pthread_barrier_wait(&mult_start);
 	for (i = 0; i < iterations_percpu; i++)
-		matrix_mult_record(MATRIX_SIZE, index++);
+		matrix_mult_record(matrices, index++);
 
 	return NULL;
 }
 
-int main_thread(void)
+static int main_thread(void)
 {
 	int ret, i, j;
 	nsec_t start, end;
 	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
 	float savg, cavg;
 	int cpuid;
+	struct matrices *matrices[numcpus];
+
+	for (i = 0; i < numcpus; ++i) {
+		matrices[i] = malloc(sizeof(struct matrices));
+	}
 
 	if (stats_container_init(&sdat, iterations) ||
 	    stats_container_init(&shist, HIST_BUCKETS) ||
@@ -205,12 +191,11 @@  int main_thread(void)
 		exit(1);
 	}
 
-	tids = malloc(sizeof(int) * numcpus);
+	tids = calloc(numcpus, sizeof(int));
 	if (!tids) {
 		perror("malloc");
 		exit(1);
 	}
-	memset(tids, 0, numcpus);
 
 	cpuid = set_affinity();
 	if (cpuid == -1) {
@@ -223,8 +208,9 @@  int main_thread(void)
 	curdat->index = iterations - 1;
 	printf("\nRunning sequential operations\n");
 	start = rt_gettime();
-	for (i = 0; i < iterations; i++)
-		matrix_mult_record(MATRIX_SIZE, i);
+	for (i = 0; i < iterations; i++) {
+		matrix_mult_record(matrices[0], i);
+	}
 	end = rt_gettime();
 	delta = (long)((end - start) / NS_PER_US);
 
@@ -256,7 +242,7 @@  int main_thread(void)
 	online_cpu_id = -1;	/* Redispatch cpus */
 	/* Create numcpus-1 concurrent threads */
 	for (j = 0; j < numcpus; j++) {
-		tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
+		tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO);
 		if (tids[j] == -1) {
 			printf
 			    ("Thread creation failed (max threads exceeded?)\n");
@@ -308,6 +294,9 @@  int main_thread(void)
 	     criteria);
 	printf("Result: %s\n", ret ? "FAIL" : "PASS");
 
+	for (i = 0; i < numcpus; i++)
+		free(matrices[i]);
+
 	return ret;
 }