Message ID | 1432200867.812895.725561201452.11.gpush@pablo |
---|---|
State | Accepted |
Headers | show |
On 21/05/15 10:34, Jeremy Kerr wrote: > On a (single-socket) openpower machine, the cpufreq tests take nearly an > hour: > > $ time sudo ./src/fwts cpufreq > Running 1 tests, results appended to results.log > Test: CPU frequency scaling tests. > CPU frequency performance tests. 1 passed > > real 58m43.334s > user 58m40.118s > sys 0m0.366s > > This is because we have 44 possible frequencies: > > $ wc -w scaling_available_frequencies > 44 scaling_available_frequencies > > on 80 cpus: > > $ getconf _NPROCESSORS_ONLN > 80 > > That's a total of 3520 individual benchmarks, at one second per test. > > However, those 80 cpus that linux reports are acually threads: 8 threads > per core, on a 10 core system. Because each of these threads is on the > same core, they share the same cpufreq control: > > $ ls -ld cpu{0..7}/cpufreq > drwxr-xr-x 3 root root 0 May 21 03:06 cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu1/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu2/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu3/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu4/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu5/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu6/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu7/cpufreq -> ../cpu0/cpufreq > > So, rather than running the exact same test 8 times over on each core, > this change only runs the cpufreq test on the 'master' threads. 
> > Signed-off-by: Jeremy Kerr <jk@ozlabs.org> > > --- > src/cpu/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++----------- > 1 file changed, 31 insertions(+), 12 deletions(-) > > diff --git a/src/cpu/cpufreq/cpufreq.c b/src/cpu/cpufreq/cpufreq.c > index e409375..c6d6a9d 100644 > --- a/src/cpu/cpufreq/cpufreq.c > +++ b/src/cpu/cpufreq/cpufreq.c > @@ -29,6 +29,7 @@ > #include <stdbool.h> > #include <unistd.h> > #include <sys/types.h> > +#include <sys/stat.h> > #include <limits.h> > #include <dirent.h> > #include <stdint.h> > @@ -52,6 +53,7 @@ struct cpu { > int idx; > char sysfs_path[PATH_MAX]; > bool online; > + bool master; > > int n_freqs; > fwts_cpu_freq freqs[MAX_FREQS]; > @@ -79,8 +81,10 @@ static inline void cpu_mkpath( > const struct cpu *cpu, > const char *const name) > { > - snprintf(path, len, "%s/%s/cpufreq/%s", FWTS_CPU_PATH, > - cpu->sysfs_path, name); > + snprintf(path, len, "%s/%s/cpufreq%s%s", FWTS_CPU_PATH, > + cpu->sysfs_path, > + name ? "/" : "", > + name ?: ""); > } > > static int cpu_set_governor(fwts_framework *fw, struct cpu *cpu, > @@ -348,15 +352,16 @@ static int test_one_cpu_performance(fwts_framework *fw, struct cpu *cpu, > > static int cpufreq_test_cpu_performance(fwts_framework *fw) > { > - int n_online_cpus, i, c, rc; > + int n_master_cpus, i, c, rc; > bool ok = true; > > - n_online_cpus = 0; > + n_master_cpus = 0; > > > for (i = 0; cpufreq_settable && i < num_cpus; i++) { > - if (cpus[i].online) > - n_online_cpus++; > + if (!(cpus[i].online && cpus[i].master)) > + continue; > + n_master_cpus++; > rc = cpu_set_lowest_frequency(fw, &cpus[i]); > if (rc != FWTS_OK) > cpufreq_settable = false; > @@ -371,10 +376,10 @@ static int cpufreq_test_cpu_performance(fwts_framework *fw) > > /* then do the benchmark */ > for (i = 0, c = 0; i < num_cpus; i++) { > - if (!cpus[i].online) > + if (!(cpus[i].online && cpus[i].master)) > continue; > > - rc = test_one_cpu_performance(fw, &cpus[i], c++, n_online_cpus); > + rc = test_one_cpu_performance(fw, 
&cpus[i], c++, n_master_cpus); > if (rc != FWTS_OK) > ok = false; > > @@ -738,15 +743,29 @@ static int cpu_freq_compare(const void *v1, const void *v2) > return f1->Hz - f2->Hz; > } > > -static int parse_cpu_info(struct cpu *cpu, struct dirent *dir) > +static int parse_cpu_info(fwts_framework *fw, > + struct cpu *cpu, struct dirent *dir) > { > char *end, path[PATH_MAX+1], *str, *tmp, *tok; > - int i; > + struct stat statbuf; > + int i, rc; > > strcpy(cpu->sysfs_path, dir->d_name); > cpu->idx = strtoul(cpu->sysfs_path + strlen("cpu"), &end, 10); > cpu->online = true; > > + /* check if this is the master of a group of CPUs; we only > + * need to do perf checks on those that are the master */ > + cpu_mkpath(path, sizeof(path), cpu, NULL); > + rc = lstat(path, &statbuf); > + if (rc) { > + fwts_log_warning(fw, "Can't stat cpufreq info!"); > + return FWTS_ERROR; > + } > + > + /* non-master CPUs will have a link, not a dir */ > + cpu->master = S_ISDIR(statbuf.st_mode); > + > cpu_mkpath(path, sizeof(path), cpu, "scaling_governor"); > cpu->orig_governor = fwts_get(path); > > @@ -785,7 +804,7 @@ static int is_cpu_dir(const struct dirent *dir) > isdigit(dir->d_name[3]); > } > > -static int cpufreq_init(fwts_framework *fw __attribute__((unused))) > +static int cpufreq_init(fwts_framework *fw) > { > struct dirent **dirs; > int i, rc; > @@ -794,7 +813,7 @@ static int cpufreq_init(fwts_framework *fw __attribute__((unused))) > cpus = calloc(num_cpus, sizeof(*cpus)); > > for (i = 0; i < num_cpus; i++) > - parse_cpu_info(&cpus[i], dirs[i]); > + parse_cpu_info(fw, &cpus[i], dirs[i]); > > /* all test require a userspace governor */ > for (i = 0; i < num_cpus; i++) { > I like this optimisation step. Great idea! Acked-by: Colin Ian King <colin.king@canonical.com>
On 05/21/2015 05:34 PM, Jeremy Kerr wrote: > On a (single-socket) openpower machine, the cpufreq tests take nearly an > hour: > > $ time sudo ./src/fwts cpufreq > Running 1 tests, results appended to results.log > Test: CPU frequency scaling tests. > CPU frequency performance tests. 1 passed > > real 58m43.334s > user 58m40.118s > sys 0m0.366s > > This is because we have 44 possible frequencies: > > $ wc -w scaling_available_frequencies > 44 scaling_available_frequencies > > on 80 cpus: > > $ getconf _NPROCESSORS_ONLN > 80 > > That's a total of 3520 individual benchmarks, at one second per test. > > However, those 80 cpus that linux reports are acually threads: 8 threads > per core, on a 10 core system. Because each of these threads is on the > same core, they share the same cpufreq control: > > $ ls -ld cpu{0..7}/cpufreq > drwxr-xr-x 3 root root 0 May 21 03:06 cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu1/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu2/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu3/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu4/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu5/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu6/cpufreq -> ../cpu0/cpufreq > lrwxrwxrwx 1 root root 0 May 21 01:29 cpu7/cpufreq -> ../cpu0/cpufreq > > So, rather than running the exact same test 8 times over on each core, > this change only runs the cpufreq test on the 'master' threads. 
> > Signed-off-by: Jeremy Kerr <jk@ozlabs.org> > > --- > src/cpu/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++----------- > 1 file changed, 31 insertions(+), 12 deletions(-) > > diff --git a/src/cpu/cpufreq/cpufreq.c b/src/cpu/cpufreq/cpufreq.c > index e409375..c6d6a9d 100644 > --- a/src/cpu/cpufreq/cpufreq.c > +++ b/src/cpu/cpufreq/cpufreq.c > @@ -29,6 +29,7 @@ > #include <stdbool.h> > #include <unistd.h> > #include <sys/types.h> > +#include <sys/stat.h> > #include <limits.h> > #include <dirent.h> > #include <stdint.h> > @@ -52,6 +53,7 @@ struct cpu { > int idx; > char sysfs_path[PATH_MAX]; > bool online; > + bool master; > > int n_freqs; > fwts_cpu_freq freqs[MAX_FREQS]; > @@ -79,8 +81,10 @@ static inline void cpu_mkpath( > const struct cpu *cpu, > const char *const name) > { > - snprintf(path, len, "%s/%s/cpufreq/%s", FWTS_CPU_PATH, > - cpu->sysfs_path, name); > + snprintf(path, len, "%s/%s/cpufreq%s%s", FWTS_CPU_PATH, > + cpu->sysfs_path, > + name ? "/" : "", > + name ?: ""); > } > > static int cpu_set_governor(fwts_framework *fw, struct cpu *cpu, > @@ -348,15 +352,16 @@ static int test_one_cpu_performance(fwts_framework *fw, struct cpu *cpu, > > static int cpufreq_test_cpu_performance(fwts_framework *fw) > { > - int n_online_cpus, i, c, rc; > + int n_master_cpus, i, c, rc; > bool ok = true; > > - n_online_cpus = 0; > + n_master_cpus = 0; > > > for (i = 0; cpufreq_settable && i < num_cpus; i++) { > - if (cpus[i].online) > - n_online_cpus++; > + if (!(cpus[i].online && cpus[i].master)) > + continue; > + n_master_cpus++; > rc = cpu_set_lowest_frequency(fw, &cpus[i]); > if (rc != FWTS_OK) > cpufreq_settable = false; > @@ -371,10 +376,10 @@ static int cpufreq_test_cpu_performance(fwts_framework *fw) > > /* then do the benchmark */ > for (i = 0, c = 0; i < num_cpus; i++) { > - if (!cpus[i].online) > + if (!(cpus[i].online && cpus[i].master)) > continue; > > - rc = test_one_cpu_performance(fw, &cpus[i], c++, n_online_cpus); > + rc = test_one_cpu_performance(fw, 
&cpus[i], c++, n_master_cpus); > if (rc != FWTS_OK) > ok = false; > > @@ -738,15 +743,29 @@ static int cpu_freq_compare(const void *v1, const void *v2) > return f1->Hz - f2->Hz; > } > > -static int parse_cpu_info(struct cpu *cpu, struct dirent *dir) > +static int parse_cpu_info(fwts_framework *fw, > + struct cpu *cpu, struct dirent *dir) > { > char *end, path[PATH_MAX+1], *str, *tmp, *tok; > - int i; > + struct stat statbuf; > + int i, rc; > > strcpy(cpu->sysfs_path, dir->d_name); > cpu->idx = strtoul(cpu->sysfs_path + strlen("cpu"), &end, 10); > cpu->online = true; > > + /* check if this is the master of a group of CPUs; we only > + * need to do perf checks on those that are the master */ > + cpu_mkpath(path, sizeof(path), cpu, NULL); > + rc = lstat(path, &statbuf); > + if (rc) { > + fwts_log_warning(fw, "Can't stat cpufreq info!"); > + return FWTS_ERROR; > + } > + > + /* non-master CPUs will have a link, not a dir */ > + cpu->master = S_ISDIR(statbuf.st_mode); > + > cpu_mkpath(path, sizeof(path), cpu, "scaling_governor"); > cpu->orig_governor = fwts_get(path); > > @@ -785,7 +804,7 @@ static int is_cpu_dir(const struct dirent *dir) > isdigit(dir->d_name[3]); > } > > -static int cpufreq_init(fwts_framework *fw __attribute__((unused))) > +static int cpufreq_init(fwts_framework *fw) > { > struct dirent **dirs; > int i, rc; > @@ -794,7 +813,7 @@ static int cpufreq_init(fwts_framework *fw __attribute__((unused))) > cpus = calloc(num_cpus, sizeof(*cpus)); > > for (i = 0; i < num_cpus; i++) > - parse_cpu_info(&cpus[i], dirs[i]); > + parse_cpu_info(fw, &cpus[i], dirs[i]); > > /* all test require a userspace governor */ > for (i = 0; i < num_cpus; i++) { > Acked-by: Alex Hung <alex.hung@canonical.com>
diff --git a/src/cpu/cpufreq/cpufreq.c b/src/cpu/cpufreq/cpufreq.c index e409375..c6d6a9d 100644 --- a/src/cpu/cpufreq/cpufreq.c +++ b/src/cpu/cpufreq/cpufreq.c @@ -29,6 +29,7 @@ #include <stdbool.h> #include <unistd.h> #include <sys/types.h> +#include <sys/stat.h> #include <limits.h> #include <dirent.h> #include <stdint.h> @@ -52,6 +53,7 @@ struct cpu { int idx; char sysfs_path[PATH_MAX]; bool online; + bool master; int n_freqs; fwts_cpu_freq freqs[MAX_FREQS]; @@ -79,8 +81,10 @@ static inline void cpu_mkpath( const struct cpu *cpu, const char *const name) { - snprintf(path, len, "%s/%s/cpufreq/%s", FWTS_CPU_PATH, - cpu->sysfs_path, name); + snprintf(path, len, "%s/%s/cpufreq%s%s", FWTS_CPU_PATH, + cpu->sysfs_path, + name ? "/" : "", + name ?: ""); } static int cpu_set_governor(fwts_framework *fw, struct cpu *cpu, @@ -348,15 +352,16 @@ static int test_one_cpu_performance(fwts_framework *fw, struct cpu *cpu, static int cpufreq_test_cpu_performance(fwts_framework *fw) { - int n_online_cpus, i, c, rc; + int n_master_cpus, i, c, rc; bool ok = true; - n_online_cpus = 0; + n_master_cpus = 0; for (i = 0; cpufreq_settable && i < num_cpus; i++) { - if (cpus[i].online) - n_online_cpus++; + if (!(cpus[i].online && cpus[i].master)) + continue; + n_master_cpus++; rc = cpu_set_lowest_frequency(fw, &cpus[i]); if (rc != FWTS_OK) cpufreq_settable = false; @@ -371,10 +376,10 @@ static int cpufreq_test_cpu_performance(fwts_framework *fw) /* then do the benchmark */ for (i = 0, c = 0; i < num_cpus; i++) { - if (!cpus[i].online) + if (!(cpus[i].online && cpus[i].master)) continue; - rc = test_one_cpu_performance(fw, &cpus[i], c++, n_online_cpus); + rc = test_one_cpu_performance(fw, &cpus[i], c++, n_master_cpus); if (rc != FWTS_OK) ok = false; @@ -738,15 +743,29 @@ static int cpu_freq_compare(const void *v1, const void *v2) return f1->Hz - f2->Hz; } -static int parse_cpu_info(struct cpu *cpu, struct dirent *dir) +static int parse_cpu_info(fwts_framework *fw, + struct cpu *cpu, struct 
dirent *dir) { char *end, path[PATH_MAX+1], *str, *tmp, *tok; - int i; + struct stat statbuf; + int i, rc; strcpy(cpu->sysfs_path, dir->d_name); cpu->idx = strtoul(cpu->sysfs_path + strlen("cpu"), &end, 10); cpu->online = true; + /* check if this is the master of a group of CPUs; we only + * need to do perf checks on those that are the master */ + cpu_mkpath(path, sizeof(path), cpu, NULL); + rc = lstat(path, &statbuf); + if (rc) { + fwts_log_warning(fw, "Can't stat cpufreq info!"); + return FWTS_ERROR; + } + + /* non-master CPUs will have a link, not a dir */ + cpu->master = S_ISDIR(statbuf.st_mode); + cpu_mkpath(path, sizeof(path), cpu, "scaling_governor"); cpu->orig_governor = fwts_get(path); @@ -785,7 +804,7 @@ static int is_cpu_dir(const struct dirent *dir) isdigit(dir->d_name[3]); } -static int cpufreq_init(fwts_framework *fw __attribute__((unused))) +static int cpufreq_init(fwts_framework *fw) { struct dirent **dirs; int i, rc; @@ -794,7 +813,7 @@ static int cpufreq_init(fwts_framework *fw __attribute__((unused))) cpus = calloc(num_cpus, sizeof(*cpus)); for (i = 0; i < num_cpus; i++) - parse_cpu_info(&cpus[i], dirs[i]); + parse_cpu_info(fw, &cpus[i], dirs[i]); /* all test require a userspace governor */ for (i = 0; i < num_cpus; i++) {
On a (single-socket) openpower machine, the cpufreq tests take nearly an hour: $ time sudo ./src/fwts cpufreq Running 1 tests, results appended to results.log Test: CPU frequency scaling tests. CPU frequency performance tests. 1 passed real 58m43.334s user 58m40.118s sys 0m0.366s This is because we have 44 possible frequencies: $ wc -w scaling_available_frequencies 44 scaling_available_frequencies on 80 cpus: $ getconf _NPROCESSORS_ONLN 80 That's a total of 3520 individual benchmarks, at one second per test. However, those 80 cpus that linux reports are actually threads: 8 threads per core, on a 10 core system. Because each of these threads is on the same core, they share the same cpufreq control: $ ls -ld cpu{0..7}/cpufreq drwxr-xr-x 3 root root 0 May 21 03:06 cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu1/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu2/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu3/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu4/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu5/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu6/cpufreq -> ../cpu0/cpufreq lrwxrwxrwx 1 root root 0 May 21 01:29 cpu7/cpufreq -> ../cpu0/cpufreq So, rather than running the exact same test 8 times over on each core, this change only runs the cpufreq test on the 'master' threads. Signed-off-by: Jeremy Kerr <jk@ozlabs.org> --- src/cpu/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-)