diff mbox

[kvm-unit-tests,PATCHv2] arm: Add PMU test

Message ID 1443800908-12159-1-git-send-email-cov@codeaurora.org
State New
Headers show

Commit Message

Christopher Covington Oct. 2, 2015, 3:48 p.m. UTC
Add test the ARM Performance Monitors Unit (PMU). The informational
fields from the control register are printed, but not checked, and
the number of cycles it takes to run a known-instruction-count loop
is printed, but not checked. Once QEMU is fixed, we can at least
begin to check for IPC == 1 when using -icount.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
---
 arm/pmu.c               | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 arm/unittests.cfg       | 11 ++++++
 config/config-arm64.mak |  4 ++-
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

Comments

Wei Huang Oct. 5, 2015, 9:37 p.m. UTC | #1
On 10/02/2015 10:48 AM, Christopher Covington wrote:
> Add test the ARM Performance Monitors Unit (PMU). The informational
> fields from the control register are printed, but not checked, and
> the number of cycles it takes to run a known-instruction-count loop
> is printed, but not checked. Once QEMU is fixed, we can at least
> begin to check for IPC == 1 when using -icount.

Thanks for submitting it. I think this is a good starting point to add
PMU unit testing support for ARM. Some comments below.

> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> ---
>  arm/pmu.c               | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
>  arm/unittests.cfg       | 11 ++++++
>  config/config-arm64.mak |  4 ++-
>  3 files changed, 103 insertions(+), 1 deletion(-)
>  create mode 100644 arm/pmu.c
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> new file mode 100644
> index 0000000..f724c2c
> --- /dev/null
> +++ b/arm/pmu.c
> @@ -0,0 +1,89 @@
> +/*
> + * Test the ARM Performance Monitors Unit (PMU).
> + *
> + * Copyright 2015 The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU Lesser General Public License version 2.1 and
> + * only version 2.1 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
> + * for more details.
> + */
> +#include "libcflat.h"
> +
> +struct pmu_data {
> +	union {
> +		uint32_t pmcr_el0;
> +		struct {
> +			unsigned int enable:1;
> +			unsigned int event_counter_reset:1;
> +			unsigned int cycle_counter_reset:1;
> +			unsigned int cycle_counter_clock_divider:1;
> +			unsigned int event_counter_export:1;
> +			unsigned int cycle_counter_disable_when_prohibited:1;
> +			unsigned int cycle_counter_long:1;
> +			unsigned int zeros:4;
> +			unsigned int num_counters:5;
> +			unsigned int identification_code:8;
> +			unsigned int implementor:8;
                        ^^^^^^^^
Not saying it is a problem because "unsigned int" is 32-bit on 64bit
machine. But to make it consistent with pmcr_el0, I would prefer
"unsigned int" been replaced by "uint32_t".

> +		};
> +	};
> +};
> +
> +/* Execute a known number of guest instructions. Only odd instruction counts
> + * greater than or equal to 3 are supported. The control register (PMCR) is
> + * initialized with the provided value (allowing for example for the cycle
> + * counter or eventer count to be reset if needed). After the known instruction
> + * count loop, zero is written to the PMCR to disable counting, allowing the
> + * cycle counter or event counters to be read as needed at a later time.
> + */
> +static void measure_instrs(int len, struct pmu_data pmcr)
> +{
> +	int i = (len - 1) / 2;
> +
> +	if (len < 3 || ((len - 1) % 2))
> +		abort();
> +
> +	asm volatile(
> +		"msr pmcr_el0, %[pmcr]\n"
> +		"1: subs %[i], %[i], #1\n"
> +		"b.gt 1b\n"
> +		"msr pmcr_el0, xzr"
> +	: [i] "+r" (i) : [pmcr] "r" (pmcr) : "cc");
> +}
> +
> +int main()
> +{
> +	struct pmu_data pmcr;
> +	const int samples = 10;
> +
> +	asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));
> +
> +	printf("PMU implementor:     %c\n", pmcr.implementor);
> +	printf("Identification code: 0x%x\n", pmcr.identification_code);
> +	printf("Event counters:      %d\n", pmcr.num_counters);
> +
> +	pmcr.cycle_counter_reset = 1;
> +	pmcr.enable = 1;
> +
> +	printf("\ninstructions : cycles0 cycles1 ...\n");
> +
> +	for (int i = 3; i < 300; i += 32) {
> +		int avg, sum = 0;
> +		printf("%d :", i);
> +		for (int j = 0; j < samples; j++) {
> +			int val;
> +			measure_instrs(i, pmcr);
> +			asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
> +			sum += val;
> +			printf(" %d", val);
> +		}
> +		avg = sum / samples;
> +		printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n", sum, avg, i / avg, avg / i);
> +	}

I understand that, as stated in commit message, it currently doesn't
check the correctness of PMU counter values. But it would be better if
testing code can be abstracted into an independent function (e.g.
instr_cycle_check) for report("Instruction Cycles",
instr_cycle_check()). You can return TRUE in the checking code for now.


> +
> +	return report_summary();
> +}
> diff --git a/arm/unittests.cfg b/arm/unittests.cfg
> index e068a0c..b3b7ff4 100644
> --- a/arm/unittests.cfg
> +++ b/arm/unittests.cfg
> @@ -35,3 +35,14 @@ file = selftest.flat
>  smp = `getconf _NPROCESSORS_CONF`
>  extra_params = -append 'smp'
>  groups = selftest
> +
> +# Test PMU support without -icount
> +[pmu]
> +file = pmu.flat
> +groups = pmu
> +
> +# Test PMU support with -icount
> +[pmu-icount]
> +file = pmu.flat
> +groups = pmu
> +extra_params = -icount 0
> diff --git a/config/config-arm64.mak b/config/config-arm64.mak
> index d61b703..140b611 100644
> --- a/config/config-arm64.mak
> +++ b/config/config-arm64.mak
> @@ -12,9 +12,11 @@ cflatobjs += lib/arm64/processor.o
>  cflatobjs += lib/arm64/spinlock.o
>  
>  # arm64 specific tests
> -tests =
> +tests = $(TEST_DIR)/pmu.flat
>  
>  include config/config-arm-common.mak
>  
>  arch_clean: arm_clean
>  	$(RM) lib/arm64/.*.d
> +
> +$(TEST_DIR)/pmu.elf: $(cstart.o) $(TEST_DIR)/pmu.o
>
Christopher Covington Oct. 6, 2015, 5:49 p.m. UTC | #2
Changes from v2:

* Explicit test for monotonically increasing cycle count
* Tests now pass or fail
* Tests broken into functions
* Tests/functions broken into separate patches in series
* Style improvements as suggested by Wei Huang and Linux checkpatch.pl
* Spelling and comment improvements
Andrew Jones Oct. 6, 2015, 7:38 p.m. UTC | #3
On Tue, Oct 06, 2015 at 01:49:24PM -0400, Christopher Covington wrote:
> Beginning with a simple sanity check of the control register, add
> a unit test for the ARM Performance Monitors Unit (PMU).
> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> ---
>  arm/pmu.c               | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
>  arm/unittests.cfg       |  5 ++++
>  config/config-arm64.mak |  4 ++-
>  3 files changed, 74 insertions(+), 1 deletion(-)
>  create mode 100644 arm/pmu.c
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> new file mode 100644
> index 0000000..91a3688
> --- /dev/null
> +++ b/arm/pmu.c
> @@ -0,0 +1,66 @@
> +/*
> + * Test the ARM Performance Monitors Unit (PMU).
> + *
> + * Copyright 2015 The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU Lesser General Public License version 2.1 and
> + * only version 2.1 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
> + * for more details.
> + */
> +#include "libcflat.h"
> +
> +struct pmu_data {
> +	union {
> +		uint32_t pmcr_el0;
> +		struct {
> +			uint32_t enable:1;
> +			uint32_t event_counter_reset:1;
> +			uint32_t cycle_counter_reset:1;
> +			uint32_t cycle_counter_clock_divider:1;
> +			uint32_t event_counter_export:1;
> +			uint32_t cycle_counter_disable_when_prohibited:1;
> +			uint32_t cycle_counter_long:1;
> +			uint32_t zeros:4;

resv might be a better name than zeros. Actually, does the spec even
state these bits will always be read as zeros?

> +			uint32_t num_counters:5;
> +			uint32_t identification_code:8;
> +			uint32_t implementer:8;
> +		};
> +	};
> +};
> +
> +/* As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
> + * null. Also print out a couple other interesting fields for diagnostic
> + * purposes. For example, as of fall 2015, QEMU TCG mode doesn't implement
> + * event counters and therefore reports zero of them, but hopefully support for
                                                ^for
> + * at least the instructions event will be added in the future and the reported
> + * number of event counters will become nonzero.
> + */
> +static bool check_pmcr(void)
> +{
> +	struct pmu_data pmcr;

Thanks for making the union and bitfield anonymous. I suggested
naming the struct pmu_data because I'm expecting more stuff to
eventually get shoved in there. If not, then maybe it needs to
be renamed something more pmcr-ish. Or, if it might expand its
purpose as I expect, then this variable should be renamed, i.e.

  struct pmu_data pmu;

> +
> +	asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));

And this should be

  asm volatile("mrs %0, pmcr_el0" : "=r" (pmu.pmcr_el0));

> +
> +	printf("PMU implementer:     %c\n", pmcr.implementer);
> +	printf("Identification code: 0x%x\n", pmcr.identification_code);
> +	printf("Event counters:      %d\n", pmcr.num_counters);
> +
> +	if (pmcr.implementer)
> +		return true;
> +
> +	return false;

How about

  return pmcr.implementer != 0;

> +}
> +
> +int main(void)
> +{
> +	report_prefix_push("pmu");
> +
> +	report("Control register", check_pmcr());
> +
> +	return report_summary();
> +}
> diff --git a/arm/unittests.cfg b/arm/unittests.cfg
> index e068a0c..fd94adb 100644
> --- a/arm/unittests.cfg
> +++ b/arm/unittests.cfg
> @@ -35,3 +35,8 @@ file = selftest.flat
>  smp = `getconf _NPROCESSORS_CONF`
>  extra_params = -append 'smp'
>  groups = selftest
> +
> +# Test PMU support without -icount
> +[pmu]
> +file = pmu.flat
> +groups = pmu
> diff --git a/config/config-arm64.mak b/config/config-arm64.mak
> index d61b703..140b611 100644
> --- a/config/config-arm64.mak
> +++ b/config/config-arm64.mak
> @@ -12,9 +12,11 @@ cflatobjs += lib/arm64/processor.o
>  cflatobjs += lib/arm64/spinlock.o
>  
>  # arm64 specific tests
> -tests =
> +tests = $(TEST_DIR)/pmu.flat
>  
>  include config/config-arm-common.mak
>  
>  arch_clean: arm_clean
>  	$(RM) lib/arm64/.*.d
> +
> +$(TEST_DIR)/pmu.elf: $(cstart.o) $(TEST_DIR)/pmu.o

We can have a PMU on 32-bit ARM processors too. If this test file
needs to be specific to 64-bit, then we should probably name it
less generically, like pmu64?

Thanks,
drew
Andrew Jones Oct. 6, 2015, 7:49 p.m. UTC | #4
On Tue, Oct 06, 2015 at 01:49:25PM -0400, Christopher Covington wrote:
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> ---
>  arm/pmu.c | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 91a3688..589e605 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -33,6 +33,8 @@ struct pmu_data {
>  	};
>  };
>  
> +static const int samples = 10;

#define NR_SAMPLES 10

> +
>  /* As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
>   * null. Also print out a couple other interesting fields for diagnostic
>   * purposes. For example, as of fall 2015, QEMU TCG mode doesn't implement
> @@ -56,11 +58,38 @@ static bool check_pmcr(void)
>  	return false;
>  }
>  
> +/* Ensure that the cycle counter progresses between back-to-back reads.
> + */

style nit: your block quotes don't have opening wing (the preferred
kernel style - and, fwiw, my preference too...)

> +static bool check_cycles_increase(void)
> +{
> +	struct pmu_data pmcr;
> +
> +	pmcr.enable = 1;
> +	asm volatile("msr pmcr_el0, %0" : : "r" (pmcr));
> +
> +	for (int i = 0; i < samples; i++) {
> +		int a, b;
> +
> +		asm volatile(
> +			"mrs %[a], pmccntr_el0\n"
> +			"mrs %[b], pmccntr_el0\n"
> +		: [a] "=r" (a), [b] "=r" (b));
> +
> +		if (a >= b) {
> +			printf("Read %d then %d.\n", a, b);
> +			return false;
> +		}
> +	}
> +
> +	return true;
> +}
> +
>  int main(void)
>  {
>  	report_prefix_push("pmu");
>  
>  	report("Control register", check_pmcr());
> +	report("Monotonically increasing cycle count", check_cycles_increase());
>  
>  	return report_summary();
>  }
> -- 
> Qualcomm Innovation Center, Inc.
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
> 
>
Christopher Covington Oct. 12, 2015, 3:07 p.m. UTC | #5
Changes from v3 in response to Drew's suggestions:

* Improved pmu_data / PMCR fields and usage
* Straightened out awkward conditionals
* Added 32-bit support
* Styling enhancements
* Deferred -icount testing to later patch
Andrew Jones Oct. 18, 2015, 6:29 p.m. UTC | #6
On Mon, Oct 12, 2015 at 11:07:47AM -0400, Christopher Covington wrote:
> Changes from v3 in response to Drew's suggestions:
> 
> * Improved pmu_data / PMCR fields and usage
> * Straightened out awkward conditionals
> * Added 32-bit support
> * Styling enhancements
> * Deferred -icount testing to later patch
> 
>

Sorry I was slow to review this version. Also, just FYI, I'll be on
vacation for a week, so I'll probably be slow to review the next
version too :-) Anyway, thanks for the patches, and thanks for your
patience.

drew
Christopher Covington Oct. 26, 2015, 3:38 p.m. UTC | #7
Changes from v4:
* Add Drew's Reviewed-by to first patch.
* Explain use of 32-bit cycle count values in AArch32.
* Zero-initialize pmu_data struct before use in check_cycles_increase and check_cpi. While the insistence on not using memset is entirely my own vanity, I blame the funny syntax on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 (adds extra set of braces) and checkpatch (adds spaces).
* Improve formatting of inline assembly and better explain why so much code must be inline assembly.
Christopher Covington Oct. 28, 2015, 7:12 p.m. UTC | #8
Changes from v5:

2/3 arm: pmu: Check cycle count increases
* Use universal initializer {0} despite spurious compiler warnings.
* Add Drew's Reviewed-by.

3/3 arm: pmu: Add CPI checking
* Use numeric constant 0 directly with no intermediate variable.
* More tabs in inline assembly.
* Make A32/A64 inline assembly justification comments uniform.
* Check argc properly.

Thanks,
Christopher Covington
diff mbox

Patch

diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 0000000..f724c2c
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,89 @@ 
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright 2015 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+
+struct pmu_data {
+	union {
+		uint32_t pmcr_el0;
+		struct {
+			unsigned int enable:1;
+			unsigned int event_counter_reset:1;
+			unsigned int cycle_counter_reset:1;
+			unsigned int cycle_counter_clock_divider:1;
+			unsigned int event_counter_export:1;
+			unsigned int cycle_counter_disable_when_prohibited:1;
+			unsigned int cycle_counter_long:1;
+			unsigned int zeros:4;
+			unsigned int num_counters:5;
+			unsigned int identification_code:8;
+			unsigned int implementor:8;
+		};
+	};
+};
+
+/* Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported. The control register (PMCR) is
+ * initialized with the provided value (allowing for example for the cycle
+ * counter or eventer count to be reset if needed). After the known instruction
+ * count loop, zero is written to the PMCR to disable counting, allowing the
+ * cycle counter or event counters to be read as needed at a later time.
+ */
+static void measure_instrs(int len, struct pmu_data pmcr)
+{
+	int i = (len - 1) / 2;
+
+	if (len < 3 || ((len - 1) % 2))
+		abort();
+
+	asm volatile(
+		"msr pmcr_el0, %[pmcr]\n"
+		"1: subs %[i], %[i], #1\n"
+		"b.gt 1b\n"
+		"msr pmcr_el0, xzr"
+	: [i] "+r" (i) : [pmcr] "r" (pmcr) : "cc");
+}
+
+int main()
+{
+	struct pmu_data pmcr;
+	const int samples = 10;
+
+	asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));
+
+	printf("PMU implementor:     %c\n", pmcr.implementor);
+	printf("Identification code: 0x%x\n", pmcr.identification_code);
+	printf("Event counters:      %d\n", pmcr.num_counters);
+
+	pmcr.cycle_counter_reset = 1;
+	pmcr.enable = 1;
+
+	printf("\ninstructions : cycles0 cycles1 ...\n");
+
+	for (int i = 3; i < 300; i += 32) {
+		int avg, sum = 0;
+		printf("%d :", i);
+		for (int j = 0; j < samples; j++) {
+			int val;
+			measure_instrs(i, pmcr);
+			asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
+			sum += val;
+			printf(" %d", val);
+		}
+		avg = sum / samples;
+		printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n", sum, avg, i / avg, avg / i);
+	}
+
+	return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index e068a0c..b3b7ff4 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -35,3 +35,14 @@  file = selftest.flat
 smp = `getconf _NPROCESSORS_CONF`
 extra_params = -append 'smp'
 groups = selftest
+
+# Test PMU support without -icount
+[pmu]
+file = pmu.flat
+groups = pmu
+
+# Test PMU support with -icount
+[pmu-icount]
+file = pmu.flat
+groups = pmu
+extra_params = -icount 0
diff --git a/config/config-arm64.mak b/config/config-arm64.mak
index d61b703..140b611 100644
--- a/config/config-arm64.mak
+++ b/config/config-arm64.mak
@@ -12,9 +12,11 @@  cflatobjs += lib/arm64/processor.o
 cflatobjs += lib/arm64/spinlock.o
 
 # arm64 specific tests
-tests =
+tests = $(TEST_DIR)/pmu.flat
 
 include config/config-arm-common.mak
 
 arch_clean: arm_clean
 	$(RM) lib/arm64/.*.d
+
+$(TEST_DIR)/pmu.elf: $(cstart.o) $(TEST_DIR)/pmu.o