diff mbox series

[5/5] KVM: PPC: Book3S HV: Provide more detailed timings for P9 entry path

Message ID 20220525130554.2614394-6-farosas@linux.ibm.com
State New
Headers show
Series KVM: PPC: Book3S HV: Update debug timing code | expand

Commit Message

Fabiano Rosas May 25, 2022, 1:05 p.m. UTC
Alter the data collection points for the debug timing code in the P9
path to be more in line with what the code does. The points where we
accumulate time are now the following:

vcpu_entry: From vcpu_run_hv entry until the start of the inner loop;

guest_entry: From the start of the inner loop until the guest entry in
             asm;

in_guest: From the guest entry in asm until the return to KVM C code;

guest_exit: From the return into KVM C code until the corresponding
            hypercall/page fault handling or re-entry into the guest;

hypercall: Time spent handling hcalls in the kernel (hcalls can go to
	   QEMU, not accounted here);

page_fault: Time spent handling page faults;

vcpu_exit: vcpu_run_hv exit (almost no code here currently).

Like before, these are exposed in debugfs in a file called
"timings". There are four values:

- number of occurrences of the accumulation point;
- total time the vcpu spent in the phase in ns;
- shortest time the vcpu spent in the phase in ns;
- longest time the vcpu spent in the phase in ns;

===
Before:

  rm_entry: 53132 16793518 256 4060
  rm_intr: 53132 2125914 22 340
  rm_exit: 53132 24108344 374 2180
  guest: 53132 40980507996 404 9997650
  cede: 0 0 0 0

After:

  vcpu_entry: 34637 7716108 178 4416
  guest_entry: 52414 49365608 324 747542
  in_guest: 52411 40828715840 258 9997480
  guest_exit: 52410 19681717182 826 102496674
  vcpu_exit: 34636 1744462 38 182
  hypercall: 45712 22878288 38 1307962
  page_fault: 992 111104034 568 168688

  With just one instruction (hcall):

  vcpu_entry: 1 942 942 942
  guest_entry: 1 4044 4044 4044
  in_guest: 1 1540 1540 1540
  guest_exit: 1 3542 3542 3542
  vcpu_exit: 1 80 80 80
  hypercall: 0 0 0 0
  page_fault: 0 0 0 0
===

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 arch/powerpc/include/asm/kvm_host.h   | 12 +++++++-----
 arch/powerpc/kvm/Kconfig              |  9 +++++----
 arch/powerpc/kvm/book3s_hv.c          | 23 ++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 14 ++++----------
 4 files changed, 34 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 37f03665bfa2..de2b226aa350 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -827,11 +827,13 @@  struct kvm_vcpu_arch {
 	struct kvmhv_tb_accumulator *cur_activity;	/* What we're timing */
 	u64	cur_tb_start;			/* when it started */
 #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
-	struct kvmhv_tb_accumulator rm_entry;	/* real-mode entry code */
-	struct kvmhv_tb_accumulator rm_intr;	/* real-mode intr handling */
-	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
-	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
-	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
+	struct kvmhv_tb_accumulator vcpu_entry;
+	struct kvmhv_tb_accumulator vcpu_exit;
+	struct kvmhv_tb_accumulator in_guest;
+	struct kvmhv_tb_accumulator hcall;
+	struct kvmhv_tb_accumulator pg_fault;
+	struct kvmhv_tb_accumulator guest_entry;
+	struct kvmhv_tb_accumulator guest_exit;
 #else
 	struct kvmhv_tb_accumulator rm_entry;	/* real-mode entry code */
 	struct kvmhv_tb_accumulator rm_intr;	/* real-mode intr handling */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 191347f44731..cedf1e0f50e1 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -135,10 +135,11 @@  config KVM_BOOK3S_HV_P9_TIMING
 	select KVM_BOOK3S_HV_EXIT_TIMING
 	depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
 	help
-	  Calculate time taken for each vcpu in various parts of the
-	  code. The total, minimum and maximum times in nanoseconds
-	  together with the number of executions are reported in debugfs in
-	  kvm/vm#/vcpu#/timings.
+	  Calculate time taken for each vcpu during vcpu entry and
+	  exit, time spent inside the guest and time spent handling
+	  hypercalls and page faults. The total, minimum and maximum
+	  times in nanoseconds together with the number of executions
+	  are reported in debugfs in kvm/vm#/vcpu#/timings.
 
 	  If unsure, say N.
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 69a6b40d58b9..f485632f247a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2654,11 +2654,13 @@  static struct debugfs_timings_element {
 	size_t offset;
 } timings[] = {
 #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
-	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
-	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
-	{"rm_exit",	offsetof(struct kvm_vcpu, arch.rm_exit)},
-	{"guest",	offsetof(struct kvm_vcpu, arch.guest_time)},
-	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
+	{"vcpu_entry",	offsetof(struct kvm_vcpu, arch.vcpu_entry)},
+	{"guest_entry",	offsetof(struct kvm_vcpu, arch.guest_entry)},
+	{"in_guest",	offsetof(struct kvm_vcpu, arch.in_guest)},
+	{"guest_exit",	offsetof(struct kvm_vcpu, arch.guest_exit)},
+	{"vcpu_exit",	offsetof(struct kvm_vcpu, arch.vcpu_exit)},
+	{"hypercall",	offsetof(struct kvm_vcpu, arch.hcall)},
+	{"page_fault",	offsetof(struct kvm_vcpu, arch.pg_fault)},
 #else
 	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
 	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
@@ -4006,8 +4008,10 @@  static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
 	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 	switch_pmu_to_guest(vcpu, &host_os_sprs);
+	accumulate_time(vcpu, &vcpu->arch.in_guest);
 	trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
 				  __pa(&vcpu->arch.regs));
+	accumulate_time(vcpu, &vcpu->arch.guest_exit);
 	kvmhv_restore_hv_return_state(vcpu, &hvregs);
 	switch_pmu_to_host(vcpu, &host_os_sprs);
 	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
@@ -4661,6 +4665,8 @@  static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	struct kvm *kvm;
 	unsigned long msr;
 
+	start_timing(vcpu, &vcpu->arch.vcpu_entry);
+
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
@@ -4726,6 +4732,7 @@  static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
+		accumulate_time(vcpu, &vcpu->arch.guest_entry);
 		if (cpu_has_feature(CPU_FTR_ARCH_300))
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
@@ -4733,6 +4740,8 @@  static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 			r = kvmppc_run_vcpu(vcpu);
 
 		if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+			accumulate_time(vcpu, &vcpu->arch.hcall);
+
 			if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
 				/*
 				 * These should have been caught reflected
@@ -4748,6 +4757,7 @@  static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 			trace_kvm_hcall_exit(vcpu, r);
 			kvmppc_core_prepare_to_enter(vcpu);
 		} else if (r == RESUME_PAGE_FAULT) {
+			accumulate_time(vcpu, &vcpu->arch.pg_fault);
 			srcu_idx = srcu_read_lock(&kvm->srcu);
 			r = kvmppc_book3s_hv_page_fault(vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
@@ -4759,12 +4769,15 @@  static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 				r = kvmppc_xics_rm_complete(vcpu, 0);
 		}
 	} while (is_kvmppc_resume_guest(r));
+	accumulate_time(vcpu, &vcpu->arch.vcpu_exit);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&kvm->arch.vcpus_running);
 
 	srr_regs_clobbered();
 
+	end_timing(vcpu);
+
 	return r;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 8b2a9a360e4e..f8e1752535e8 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -772,8 +772,6 @@  int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
 	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
 
-	start_timing(vcpu, &vcpu->arch.rm_entry);
-
 	vcpu->arch.ceded = 0;
 
 	/* Save MSR for restore, with EE clear. */
@@ -934,13 +932,13 @@  int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
 	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
 
-	accumulate_time(vcpu, &vcpu->arch.guest_time);
-
 	switch_pmu_to_guest(vcpu, &host_os_sprs);
+	accumulate_time(vcpu, &vcpu->arch.in_guest);
+
 	kvmppc_p9_enter_guest(vcpu);
-	switch_pmu_to_host(vcpu, &host_os_sprs);
 
-	accumulate_time(vcpu, &vcpu->arch.rm_intr);
+	accumulate_time(vcpu, &vcpu->arch.guest_exit);
+	switch_pmu_to_host(vcpu, &host_os_sprs);
 
 	/* XXX: Could get these from r11/12 and paca exsave instead */
 	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
@@ -1035,8 +1033,6 @@  int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 #endif
 	}
 
-	accumulate_time(vcpu, &vcpu->arch.rm_exit);
-
 	/* Advance host PURR/SPURR by the amount used by guest */
 	purr = mfspr(SPRN_PURR);
 	spurr = mfspr(SPRN_SPURR);
@@ -1143,8 +1139,6 @@  int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		asm volatile(PPC_CP_ABORT);
 
 out:
-	end_timing(vcpu);
-
 	return trap;
 }
 EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);