diff mbox

KVM: PPC: Add userspace debug stub support

Message ID 1367919653-30414-7-git-send-email-Bharat.Bhushan@freescale.com
State New, archived
Headers show

Commit Message

Bharat Bhushan May 7, 2013, 9:40 a.m. UTC
This patch adds the debug stub support on booke/bookehv.
Now QEMU debug stub can use hw breakpoint, watchpoint and
software breakpoint to debug guest.

This is how we save/restore debug register context when switching
between guest, userspace and kernel user-process:

When QEMU is running
 -> thread->debug_reg == QEMU debug register context.
 -> Kernel will handle switching the debug register on context switch.
 -> no vcpu_load() called

QEMU makes ioctls (except RUN)
 -> This will call vcpu_load()
 -> should not change context.
 -> Some ioctls can change vcpu debug register, context saved in vcpu->debug_regs

QEMU Makes RUN ioctl
 -> Save thread->debug_reg on STACK
 -> Store thread->debug_reg == vcpu->debug_reg 
 -> load thread->debug_reg
 -> RUN VCPU ( So thread points to vcpu context )

Context switch happens When VCPU running 
 -> makes vcpu_load() should not load any context
 -> kernel loads the vcpu context as thread->debug_regs points to vcpu context.

On heavyweight_exit
 -> Load the context saved on stack in thread->debug_reg

Currently we do not support debug resource emulation to guest,
On debug exception, always exit to user space irrespective of
user space is expecting the debug exception or not. If this is
unexpected exception (breakpoint/watchpoint event not set by
userspace) then let us leave the action on user space. This
is similar to what it was before, only thing is that now we
have proper exit state available to user space.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/include/asm/kvm_host.h |    3 +
 arch/powerpc/include/uapi/asm/kvm.h |    1 +
 arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++---
 arch/powerpc/kvm/booke.h            |    5 +
 4 files changed, 233 insertions(+), 18 deletions(-)

Comments

Alexander Graf May 10, 2013, 10:18 a.m. UTC | #1
On 07.05.2013, at 11:40, Bharat Bhushan wrote:

> This patch adds the debug stub support on booke/bookehv.
> Now QEMU debug stub can use hw breakpoint, watchpoint and
> software breakpoint to debug guest.
> 
> This is how we save/restore debug register context when switching
> between guest, userspace and kernel user-process:
> 
> When QEMU is running
> -> thread->debug_reg == QEMU debug register context.
> -> Kernel will handle switching the debug register on context switch.
> -> no vcpu_load() called
> 
> QEMU makes ioctls (except RUN)
> -> This will call vcpu_load()
> -> should not change context.
> -> Some ioctls can change vcpu debug register, context saved in vcpu->debug_regs
> 
> QEMU Makes RUN ioctl
> -> Save thread->debug_reg on STACK
> -> Store thread->debug_reg == vcpu->debug_reg 
> -> load thread->debug_reg
> -> RUN VCPU ( So thread points to vcpu context )
> 
> Context switch happens When VCPU running 
> -> makes vcpu_load() should not load any context
> -> kernel loads the vcpu context as thread->debug_regs points to vcpu context.
> 
> On heavyweight_exit
> -> Load the context saved on stack in thread->debug_reg
> 
> Currently we do not support debug resource emulation to guest,
> On debug exception, always exit to user space irrespective of
> user space is expecting the debug exception or not. If this is
> unexpected exception (breakpoint/watchpoint event not set by
> userspace) then let us leave the action on user space. This
> is similar to what it was before, only thing is that now we
> have proper exit state available to user space.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> arch/powerpc/include/asm/kvm_host.h |    3 +
> arch/powerpc/include/uapi/asm/kvm.h |    1 +
> arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++---
> arch/powerpc/kvm/booke.h            |    5 +
> 4 files changed, 233 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 838a577..1b29945 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
> 	u32 eptcfg;
> 	u32 epr;
> 	u32 crit_save;
> +	/* guest debug registers*/
> 	struct debug_reg dbg_reg;
> +	/* shadow debug registers */

Please be more verbose here. What exactly does this contain? Why do we need shadow and non-shadow registers? The comment as it is reads like

  /* Add one plus one */
  x = 1 + 1;

> +	struct debug_reg shadow_dbg_reg;
> #endif
> 	gpa_t paddr_accessed;
> 	gva_t vaddr_accessed;
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index ded0607..f5077c2 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -27,6 +27,7 @@
> #define __KVM_HAVE_PPC_SMT
> #define __KVM_HAVE_IRQCHIP
> #define __KVM_HAVE_IRQ_LINE
> +#define __KVM_HAVE_GUEST_DEBUG
> 
> struct kvm_regs {
> 	__u64 pc;
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index ef99536..6a44ad4 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
> #endif
> }
> 
> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
> +{
> +	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
> +#ifndef CONFIG_KVM_BOOKE_HV
> +	vcpu->arch.shadow_msr &= ~MSR_DE;
> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
> +#endif
> +
> +	/* Force enable debug interrupts when user space wants to debug */
> +	if (vcpu->guest_debug) {
> +#ifdef CONFIG_KVM_BOOKE_HV
> +		/*
> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> +		 * visible MSR.
> +		 */
> +		vcpu->arch.shared->msr |= MSR_DE;
> +#else
> +		vcpu->arch.shadow_msr |= MSR_DE;
> +		vcpu->arch.shared->msr &= ~MSR_DE;
> +#endif
> +	}
> +}
> +
> /*
>  * Helper function for "full" MSR writes.  No need to call this if only
>  * EE/CE/ME/DE/RI are changing.
> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> 	kvmppc_vcpu_sync_spe(vcpu);
> 	kvmppc_vcpu_sync_fpu(vcpu);
> +	kvmppc_vcpu_sync_debug(vcpu);
> }
> 
> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
> @@ -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
> int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> {
> 	int ret, s;
> +	struct debug_reg debug;
> #ifdef CONFIG_PPC_FPU
> 	unsigned int fpscr;
> 	int fpexc_mode;
> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 	kvmppc_load_guest_fp(vcpu);
> #endif
> 
> +	/*
> +	 * Clear current->thread.dbcr0 so that kernel does not
> +	 * restore h/w registers on context switch in vcpu running state.
> +	 */

Incorrect comment?

> +	/* Save thread->debug context on stack */

/* Switch to guest debug context */

> +	memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));

debug = current->thread.debug;

> +	/* Load vcpu debug context in thread->debug */
> +	memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
> +	       sizeof(struct debug_reg));

current->thread.debug = vcpu->arch.shadow_dbg_reg;

> +
> +	switch_booke_debug_regs(&current->thread);
> +
> 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
> 
> 	/* No need for kvm_guest_exit. It's done in handle_exit.
> 	   We also get here with interrupts enabled. */
> 
> +	/* Restore userspace context in thread->dbcr0 from stack*/

/* Switch back to user space debug context */

> +	memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));

current->thread.debug = debug;

> +	switch_booke_debug_regs(&current->thread);
> +
> #ifdef CONFIG_PPC_FPU
> 	kvmppc_save_guest_fp(vcpu);
> 
> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> 	}
> }
> 
> +/*
> + * Currently we do not support debug resource emulation to guest,
> + * so always exit to user space irrespective of user space is
> + * expecting the debug exception or not. This is unexpected event
> + * and let us leave the action on user space.

Please rework your wording.

> + */
> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +{
> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +	u32 dbsr = vcpu->arch.dbsr;
> +
> +	run->debug.arch.status = 0;
> +	run->debug.arch.address = vcpu->arch.pc;
> +
> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> +	} else {
> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> +			run->debug.arch.address = dbg_reg->dac1;
> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> +			run->debug.arch.address = dbg_reg->dac2;
> +	}
> +
> +	return RESUME_HOST;
> +}
> +
> static void kvmppc_fill_pt_regs(struct pt_regs *regs)
> {
> 	ulong r1, ip, msr, lr;
> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
> 	case BOOKE_INTERRUPT_CRITICAL:
> 		unknown_exception(&regs);
> 		break;
> +	case BOOKE_INTERRUPT_DEBUG:
> +		/* Save DBSR before preemption is enabled */
> +		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
> +		kvmppc_clear_dbsr();
> +		break;
> 	}
> }
> 
> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	}
> 
> 	case BOOKE_INTERRUPT_DEBUG: {
> -		u32 dbsr;
> -
> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> -
> -		/* clear IAC events in DBSR register */
> -		dbsr = mfspr(SPRN_DBSR);
> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> -		mtspr(SPRN_DBSR, dbsr);
> -
> -		run->exit_reason = KVM_EXIT_DEBUG;
> +		r = kvmppc_handle_debug(run, vcpu);
> +		if (r == RESUME_HOST)
> +			run->exit_reason = KVM_EXIT_DEBUG;
> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> -		r = RESUME_HOST;
> 		break;
> 	}
> 
> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> 	kvmppc_set_msr(vcpu, 0);
> 
> #ifndef CONFIG_KVM_BOOKE_HV
> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> 	vcpu->arch.shadow_pid = 1;
> 	vcpu->arch.shared->msr = 0;
> #endif
> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
> 	return r;
> }
> 
> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> -					 struct kvm_guest_debug *dbg)
> -{
> -	return -EINVAL;
> -}
> -
> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> {
> 	return -ENOTSUPP;
> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
> }
> 
> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu, uint64_t addr,

s/set/add/

> +				       int index)
> +{
> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +	switch (index) {
> +	case 0:
> +		dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
> +		dbg_reg->iac1 = addr;
> +		break;
> +	case 1:
> +		dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
> +		dbg_reg->iac2 = addr;
> +		break;
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +	case 2:
> +		dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
> +		dbg_reg->iac3 = addr;
> +		break;
> +	case 3:
> +		dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
> +		dbg_reg->iac4 = addr;
> +		break;
> +#endif
> +	default:
> +		return -EINVAL;
> +	}

    dbg_reg->dbcr0 |= DBCR0_IDM;

> +	return 0;
> +}
> +
> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu, uint64_t addr,

s/set/add/

> +				       int type, int index)
> +{
> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +	switch (index) {
> +	case 0:
> +		if (type & KVMPPC_DEBUG_WATCH_READ)
> +			dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +			dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
> +		dbg_reg->dac1 = addr;
> +		break;
> +	case 1:
> +		if (type & KVMPPC_DEBUG_WATCH_READ)
> +			dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +			dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
> +		dbg_reg->dac2 = addr;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}

    dbg_reg->dbcr0 |= DBCR0_IDM;

> +	return 0;
> +}
> +
> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> +					 struct kvm_guest_debug *dbg)
> +{
> +	struct debug_reg *dbg_reg;
> +	int n, b = 0, w = 0;
> +
> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> +		/* Clear All debug events */
> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +		vcpu->guest_debug = 0;
> +#ifdef CONFIG_KVM_BOOKE_HV
> +		/*
> +		 * When user space is not using the debug resources
> +		 * then allow guest to change the MSR.DE.
> +		 */
> +		vcpu->arch.shadow_msrp &= ~MSRP_DEP;
> +#endif

guest_may_set_msr_de(vcpu, true);

> +		return 0;
> +	}
> +
> +#ifdef CONFIG_KVM_BOOKE_HV
> +	/*
> +	 * When user space is using the debug resource then
> +	 * do not allow guest to change the MSR.DE.
> +	 */
> +	vcpu->arch.shadow_msrp |= MSRP_DEP;
> +#endif

guest_may_set_msr_de(vcpu, false);

> +	vcpu->guest_debug = dbg->control;
> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> +
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> +
> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
> +		return 0;
> +
> +	/* Code below handles only HW breakpoints */
> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +	/*
> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> +	 * to occur when MSR.PR is set.
> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> +	 * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
> +	 * that debug events will not come in hypervisor (GS = 0).

This is still wrong. We want to trap in PR=1. It's what PR KVM does, it's what TCG does. There is no point in making HV KVM behave differently.

> +	 */
> +#ifdef CONFIG_KVM_BOOKE_HV
> +	dbg_reg->dbcr1 = 0;
> +	dbg_reg->dbcr2 = 0;
> +#else
> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> +			  DBCR1_IAC4US;
> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
> +#endif
> +
> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> +		uint64_t addr = dbg->arch.bp[n].addr;
> +		uint32_t type = dbg->arch.bp[n].type;
> +
> +		if (type == KVMPPC_DEBUG_NONE)
> +			continue;
> +
> +		if (type & !(KVMPPC_DEBUG_WATCH_READ |
> +			     KVMPPC_DEBUG_WATCH_WRITE |
> +			     KVMPPC_DEBUG_BREAKPOINT))
> +			return -EINVAL;
> +
> +		if (type & KVMPPC_DEBUG_BREAKPOINT) {
> +			/* Setting H/W breakpoint */
> +			if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))

Please pass dbg_reg into the function

> +				return -EINVAL;
> +		} else {
> +			/* Setting H/W watchpoint */
> +			if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))

here too

> +				return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> 	vcpu->cpu = smp_processor_id();
> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
> {
> 	current->thread.kvm_vcpu = NULL;
> 	vcpu->cpu = -1;
> +
> +	/* Disable all debug events */
> +	mtspr(SPRN_DBCR0, 0x0);

Why? Wouldn't normal preemption handling take care of this already?

> +	/* Clear pending debug event in DBSR */
> +	kvmppc_clear_dbsr();

Is there a chance we will ever have to do this? On debug exits from guest context we already clear dbsr.

My question from the last review round still stands. What if a breakpoint for guest address 0xc0001234 is active and on the host 0xc0001234 happens to be at kvm_run? Will we get a breakpoint inside of the host context? That would be bad. We only want to get debug events when

  e500v2: MSR.PR == 1
  e500mc: MSR.GS == 1


Alex

> }
> 
> int __init kvmppc_booke_init(void)
> diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
> index 5fd1ba6..a1ff67d 100644
> --- a/arch/powerpc/kvm/booke.h
> +++ b/arch/powerpc/kvm/booke.h
> @@ -129,4 +129,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
> 		giveup_fpu(current);
> #endif
> }
> +
> +static inline void kvmppc_clear_dbsr(void)
> +{
> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> +}
> #endif /* __KVM_BOOKE_H__ */
> -- 
> 1.5.6.5
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan May 10, 2013, 5:31 p.m. UTC | #2
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Friday, May 10, 2013 3:48 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
> tiejun.chen@windriver.com; Bhushan Bharat-R65777
> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
> 
> 
> On 07.05.2013, at 11:40, Bharat Bhushan wrote:
> 
> > This patch adds the debug stub support on booke/bookehv.
> > Now QEMU debug stub can use hw breakpoint, watchpoint and software
> > breakpoint to debug guest.
> >
> > This is how we save/restore debug register context when switching
> > between guest, userspace and kernel user-process:
> >
> > When QEMU is running
> > -> thread->debug_reg == QEMU debug register context.
> > -> Kernel will handle switching the debug register on context switch.
> > -> no vcpu_load() called
> >
> > QEMU makes ioctls (except RUN)
> > -> This will call vcpu_load()
> > -> should not change context.
> > -> Some ioctls can change vcpu debug register, context saved in
> > -> vcpu->debug_regs
> >
> > QEMU Makes RUN ioctl
> > -> Save thread->debug_reg on STACK
> > -> Store thread->debug_reg == vcpu->debug_reg load thread->debug_reg
> > -> RUN VCPU ( So thread points to vcpu context )
> >
> > Context switch happens When VCPU running
> > -> makes vcpu_load() should not load any context kernel loads the vcpu
> > -> context as thread->debug_regs points to vcpu context.
> >
> > On heavyweight_exit
> > -> Load the context saved on stack in thread->debug_reg
> >
> > Currently we do not support debug resource emulation to guest, On
> > debug exception, always exit to user space irrespective of user space
> > is expecting the debug exception or not. If this is unexpected
> > exception (breakpoint/watchpoint event not set by
> > userspace) then let us leave the action on user space. This is similar
> > to what it was before, only thing is that now we have proper exit
> > state available to user space.
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> > arch/powerpc/include/asm/kvm_host.h |    3 +
> > arch/powerpc/include/uapi/asm/kvm.h |    1 +
> > arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++---
> > arch/powerpc/kvm/booke.h            |    5 +
> > 4 files changed, 233 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_host.h
> > b/arch/powerpc/include/asm/kvm_host.h
> > index 838a577..1b29945 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
> > 	u32 eptcfg;
> > 	u32 epr;
> > 	u32 crit_save;
> > +	/* guest debug registers*/
> > 	struct debug_reg dbg_reg;
> > +	/* shadow debug registers */
> 
> Please be more verbose here. What exactly does this contain? Why do we need
> shadow and non-shadow registers? The comment as it is reads like
> 
>   /* Add one plus one */
>   x = 1 + 1;


/*
 * Shadow debug registers hold the debug register content
 * to be written in h/w debug register on behalf of guest
 * written value or user space written value.
 */


> 
> > +	struct debug_reg shadow_dbg_reg;
> > #endif
> > 	gpa_t paddr_accessed;
> > 	gva_t vaddr_accessed;
> > diff --git a/arch/powerpc/include/uapi/asm/kvm.h
> > b/arch/powerpc/include/uapi/asm/kvm.h
> > index ded0607..f5077c2 100644
> > --- a/arch/powerpc/include/uapi/asm/kvm.h
> > +++ b/arch/powerpc/include/uapi/asm/kvm.h
> > @@ -27,6 +27,7 @@
> > #define __KVM_HAVE_PPC_SMT
> > #define __KVM_HAVE_IRQCHIP
> > #define __KVM_HAVE_IRQ_LINE
> > +#define __KVM_HAVE_GUEST_DEBUG
> >
> > struct kvm_regs {
> > 	__u64 pc;
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
> > ef99536..6a44ad4 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
> > *vcpu) #endif }
> >
> > +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
> > +	/* Synchronize guest's desire to get debug interrupts into shadow
> > +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
> > +	vcpu->arch.shadow_msr &= ~MSR_DE;
> > +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
> > +
> > +	/* Force enable debug interrupts when user space wants to debug */
> > +	if (vcpu->guest_debug) {
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +		/*
> > +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> > +		 * visible MSR.
> > +		 */
> > +		vcpu->arch.shared->msr |= MSR_DE;
> > +#else
> > +		vcpu->arch.shadow_msr |= MSR_DE;
> > +		vcpu->arch.shared->msr &= ~MSR_DE;
> > +#endif
> > +	}
> > +}
> > +
> > /*
> >  * Helper function for "full" MSR writes.  No need to call this if
> > only
> >  * EE/CE/ME/DE/RI are changing.
> > @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> > 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> > 	kvmppc_vcpu_sync_spe(vcpu);
> > 	kvmppc_vcpu_sync_fpu(vcpu);
> > +	kvmppc_vcpu_sync_debug(vcpu);
> > }
> >
> > static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
> > -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
> > int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) {
> > 	int ret, s;
> > +	struct debug_reg debug;
> > #ifdef CONFIG_PPC_FPU
> > 	unsigned int fpscr;
> > 	int fpexc_mode;
> > @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
> kvm_vcpu *vcpu)
> > 	kvmppc_load_guest_fp(vcpu);
> > #endif
> >
> > +	/*
> > +	 * Clear current->thread.dbcr0 so that kernel does not
> > +	 * restore h/w registers on context switch in vcpu running state.
> > +	 */
> 
> Incorrect comment?

Leftover from previous code, will remove this.

> 
> > +	/* Save thread->debug context on stack */
> 
> /* Switch to guest debug context */
> 
> > +	memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
> 
> debug = current->thread.debug;
> 
> > +	/* Load vcpu debug context in thread->debug */
> > +	memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
> > +	       sizeof(struct debug_reg));
> 
> current->thread.debug = vcpu->arch.shadow_dbg_reg;
> 
> > +
> > +	switch_booke_debug_regs(&current->thread);
> > +
> > 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
> >
> > 	/* No need for kvm_guest_exit. It's done in handle_exit.
> > 	   We also get here with interrupts enabled. */
> >
> > +	/* Restore userspace context in thread->dbcr0 from stack*/
> 
> /* Switch back to user space debug context */
> 
> > +	memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
> 
> current->thread.debug = debug;
> 
> > +	switch_booke_debug_regs(&current->thread);
> > +
> > #ifdef CONFIG_PPC_FPU
> > 	kvmppc_save_guest_fp(vcpu);
> >
> > @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu)
> > 	}
> > }
> >
> > +/*
> > + * Currently we do not support debug resource emulation to guest,
> > + * so always exit to user space irrespective of user space is
> > + * expecting the debug exception or not. This is unexpected event
> > + * and let us leave the action on user space.
> 
> Please rework your wording.
> 
> > + */
> > +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
> > +*vcpu) {
> > +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> > +	u32 dbsr = vcpu->arch.dbsr;
> > +
> > +	run->debug.arch.status = 0;
> > +	run->debug.arch.address = vcpu->arch.pc;
> > +
> > +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> > +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> > +	} else {
> > +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> > +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> > +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> > +			run->debug.arch.address = dbg_reg->dac1;
> > +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> > +			run->debug.arch.address = dbg_reg->dac2;
> > +	}
> > +
> > +	return RESUME_HOST;
> > +}
> > +
> > static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
> > 	ulong r1, ip, msr, lr;
> > @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu
> *vcpu,
> > 	case BOOKE_INTERRUPT_CRITICAL:
> > 		unknown_exception(&regs);
> > 		break;
> > +	case BOOKE_INTERRUPT_DEBUG:
> > +		/* Save DBSR before preemption is enabled */
> > +		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
> > +		kvmppc_clear_dbsr();
> > +		break;
> > 	}
> > }
> >
> > @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 	}
> >
> > 	case BOOKE_INTERRUPT_DEBUG: {
> > -		u32 dbsr;
> > -
> > -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> > -
> > -		/* clear IAC events in DBSR register */
> > -		dbsr = mfspr(SPRN_DBSR);
> > -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> > -		mtspr(SPRN_DBSR, dbsr);
> > -
> > -		run->exit_reason = KVM_EXIT_DEBUG;
> > +		r = kvmppc_handle_debug(run, vcpu);
> > +		if (r == RESUME_HOST)
> > +			run->exit_reason = KVM_EXIT_DEBUG;
> > 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> > -		r = RESUME_HOST;
> > 		break;
> > 	}
> >
> > @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> > 	kvmppc_set_msr(vcpu, 0);
> >
> > #ifndef CONFIG_KVM_BOOKE_HV
> > -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> > +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> > 	vcpu->arch.shadow_pid = 1;
> > 	vcpu->arch.shared->msr = 0;
> > #endif
> > @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
> struct kvm_one_reg *reg)
> > 	return r;
> > }
> >
> > -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> > -					 struct kvm_guest_debug *dbg)
> > -{
> > -	return -EINVAL;
> > -}
> > -
> > int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu
> > *fpu) {
> > 	return -ENOTSUPP;
> > @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
> > 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
> > }
> >
> > +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu,
> > +uint64_t addr,
> 
> s/set/add/
> 
> > +				       int index)
> > +{
> > +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> > +
> > +	switch (index) {
> > +	case 0:
> > +		dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
> > +		dbg_reg->iac1 = addr;
> > +		break;
> > +	case 1:
> > +		dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
> > +		dbg_reg->iac2 = addr;
> > +		break;
> > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> > +	case 2:
> > +		dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
> > +		dbg_reg->iac3 = addr;
> > +		break;
> > +	case 3:
> > +		dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
> > +		dbg_reg->iac4 = addr;
> > +		break;
> > +#endif
> > +	default:
> > +		return -EINVAL;
> > +	}
> 
>     dbg_reg->dbcr0 |= DBCR0_IDM;
> 
> > +	return 0;
> > +}
> > +
> > +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu,
> > +uint64_t addr,
> 
> s/set/add/
> 
> > +				       int type, int index)
> > +{
> > +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> > +
> > +	switch (index) {
> > +	case 0:
> > +		if (type & KVMPPC_DEBUG_WATCH_READ)
> > +			dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
> > +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> > +			dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
> > +		dbg_reg->dac1 = addr;
> > +		break;
> > +	case 1:
> > +		if (type & KVMPPC_DEBUG_WATCH_READ)
> > +			dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
> > +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> > +			dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
> > +		dbg_reg->dac2 = addr;
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> 
>     dbg_reg->dbcr0 |= DBCR0_IDM;
> 
> > +	return 0;
> > +}
> > +
> > +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> > +					 struct kvm_guest_debug *dbg)
> > +{
> > +	struct debug_reg *dbg_reg;
> > +	int n, b = 0, w = 0;
> > +
> > +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> > +		/* Clear All debug events */
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +		vcpu->guest_debug = 0;
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +		/*
> > +		 * When user space is not using the debug resources
> > +		 * then allow guest to change the MSR.DE.
> > +		 */
> > +		vcpu->arch.shadow_msrp &= ~MSRP_DEP; #endif
> 
> guest_may_set_msr_de(vcpu, true);
> 
> > +		return 0;
> > +	}
> > +
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +	/*
> > +	 * When user space is using the debug resource then
> > +	 * do not allow guest to change the MSR.DE.
> > +	 */
> > +	vcpu->arch.shadow_msrp |= MSRP_DEP;
> > +#endif
> 
> guest_may_set_msr_de(vcpu, false);
> 
> > +	vcpu->guest_debug = dbg->control;
> > +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> > +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> > +
> > +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> > +
> > +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
> > +		return 0;
> > +
> > +	/* Code below handles only HW breakpoints */
> > +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> > +
> > +	/*
> > +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> > +	 * to occur when MSR.PR is set.
> > +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> > +	 * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
> > +	 * that debug events will not come in hypervisor (GS = 0).

Will rework the above comment as discussed.

> 
> This is still wrong. We want to trap in PR=1. It's what PR KVM does, it's what
> TCG does. There is no point in making HV KVM behave differently.

This comment is not valid.

> 
> > +	 */
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +	dbg_reg->dbcr1 = 0;
> > +	dbg_reg->dbcr2 = 0;
> > +#else
> > +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> > +			  DBCR1_IAC4US;
> > +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
> > +
> > +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> > +		uint64_t addr = dbg->arch.bp[n].addr;
> > +		uint32_t type = dbg->arch.bp[n].type;
> > +
> > +		if (type == KVMPPC_DEBUG_NONE)
> > +			continue;
> > +
> > +		if (type & !(KVMPPC_DEBUG_WATCH_READ |
> > +			     KVMPPC_DEBUG_WATCH_WRITE |
> > +			     KVMPPC_DEBUG_BREAKPOINT))
> > +			return -EINVAL;
> > +
> > +		if (type & KVMPPC_DEBUG_BREAKPOINT) {
> > +			/* Setting H/W breakpoint */
> > +			if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
> 
> Please pass dbg_reg into the function
> 
> > +				return -EINVAL;
> > +		} else {
> > +			/* Setting H/W watchpoint */
> > +			if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
> 
> here too
> 
> > +				return -EINVAL;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
> > 	vcpu->cpu = smp_processor_id();
> > @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu
> > *vcpu) {
> > 	current->thread.kvm_vcpu = NULL;
> > 	vcpu->cpu = -1;
> > +
> > +	/* Disable all debug events */
> > +	mtspr(SPRN_DBCR0, 0x0);
> 
> Why? Wouldn't normal preemption handling take care of this already?

Yes, normal preemption will take care. On vcpu_put we do not clear EPCR.DUVD and DBCR1/2.
So debug event will not be taken in host.

So yes, it is not needed here.

> 
> > +	/* Clear pending debug event in DBSR */
> > +	kvmppc_clear_dbsr();
> 
> Is there a chance we will ever have to do this? On debug exits from guest
> context we already clear dbsr.

DBSR can have events captured but not delivered as interrupt if MSR.DE is clear. I know with qemu debug stub we do not allow msr.de to be cleared by guest.

> 
> My question from the last review round still stands. What if a breakpoint for
> guest address 0xc0001234 is active and on the host 0xc0001234 happens to be at
> kvm_run? Will we get a breakpoint inside of the host context? That would be bad.
> We only want to get debug events when
> 
>   e500v2: MSR.PR == 1
>   e500mc: MSR.GS == 1

I responded last time also when you asked this question. Here I am pasting same answer again:
"No,
On e500v2, we uses DBCR1 and DBCR2 to not allow debug events when MSR.PR = 0 On e500mc+, we uses EPCR.DUVD to not allow debug events when in hypervisor mode.
"

This answer to your question as we discussed on IRC.

-Bharat

> 
> 
> Alex
> 
> > }
> >
> > int __init kvmppc_booke_init(void)
> > diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index
> > 5fd1ba6..a1ff67d 100644
> > --- a/arch/powerpc/kvm/booke.h
> > +++ b/arch/powerpc/kvm/booke.h
> > @@ -129,4 +129,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu
> *vcpu)
> > 		giveup_fpu(current);
> > #endif
> > }
> > +
> > +static inline void kvmppc_clear_dbsr(void) {
> > +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> > +}
> > #endif /* __KVM_BOOKE_H__ */
> > --
> > 1.5.6.5
> >
> >
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf May 10, 2013, 5:44 p.m. UTC | #3
On 10.05.2013, at 19:31, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Friday, May 10, 2013 3:48 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
>> tiejun.chen@windriver.com; Bhushan Bharat-R65777
>> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> On 07.05.2013, at 11:40, Bharat Bhushan wrote:
>> 
>>> This patch adds the debug stub support on booke/bookehv.
>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>> breakpoint to debug guest.
>>> 
>>> This is how we save/restore debug register context when switching
>>> between guest, userspace and kernel user-process:
>>> 
>>> When QEMU is running
>>> -> thread->debug_reg == QEMU debug register context.
>>> -> Kernel will handle switching the debug register on context switch.
>>> -> no vcpu_load() called
>>> 
>>> QEMU makes ioctls (except RUN)
>>> -> This will call vcpu_load()
>>> -> should not change context.
>>> -> Some ioctls can change vcpu debug register, context saved in
>>> -> vcpu->debug_regs
>>> 
>>> QEMU Makes RUN ioctl
>>> -> Save thread->debug_reg on STACK
>>> -> Store thread->debug_reg == vcpu->debug_reg load thread->debug_reg
>>> -> RUN VCPU ( So thread points to vcpu context )
>>> 
>>> Context switch happens When VCPU running
>>> -> makes vcpu_load() should not load any context kernel loads the vcpu
>>> -> context as thread->debug_regs points to vcpu context.
>>> 
>>> On heavyweight_exit
>>> -> Load the context saved on stack in thread->debug_reg
>>> 
>>> Currently we do not support debug resource emulation to guest, On
>>> debug exception, always exit to user space irrespective of user space
>>> is expecting the debug exception or not. If this is unexpected
>>> exception (breakpoint/watchpoint event not set by
>>> userspace) then let us leave the action on user space. This is similar
>>> to what it was before, only thing is that now we have proper exit
>>> state available to user space.
>>> 
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>> arch/powerpc/include/asm/kvm_host.h |    3 +
>>> arch/powerpc/include/uapi/asm/kvm.h |    1 +
>>> arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++---
>>> arch/powerpc/kvm/booke.h            |    5 +
>>> 4 files changed, 233 insertions(+), 18 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>> b/arch/powerpc/include/asm/kvm_host.h
>>> index 838a577..1b29945 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
>>> 	u32 eptcfg;
>>> 	u32 epr;
>>> 	u32 crit_save;
>>> +	/* guest debug registers*/
>>> 	struct debug_reg dbg_reg;
>>> +	/* shadow debug registers */
>> 
>> Please be more verbose here. What exactly does this contain? Why do we need
>> shadow and non-shadow registers? The comment as it is reads like
>> 
>>  /* Add one plus one */
>>  x = 1 + 1;
> 
> 
> /*
> * Shadow debug registers hold the debug register content
> * to be written in h/w debug register on behalf of guest
> * written value or user space written value.
> */

/* hardware visible debug registers when in guest state */

> 
> 
>> 
>>> +	struct debug_reg shadow_dbg_reg;
>>> #endif
>>> 	gpa_t paddr_accessed;
>>> 	gva_t vaddr_accessed;
>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>> index ded0607..f5077c2 100644
>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>> @@ -27,6 +27,7 @@
>>> #define __KVM_HAVE_PPC_SMT
>>> #define __KVM_HAVE_IRQCHIP
>>> #define __KVM_HAVE_IRQ_LINE
>>> +#define __KVM_HAVE_GUEST_DEBUG
>>> 
>>> struct kvm_regs {
>>> 	__u64 pc;
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
>>> ef99536..6a44ad4 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
>>> *vcpu) #endif }
>>> 
>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>> +	/* Synchronize guest's desire to get debug interrupts into shadow
>>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>> +	vcpu->arch.shadow_msr &= ~MSR_DE;
>>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
>>> +
>>> +	/* Force enable debug interrupts when user space wants to debug */
>>> +	if (vcpu->guest_debug) {
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +		/*
>>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
>>> +		 * visible MSR.
>>> +		 */
>>> +		vcpu->arch.shared->msr |= MSR_DE;
>>> +#else
>>> +		vcpu->arch.shadow_msr |= MSR_DE;
>>> +		vcpu->arch.shared->msr &= ~MSR_DE;
>>> +#endif
>>> +	}
>>> +}
>>> +
>>> /*
>>> * Helper function for "full" MSR writes.  No need to call this if
>>> only
>>> * EE/CE/ME/DE/RI are changing.
>>> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
>>> 	kvmppc_vcpu_sync_spe(vcpu);
>>> 	kvmppc_vcpu_sync_fpu(vcpu);
>>> +	kvmppc_vcpu_sync_debug(vcpu);
>>> }
>>> 
>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>> -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
>>> int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) {
>>> 	int ret, s;
>>> +	struct debug_reg debug;
>>> #ifdef CONFIG_PPC_FPU
>>> 	unsigned int fpscr;
>>> 	int fpexc_mode;
>>> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
>> kvm_vcpu *vcpu)
>>> 	kvmppc_load_guest_fp(vcpu);
>>> #endif
>>> 
>>> +	/*
>>> +	 * Clear current->thread.dbcr0 so that kernel does not
>>> +	 * restore h/w registers on context switch in vcpu running state.
>>> +	 */
>> 
>> Incorrect comment?
> 
> Leftover from previous code, will remove this.
> 
>> 
>>> +	/* Save thread->debug context on stack */
>> 
>> /* Switch to guest debug context */
>> 
>>> +	memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
>> 
>> debug = current->thread.debug;
>> 
>>> +	/* Load vcpu debug context in thread->debug */
>>> +	memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
>>> +	       sizeof(struct debug_reg));
>> 
>> current->thread.debug = vcpu->arch.shadow_dbg_reg;
>> 
>>> +
>>> +	switch_booke_debug_regs(&current->thread);
>>> +
>>> 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
>>> 
>>> 	/* No need for kvm_guest_exit. It's done in handle_exit.
>>> 	   We also get here with interrupts enabled. */
>>> 
>>> +	/* Restore userspace context in thread->dbcr0 from stack*/
>> 
>> /* Switch back to user space debug context */
>> 
>>> +	memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
>> 
>> current->thread.debug = debug;
>> 
>>> +	switch_booke_debug_regs(&current->thread);
>>> +
>>> #ifdef CONFIG_PPC_FPU
>>> 	kvmppc_save_guest_fp(vcpu);
>>> 
>>> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu)
>>> 	}
>>> }
>>> 
>>> +/*
>>> + * Currently we do not support debug resource emulation to guest,
>>> + * so always exit to user space irrespective of user space is
>>> + * expecting the debug exception or not. This is unexpected event
>>> + * and let us leave the action on user space.
>> 
>> Please rework your wording.
>> 
>>> + */
>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>> +*vcpu) {
>>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +	u32 dbsr = vcpu->arch.dbsr;
>>> +
>>> +	run->debug.arch.status = 0;
>>> +	run->debug.arch.address = vcpu->arch.pc;
>>> +
>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>> +	} else {
>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>> +			run->debug.arch.address = dbg_reg->dac1;
>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>> +			run->debug.arch.address = dbg_reg->dac2;
>>> +	}
>>> +
>>> +	return RESUME_HOST;
>>> +}
>>> +
>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>> 	ulong r1, ip, msr, lr;
>>> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu
>> *vcpu,
>>> 	case BOOKE_INTERRUPT_CRITICAL:
>>> 		unknown_exception(&regs);
>>> 		break;
>>> +	case BOOKE_INTERRUPT_DEBUG:
>>> +		/* Save DBSR before preemption is enabled */
>>> +		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
>>> +		kvmppc_clear_dbsr();
>>> +		break;
>>> 	}
>>> }
>>> 
>>> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 	}
>>> 
>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>> -		u32 dbsr;
>>> -
>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>> -
>>> -		/* clear IAC events in DBSR register */
>>> -		dbsr = mfspr(SPRN_DBSR);
>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>> -		mtspr(SPRN_DBSR, dbsr);
>>> -
>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>> +		r = kvmppc_handle_debug(run, vcpu);
>>> +		if (r == RESUME_HOST)
>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>> -		r = RESUME_HOST;
>>> 		break;
>>> 	}
>>> 
>>> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>> 	kvmppc_set_msr(vcpu, 0);
>>> 
>>> #ifndef CONFIG_KVM_BOOKE_HV
>>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>> 	vcpu->arch.shadow_pid = 1;
>>> 	vcpu->arch.shared->msr = 0;
>>> #endif
>>> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
>> struct kvm_one_reg *reg)
>>> 	return r;
>>> }
>>> 
>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> -					 struct kvm_guest_debug *dbg)
>>> -{
>>> -	return -EINVAL;
>>> -}
>>> -
>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu
>>> *fpu) {
>>> 	return -ENOTSUPP;
>>> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
>>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
>>> }
>>> 
>>> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu,
>>> +uint64_t addr,
>> 
>> s/set/add/
>> 
>>> +				       int index)
>>> +{
>>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +	switch (index) {
>>> +	case 0:
>>> +		dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
>>> +		dbg_reg->iac1 = addr;
>>> +		break;
>>> +	case 1:
>>> +		dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
>>> +		dbg_reg->iac2 = addr;
>>> +		break;
>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +	case 2:
>>> +		dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
>>> +		dbg_reg->iac3 = addr;
>>> +		break;
>>> +	case 3:
>>> +		dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
>>> +		dbg_reg->iac4 = addr;
>>> +		break;
>>> +#endif
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>> 
>>    dbg_reg->dbcr0 |= DBCR0_IDM;
>> 
>>> +	return 0;
>>> +}
>>> +
>>> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu,
>>> +uint64_t addr,
>> 
>> s/set/add/
>> 
>>> +				       int type, int index)
>>> +{
>>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +	switch (index) {
>>> +	case 0:
>>> +		if (type & KVMPPC_DEBUG_WATCH_READ)
>>> +			dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
>>> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>> +			dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
>>> +		dbg_reg->dac1 = addr;
>>> +		break;
>>> +	case 1:
>>> +		if (type & KVMPPC_DEBUG_WATCH_READ)
>>> +			dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
>>> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>> +			dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
>>> +		dbg_reg->dac2 = addr;
>>> +		break;
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>> 
>>    dbg_reg->dbcr0 |= DBCR0_IDM;
>> 
>>> +	return 0;
>>> +}
>>> +
>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> +					 struct kvm_guest_debug *dbg)
>>> +{
>>> +	struct debug_reg *dbg_reg;
>>> +	int n, b = 0, w = 0;
>>> +
>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>> +		/* Clear All debug events */
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +		vcpu->guest_debug = 0;
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +		/*
>>> +		 * When user space is not using the debug resources
>>> +		 * then allow guest to change the MSR.DE.
>>> +		 */
>>> +		vcpu->arch.shadow_msrp &= ~MSRP_DEP; #endif
>> 
>> guest_may_set_msr_de(vcpu, true);
>> 
>>> +		return 0;
>>> +	}
>>> +
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +	/*
>>> +	 * When user space is using the debug resource then
>>> +	 * do not allow guest to change the MSR.DE.
>>> +	 */
>>> +	vcpu->arch.shadow_msrp |= MSRP_DEP;
>>> +#endif
>> 
>> guest_may_set_msr_de(vcpu, false);
>> 
>>> +	vcpu->guest_debug = dbg->control;
>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
>>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>> +
>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>> +
>>> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
>>> +		return 0;
>>> +
>>> +	/* Code below handles only HW breakpoints */
>>> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +	/*
>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>> +	 * to occur when MSR.PR is set.
>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>> +	 * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
>>> +	 * that debug events will not come in hypervisor (GS = 0).
> 
> Will rework the above comment as discussed.
> 
>> 
>> This is still wrong. We want to trap in PR=1. It's what PR KVM does, it's what
>> TCG does. There is no point in making HV KVM behave differently.
> 
> This comment is not valid.
> 
>> 
>>> +	 */
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +	dbg_reg->dbcr1 = 0;
>>> +	dbg_reg->dbcr2 = 0;
>>> +#else
>>> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
>>> +			  DBCR1_IAC4US;
>>> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
>>> +
>>> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
>>> +		uint64_t addr = dbg->arch.bp[n].addr;
>>> +		uint32_t type = dbg->arch.bp[n].type;
>>> +
>>> +		if (type == KVMPPC_DEBUG_NONE)
>>> +			continue;
>>> +
>>> +		if (type & !(KVMPPC_DEBUG_WATCH_READ |
>>> +			     KVMPPC_DEBUG_WATCH_WRITE |
>>> +			     KVMPPC_DEBUG_BREAKPOINT))
>>> +			return -EINVAL;
>>> +
>>> +		if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>> +			/* Setting H/W breakpoint */
>>> +			if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
>> 
>> Please pass dbg_reg into the function
>> 
>>> +				return -EINVAL;
>>> +		} else {
>>> +			/* Setting H/W watchpoint */
>>> +			if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
>> 
>> here too
>> 
>>> +				return -EINVAL;
>>> +		}
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
>>> 	vcpu->cpu = smp_processor_id();
>>> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu
>>> *vcpu) {
>>> 	current->thread.kvm_vcpu = NULL;
>>> 	vcpu->cpu = -1;
>>> +
>>> +	/* Disable all debug events */
>>> +	mtspr(SPRN_DBCR0, 0x0);
>> 
>> Why? Wouldn't normal preemption handling take care of this already?
> 
> Yes, normal preemption will take care. On vcpu_put we do not clear EPCR.DUVD and DBCR1/2.
> So debug event will not be taken in host.
> 
> So yes, it is not needed here.
> 
>> 
>>> +	/* Clear pending debug event in DBSR */
>>> +	kvmppc_clear_dbsr();
>> 
>> Is there a chance we will ever have to do this? On debug exits from guest
>> context we already clear dbsr.
> 
> DBSR can have events captured but not delivered as interrupt if MSR.DE is clear. I know with qemu debug stub we do not allow msr.de to be cleared by guest.

I see. In that case we'd have to save the original DBSR value into vcpu->arch though, no? Otherwise we might lose guest debug events on preemption if we simply clear it.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan May 11, 2013, 10:24 a.m. UTC | #4
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Friday, May 10, 2013 11:14 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
> tiejun.chen@windriver.com
> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
> 
> 
> On 10.05.2013, at 19:31, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Alexander Graf [mailto:agraf@suse.de]
> >> Sent: Friday, May 10, 2013 3:48 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
> >> tiejun.chen@windriver.com; Bhushan Bharat-R65777
> >> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
> >>
> >>
> >> On 07.05.2013, at 11:40, Bharat Bhushan wrote:
> >>
> >>> This patch adds the debug stub support on booke/bookehv.
> >>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
> >>> breakpoint to debug guest.
> >>>
> >>> This is how we save/restore debug register context when switching
> >>> between guest, userspace and kernel user-process:
> >>>
> >>> When QEMU is running
> >>> -> thread->debug_reg == QEMU debug register context.
> >>> -> Kernel will handle switching the debug register on context switch.
> >>> -> no vcpu_load() called
> >>>
> >>> QEMU makes ioctls (except RUN)
> >>> -> This will call vcpu_load()
> >>> -> should not change context.
> >>> -> Some ioctls can change vcpu debug register, context saved in
> >>> -> vcpu->debug_regs
> >>>
> >>> QEMU Makes RUN ioctl
> >>> -> Save thread->debug_reg on STACK
> >>> -> Store thread->debug_reg == vcpu->debug_reg load thread->debug_reg
> >>> -> RUN VCPU ( So thread points to vcpu context )
> >>>
> >>> Context switch happens When VCPU running
> >>> -> makes vcpu_load() should not load any context kernel loads the
> >>> -> vcpu context as thread->debug_regs points to vcpu context.
> >>>
> >>> On heavyweight_exit
> >>> -> Load the context saved on stack in thread->debug_reg
> >>>
> >>> Currently we do not support debug resource emulation to guest, On
> >>> debug exception, always exit to user space irrespective of user
> >>> space is expecting the debug exception or not. If this is unexpected
> >>> exception (breakpoint/watchpoint event not set by
> >>> userspace) then let us leave the action on user space. This is
> >>> similar to what it was before, only thing is that now we have proper
> >>> exit state available to user space.
> >>>
> >>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>> ---
> >>> arch/powerpc/include/asm/kvm_host.h |    3 +
> >>> arch/powerpc/include/uapi/asm/kvm.h |    1 +
> >>> arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++-
> --
> >>> arch/powerpc/kvm/booke.h            |    5 +
> >>> 4 files changed, 233 insertions(+), 18 deletions(-)
> >>>
> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>> b/arch/powerpc/include/asm/kvm_host.h
> >>> index 838a577..1b29945 100644
> >>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
> >>> 	u32 eptcfg;
> >>> 	u32 epr;
> >>> 	u32 crit_save;
> >>> +	/* guest debug registers*/
> >>> 	struct debug_reg dbg_reg;
> >>> +	/* shadow debug registers */
> >>
> >> Please be more verbose here. What exactly does this contain? Why do
> >> we need shadow and non-shadow registers? The comment as it is reads
> >> like
> >>
> >>  /* Add one plus one */
> >>  x = 1 + 1;
> >
> >
> > /*
> > * Shadow debug registers hold the debug register content
> > * to be written in h/w debug register on behalf of guest
> > * written value or user space written value.
> > */
> 
> /* hardware visible debug registers when in guest state */
> 
> >
> >
> >>
> >>> +	struct debug_reg shadow_dbg_reg;
> >>> #endif
> >>> 	gpa_t paddr_accessed;
> >>> 	gva_t vaddr_accessed;
> >>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
> >>> b/arch/powerpc/include/uapi/asm/kvm.h
> >>> index ded0607..f5077c2 100644
> >>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>> @@ -27,6 +27,7 @@
> >>> #define __KVM_HAVE_PPC_SMT
> >>> #define __KVM_HAVE_IRQCHIP
> >>> #define __KVM_HAVE_IRQ_LINE
> >>> +#define __KVM_HAVE_GUEST_DEBUG
> >>>
> >>> struct kvm_regs {
> >>> 	__u64 pc;
> >>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> >>> index
> >>> ef99536..6a44ad4 100644
> >>> --- a/arch/powerpc/kvm/booke.c
> >>> +++ b/arch/powerpc/kvm/booke.c
> >>> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct
> >>> kvm_vcpu
> >>> *vcpu) #endif }
> >>>
> >>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
> >>> +	/* Synchronize guest's desire to get debug interrupts into shadow
> >>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
> >>> +	vcpu->arch.shadow_msr &= ~MSR_DE;
> >>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
> >>> +
> >>> +	/* Force enable debug interrupts when user space wants to debug */
> >>> +	if (vcpu->guest_debug) {
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +		/*
> >>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> >>> +		 * visible MSR.
> >>> +		 */
> >>> +		vcpu->arch.shared->msr |= MSR_DE; #else
> >>> +		vcpu->arch.shadow_msr |= MSR_DE;
> >>> +		vcpu->arch.shared->msr &= ~MSR_DE; #endif
> >>> +	}
> >>> +}
> >>> +
> >>> /*
> >>> * Helper function for "full" MSR writes.  No need to call this if
> >>> only
> >>> * EE/CE/ME/DE/RI are changing.
> >>> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> >>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> >>> 	kvmppc_vcpu_sync_spe(vcpu);
> >>> 	kvmppc_vcpu_sync_fpu(vcpu);
> >>> +	kvmppc_vcpu_sync_debug(vcpu);
> >>> }
> >>>
> >>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
> >>> -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu
> >>> *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) {
> >>> 	int ret, s;
> >>> +	struct debug_reg debug;
> >>> #ifdef CONFIG_PPC_FPU
> >>> 	unsigned int fpscr;
> >>> 	int fpexc_mode;
> >>> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run,
> >>> struct
> >> kvm_vcpu *vcpu)
> >>> 	kvmppc_load_guest_fp(vcpu);
> >>> #endif
> >>>
> >>> +	/*
> >>> +	 * Clear current->thread.dbcr0 so that kernel does not
> >>> +	 * restore h/w registers on context switch in vcpu running state.
> >>> +	 */
> >>
> >> Incorrect comment?
> >
> > Leftover from previous code, will remove this.
> >
> >>
> >>> +	/* Save thread->debug context on stack */
> >>
> >> /* Switch to guest debug context */
> >>
> >>> +	memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
> >>
> >> debug = current->thread.debug;
> >>
> >>> +	/* Load vcpu debug context in thread->debug */
> >>> +	memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
> >>> +	       sizeof(struct debug_reg));
> >>
> >> current->thread.debug = vcpu->arch.shadow_dbg_reg;
> >>
> >>> +
> >>> +	switch_booke_debug_regs(&current->thread);
> >>> +
> >>> 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
> >>>
> >>> 	/* No need for kvm_guest_exit. It's done in handle_exit.
> >>> 	   We also get here with interrupts enabled. */
> >>>
> >>> +	/* Restore userspace context in thread->dbcr0 from stack*/
> >>
> >> /* Switch back to user space debug context */
> >>
> >>> +	memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
> >>
> >> current->thread.debug = debug;
> >>
> >>> +	switch_booke_debug_regs(&current->thread);
> >>> +
> >>> #ifdef CONFIG_PPC_FPU
> >>> 	kvmppc_save_guest_fp(vcpu);
> >>>
> >>> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu)
> >>> 	}
> >>> }
> >>>
> >>> +/*
> >>> + * Currently we do not support debug resource emulation to guest,
> >>> + * so always exit to user space irrespective of user space is
> >>> + * expecting the debug exception or not. This is unexpected event
> >>> + * and let us leave the action on user space.
> >>
> >> Please rework your wording.
> >>
> >>> + */
> >>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
> >>> +*vcpu) {
> >>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> >>> +	u32 dbsr = vcpu->arch.dbsr;
> >>> +
> >>> +	run->debug.arch.status = 0;
> >>> +	run->debug.arch.address = vcpu->arch.pc;
> >>> +
> >>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>> +	} else {
> >>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> >>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> >>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> >>> +			run->debug.arch.address = dbg_reg->dac1;
> >>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> >>> +			run->debug.arch.address = dbg_reg->dac2;
> >>> +	}
> >>> +
> >>> +	return RESUME_HOST;
> >>> +}
> >>> +
> >>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
> >>> 	ulong r1, ip, msr, lr;
> >>> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct
> >>> kvm_vcpu
> >> *vcpu,
> >>> 	case BOOKE_INTERRUPT_CRITICAL:
> >>> 		unknown_exception(&regs);
> >>> 		break;
> >>> +	case BOOKE_INTERRUPT_DEBUG:
> >>> +		/* Save DBSR before preemption is enabled */
> >>> +		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
> >>> +		kvmppc_clear_dbsr();
> >>> +		break;
> >>> 	}
> >>> }
> >>>
> >>> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu,
> >>> 	}
> >>>
> >>> 	case BOOKE_INTERRUPT_DEBUG: {
> >>> -		u32 dbsr;
> >>> -
> >>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>> -
> >>> -		/* clear IAC events in DBSR register */
> >>> -		dbsr = mfspr(SPRN_DBSR);
> >>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>> -		mtspr(SPRN_DBSR, dbsr);
> >>> -
> >>> -		run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		r = kvmppc_handle_debug(run, vcpu);
> >>> +		if (r == RESUME_HOST)
> >>> +			run->exit_reason = KVM_EXIT_DEBUG;
> >>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>> -		r = RESUME_HOST;
> >>> 		break;
> >>> 	}
> >>>
> >>> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >>> 	kvmppc_set_msr(vcpu, 0);
> >>>
> >>> #ifndef CONFIG_KVM_BOOKE_HV
> >>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> >>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> >>> 	vcpu->arch.shadow_pid = 1;
> >>> 	vcpu->arch.shared->msr = 0;
> >>> #endif
> >>> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>> kvm_vcpu *vcpu,
> >> struct kvm_one_reg *reg)
> >>> 	return r;
> >>> }
> >>>
> >>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>> -					 struct kvm_guest_debug *dbg)
> >>> -{
> >>> -	return -EINVAL;
> >>> -}
> >>> -
> >>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>> kvm_fpu
> >>> *fpu) {
> >>> 	return -ENOTSUPP;
> >>> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
> >>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
> >>>
> >>> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu,
> >>> +uint64_t addr,
> >>
> >> s/set/add/
> >>
> >>> +				       int index)
> >>> +{
> >>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> >>> +
> >>> +	switch (index) {
> >>> +	case 0:
> >>> +		dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
> >>> +		dbg_reg->iac1 = addr;
> >>> +		break;
> >>> +	case 1:
> >>> +		dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
> >>> +		dbg_reg->iac2 = addr;
> >>> +		break;
> >>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>> +	case 2:
> >>> +		dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
> >>> +		dbg_reg->iac3 = addr;
> >>> +		break;
> >>> +	case 3:
> >>> +		dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
> >>> +		dbg_reg->iac4 = addr;
> >>> +		break;
> >>> +#endif
> >>> +	default:
> >>> +		return -EINVAL;
> >>> +	}
> >>
> >>    dbg_reg->dbcr0 |= DBCR0_IDM;
> >>
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu,
> >>> +uint64_t addr,
> >>
> >> s/set/add/
> >>
> >>> +				       int type, int index)
> >>> +{
> >>> +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> >>> +
> >>> +	switch (index) {
> >>> +	case 0:
> >>> +		if (type & KVMPPC_DEBUG_WATCH_READ)
> >>> +			dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
> >>> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> >>> +			dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
> >>> +		dbg_reg->dac1 = addr;
> >>> +		break;
> >>> +	case 1:
> >>> +		if (type & KVMPPC_DEBUG_WATCH_READ)
> >>> +			dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
> >>> +		if (type & KVMPPC_DEBUG_WATCH_WRITE)
> >>> +			dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
> >>> +		dbg_reg->dac2 = addr;
> >>> +		break;
> >>> +	default:
> >>> +		return -EINVAL;
> >>> +	}
> >>
> >>    dbg_reg->dbcr0 |= DBCR0_IDM;
> >>
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>> +					 struct kvm_guest_debug *dbg)
> >>> +{
> >>> +	struct debug_reg *dbg_reg;
> >>> +	int n, b = 0, w = 0;
> >>> +
> >>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> >>> +		/* Clear All debug events */
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +		vcpu->guest_debug = 0;
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +		/*
> >>> +		 * When user space is not using the debug resources
> >>> +		 * then allow guest to change the MSR.DE.
> >>> +		 */
> >>> +		vcpu->arch.shadow_msrp &= ~MSRP_DEP; #endif
> >>
> >> guest_may_set_msr_de(vcpu, true);
> >>
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +	/*
> >>> +	 * When user space is using the debug resource then
> >>> +	 * do not allow guest to change the MSR.DE.
> >>> +	 */
> >>> +	vcpu->arch.shadow_msrp |= MSRP_DEP; #endif
> >>
> >> guest_may_set_msr_de(vcpu, false);
> >>
> >>> +	vcpu->guest_debug = dbg->control;
> >>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> >>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> >>> +
> >>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>> +
> >>> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
> >>> +		return 0;
> >>> +
> >>> +	/* Code below handles only HW breakpoints */
> >>> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> >>> +
> >>> +	/*
> >>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> >>> +	 * to occur when MSR.PR is set.
> >>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> >>> +	 * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
> >>> +	 * that debug events will not come in hypervisor (GS = 0).
> >
> > Will rework the above comment as discussed.
> >
> >>
> >> This is still wrong. We want to trap in PR=1. It's what PR KVM does,
> >> it's what TCG does. There is no point in making HV KVM behave differently.
> >
> > This comment is not valid.
> >
> >>
> >>> +	 */
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +	dbg_reg->dbcr1 = 0;
> >>> +	dbg_reg->dbcr2 = 0;
> >>> +#else
> >>> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> >>> +			  DBCR1_IAC4US;
> >>> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
> >>> +
> >>> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> >>> +		uint64_t addr = dbg->arch.bp[n].addr;
> >>> +		uint32_t type = dbg->arch.bp[n].type;
> >>> +
> >>> +		if (type == KVMPPC_DEBUG_NONE)
> >>> +			continue;
> >>> +
> >>> +		if (type & !(KVMPPC_DEBUG_WATCH_READ |
> >>> +			     KVMPPC_DEBUG_WATCH_WRITE |
> >>> +			     KVMPPC_DEBUG_BREAKPOINT))
> >>> +			return -EINVAL;
> >>> +
> >>> +		if (type & KVMPPC_DEBUG_BREAKPOINT) {
> >>> +			/* Setting H/W breakpoint */
> >>> +			if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
> >>
> >> Please pass dbg_reg into the function
> >>
> >>> +				return -EINVAL;
> >>> +		} else {
> >>> +			/* Setting H/W watchpoint */
> >>> +			if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
> >>
> >> here too
> >>
> >>> +				return -EINVAL;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
> >>> 	vcpu->cpu = smp_processor_id();
> >>> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu
> >>> *vcpu) {
> >>> 	current->thread.kvm_vcpu = NULL;
> >>> 	vcpu->cpu = -1;
> >>> +
> >>> +	/* Disable all debug events */
> >>> +	mtspr(SPRN_DBCR0, 0x0);
> >>
> >> Why? Wouldn't normal preemption handling take care of this already?
> >
> > Yes, normal preemption will take care. On vcpu_put we do not clear EPCR.DUVD
> and DBCR1/2.
> > So debug event will not be taken in host.
> >
> > So yes, it is not needed here.
> >
> >>
> >>> +	/* Clear pending debug event in DBSR */
> >>> +	kvmppc_clear_dbsr();
> >>
> >> Is there a chance we will ever have to do this? On debug exits from
> >> guest context we already clear dbsr.
> >
> > DBSR can have events captured but not delivered as interrupt if MSR.DE is
> clear. I know with qemu debug stub we do not allow msr.de to be cleared by
> guest.
> 
> I see. In that case we'd have to save the original DBSR value into vcpu->arch
> though, no? Otherwise we might lose guest debug events on preemption if we
> simply clear it.

We do not want to keep the delayed debug events as they does not carry much value. Example: taking h/w breakpoint at some other execution address.

-Bharat

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf May 11, 2013, 1:18 p.m. UTC | #5
Am 11.05.2013 um 12:24 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Friday, May 10, 2013 11:14 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
>> tiejun.chen@windriver.com
>> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> On 10.05.2013, at 19:31, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>> Sent: Friday, May 10, 2013 3:48 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
>>>> tiejun.chen@windriver.com; Bhushan Bharat-R65777
>>>> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
>>>> 
>>>> 
>>>> On 07.05.2013, at 11:40, Bharat Bhushan wrote:
>>>> 
>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>> breakpoint to debug guest.
>>>>> 
>>>>> This is how we save/restore debug register context when switching
>>>>> between guest, userspace and kernel user-process:
>>>>> 
>>>>> When QEMU is running
>>>>> -> thread->debug_reg == QEMU debug register context.
>>>>> -> Kernel will handle switching the debug register on context switch.
>>>>> -> no vcpu_load() called
>>>>> 
>>>>> QEMU makes ioctls (except RUN)
>>>>> -> This will call vcpu_load()
>>>>> -> should not change context.
>>>>> -> Some ioctls can change vcpu debug register, context saved in
>>>>> -> vcpu->debug_regs
>>>>> 
>>>>> QEMU Makes RUN ioctl
>>>>> -> Save thread->debug_reg on STACK
>>>>> -> Store thread->debug_reg == vcpu->debug_reg load thread->debug_reg
>>>>> -> RUN VCPU ( So thread points to vcpu context )
>>>>> 
>>>>> Context switch happens When VCPU running
>>>>> -> makes vcpu_load() should not load any context kernel loads the
>>>>> -> vcpu context as thread->debug_regs points to vcpu context.
>>>>> 
>>>>> On heavyweight_exit
>>>>> -> Load the context saved on stack in thread->debug_reg
>>>>> 
>>>>> Currently we do not support debug resource emulation to guest, On
>>>>> debug exception, always exit to user space irrespective of user
>>>>> space is expecting the debug exception or not. If this is unexpected
>>>>> exception (breakpoint/watchpoint event not set by
>>>>> userspace) then let us leave the action on user space. This is
>>>>> similar to what it was before, only thing is that now we have proper
>>>>> exit state available to user space.
>>>>> 
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>> arch/powerpc/include/asm/kvm_host.h |    3 +
>>>>> arch/powerpc/include/uapi/asm/kvm.h |    1 +
>>>>> arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++-
>> --
>>>>> arch/powerpc/kvm/booke.h            |    5 +
>>>>> 4 files changed, 233 insertions(+), 18 deletions(-)
>>>>> 
>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>>>> b/arch/powerpc/include/asm/kvm_host.h
>>>>> index 838a577..1b29945 100644
>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>>> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
>>>>>    u32 eptcfg;
>>>>>    u32 epr;
>>>>>    u32 crit_save;
>>>>> +    /* guest debug registers*/
>>>>>    struct debug_reg dbg_reg;
>>>>> +    /* shadow debug registers */
>>>> 
>>>> Please be more verbose here. What exactly does this contain? Why do
>>>> we need shadow and non-shadow registers? The comment as it is reads
>>>> like
>>>> 
>>>> /* Add one plus one */
>>>> x = 1 + 1;
>>> 
>>> 
>>> /*
>>> * Shadow debug registers hold the debug register content
>>> * to be written in h/w debug register on behalf of guest
>>> * written value or user space written value.
>>> */
>> 
>> /* hardware visible debug registers when in guest state */
>> 
>>> 
>>> 
>>>> 
>>>>> +    struct debug_reg shadow_dbg_reg;
>>>>> #endif
>>>>>    gpa_t paddr_accessed;
>>>>>    gva_t vaddr_accessed;
>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>>>> index ded0607..f5077c2 100644
>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>> @@ -27,6 +27,7 @@
>>>>> #define __KVM_HAVE_PPC_SMT
>>>>> #define __KVM_HAVE_IRQCHIP
>>>>> #define __KVM_HAVE_IRQ_LINE
>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>> 
>>>>> struct kvm_regs {
>>>>>    __u64 pc;
>>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>>> index
>>>>> ef99536..6a44ad4 100644
>>>>> --- a/arch/powerpc/kvm/booke.c
>>>>> +++ b/arch/powerpc/kvm/booke.c
>>>>> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct
>>>>> kvm_vcpu
>>>>> *vcpu) #endif }
>>>>> 
>>>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>>>> +    /* Synchronize guest's desire to get debug interrupts into shadow
>>>>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>>>> +    vcpu->arch.shadow_msr &= ~MSR_DE;
>>>>> +    vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
>>>>> +
>>>>> +    /* Force enable debug interrupts when user space wants to debug */
>>>>> +    if (vcpu->guest_debug) {
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +        /*
>>>>> +         * Since there is no shadow MSR, sync MSR_DE into the guest
>>>>> +         * visible MSR.
>>>>> +         */
>>>>> +        vcpu->arch.shared->msr |= MSR_DE; #else
>>>>> +        vcpu->arch.shadow_msr |= MSR_DE;
>>>>> +        vcpu->arch.shared->msr &= ~MSR_DE; #endif
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> /*
>>>>> * Helper function for "full" MSR writes.  No need to call this if
>>>>> only
>>>>> * EE/CE/ME/DE/RI are changing.
>>>>> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>>>>    kvmppc_mmu_msr_notify(vcpu, old_msr);
>>>>>    kvmppc_vcpu_sync_spe(vcpu);
>>>>>    kvmppc_vcpu_sync_fpu(vcpu);
>>>>> +    kvmppc_vcpu_sync_debug(vcpu);
>>>>> }
>>>>> 
>>>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>>>> -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu
>>>>> *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) {
>>>>>    int ret, s;
>>>>> +    struct debug_reg debug;
>>>>> #ifdef CONFIG_PPC_FPU
>>>>>    unsigned int fpscr;
>>>>>    int fpexc_mode;
>>>>> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run,
>>>>> struct
>>>> kvm_vcpu *vcpu)
>>>>>    kvmppc_load_guest_fp(vcpu);
>>>>> #endif
>>>>> 
>>>>> +    /*
>>>>> +     * Clear current->thread.dbcr0 so that kernel does not
>>>>> +     * restore h/w registers on context switch in vcpu running state.
>>>>> +     */
>>>> 
>>>> Incorrect comment?
>>> 
>>> Leftover from previous code, will remove this.
>>> 
>>>> 
>>>>> +    /* Save thread->debug context on stack */
>>>> 
>>>> /* Switch to guest debug context */
>>>> 
>>>>> +    memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
>>>> 
>>>> debug = current->thread.debug;
>>>> 
>>>>> +    /* Load vcpu debug context in thread->debug */
>>>>> +    memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
>>>>> +           sizeof(struct debug_reg));
>>>> 
>>>> current->thread.debug = vcpu->arch.shadow_dbg_reg;
>>>> 
>>>>> +
>>>>> +    switch_booke_debug_regs(&current->thread);
>>>>> +
>>>>>    ret = __kvmppc_vcpu_run(kvm_run, vcpu);
>>>>> 
>>>>>    /* No need for kvm_guest_exit. It's done in handle_exit.
>>>>>       We also get here with interrupts enabled. */
>>>>> 
>>>>> +    /* Restore userspace context in thread->dbcr0 from stack*/
>>>> 
>>>> /* Switch back to user space debug context */
>>>> 
>>>>> +    memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
>>>> 
>>>> current->thread.debug = debug;
>>>> 
>>>>> +    switch_booke_debug_regs(&current->thread);
>>>>> +
>>>>> #ifdef CONFIG_PPC_FPU
>>>>>    kvmppc_save_guest_fp(vcpu);
>>>>> 
>>>>> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu)
>>>>>    }
>>>>> }
>>>>> 
>>>>> +/*
>>>>> + * Currently we do not support debug resource emulation to guest,
>>>>> + * so always exit to user space irrespective of user space is
>>>>> + * expecting the debug exception or not. This is unexpected event
>>>>> + * and let us leave the action on user space.
>>>> 
>>>> Please rework your wording.
>>>> 
>>>>> + */
>>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>>>> +*vcpu) {
>>>>> +    struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>>>> +    u32 dbsr = vcpu->arch.dbsr;
>>>>> +
>>>>> +    run->debug.arch.status = 0;
>>>>> +    run->debug.arch.address = vcpu->arch.pc;
>>>>> +
>>>>> +    if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>>>> +        run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>>>> +    } else {
>>>>> +        if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>>>> +            run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>>>> +        else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>>>> +            run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>>>> +        if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>>>> +            run->debug.arch.address = dbg_reg->dac1;
>>>>> +        else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>>>> +            run->debug.arch.address = dbg_reg->dac2;
>>>>> +    }
>>>>> +
>>>>> +    return RESUME_HOST;
>>>>> +}
>>>>> +
>>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>>>>    ulong r1, ip, msr, lr;
>>>>> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct
>>>>> kvm_vcpu
>>>> *vcpu,
>>>>>    case BOOKE_INTERRUPT_CRITICAL:
>>>>>        unknown_exception(&regs);
>>>>>        break;
>>>>> +    case BOOKE_INTERRUPT_DEBUG:
>>>>> +        /* Save DBSR before preemption is enabled */
>>>>> +        vcpu->arch.dbsr = mfspr(SPRN_DBSR);
>>>>> +        kvmppc_clear_dbsr();
>>>>> +        break;
>>>>>    }
>>>>> }
>>>>> 
>>>>> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu,
>>>>>    }
>>>>> 
>>>>>    case BOOKE_INTERRUPT_DEBUG: {
>>>>> -        u32 dbsr;
>>>>> -
>>>>> -        vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>>>> -
>>>>> -        /* clear IAC events in DBSR register */
>>>>> -        dbsr = mfspr(SPRN_DBSR);
>>>>> -        dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>>>> -        mtspr(SPRN_DBSR, dbsr);
>>>>> -
>>>>> -        run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +        r = kvmppc_handle_debug(run, vcpu);
>>>>> +        if (r == RESUME_HOST)
>>>>> +            run->exit_reason = KVM_EXIT_DEBUG;
>>>>>        kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>> -        r = RESUME_HOST;
>>>>>        break;
>>>>>    }
>>>>> 
>>>>> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>>>>    kvmppc_set_msr(vcpu, 0);
>>>>> 
>>>>> #ifndef CONFIG_KVM_BOOKE_HV
>>>>> -    vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>>>> +    vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>>>>    vcpu->arch.shadow_pid = 1;
>>>>>    vcpu->arch.shared->msr = 0;
>>>>> #endif
>>>>> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
>>>>> kvm_vcpu *vcpu,
>>>> struct kvm_one_reg *reg)
>>>>>    return r;
>>>>> }
>>>>> 
>>>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>> -                     struct kvm_guest_debug *dbg)
>>>>> -{
>>>>> -    return -EINVAL;
>>>>> -}
>>>>> -
>>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
>>>>> kvm_fpu
>>>>> *fpu) {
>>>>>    return -ENOTSUPP;
>>>>> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
>>>>>    kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
>>>>> 
>>>>> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu,
>>>>> +uint64_t addr,
>>>> 
>>>> s/set/add/
>>>> 
>>>>> +                       int index)
>>>>> +{
>>>>> +    struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>>>> +
>>>>> +    switch (index) {
>>>>> +    case 0:
>>>>> +        dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
>>>>> +        dbg_reg->iac1 = addr;
>>>>> +        break;
>>>>> +    case 1:
>>>>> +        dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
>>>>> +        dbg_reg->iac2 = addr;
>>>>> +        break;
>>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>>>> +    case 2:
>>>>> +        dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
>>>>> +        dbg_reg->iac3 = addr;
>>>>> +        break;
>>>>> +    case 3:
>>>>> +        dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
>>>>> +        dbg_reg->iac4 = addr;
>>>>> +        break;
>>>>> +#endif
>>>>> +    default:
>>>>> +        return -EINVAL;
>>>>> +    }
>>>> 
>>>>   dbg_reg->dbcr0 |= DBCR0_IDM;
>>>> 
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu,
>>>>> +uint64_t addr,
>>>> 
>>>> s/set/add/
>>>> 
>>>>> +                       int type, int index)
>>>>> +{
>>>>> +    struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>>>> +
>>>>> +    switch (index) {
>>>>> +    case 0:
>>>>> +        if (type & KVMPPC_DEBUG_WATCH_READ)
>>>>> +            dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
>>>>> +        if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>>>> +            dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
>>>>> +        dbg_reg->dac1 = addr;
>>>>> +        break;
>>>>> +    case 1:
>>>>> +        if (type & KVMPPC_DEBUG_WATCH_READ)
>>>>> +            dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
>>>>> +        if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>>>> +            dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
>>>>> +        dbg_reg->dac2 = addr;
>>>>> +        break;
>>>>> +    default:
>>>>> +        return -EINVAL;
>>>>> +    }
>>>> 
>>>>   dbg_reg->dbcr0 |= DBCR0_IDM;
>>>> 
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>> +                     struct kvm_guest_debug *dbg)
>>>>> +{
>>>>> +    struct debug_reg *dbg_reg;
>>>>> +    int n, b = 0, w = 0;
>>>>> +
>>>>> +    if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>>>> +        /* Clear All debug events */
>>>>> +        vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +        vcpu->guest_debug = 0;
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +        /*
>>>>> +         * When user space is not using the debug resources
>>>>> +         * then allow guest to change the MSR.DE.
>>>>> +         */
>>>>> +        vcpu->arch.shadow_msrp &= ~MSRP_DEP; #endif
>>>> 
>>>> guest_may_set_msr_de(vcpu, true);
>>>> 
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +    /*
>>>>> +     * When user space is using the debug resource then
>>>>> +     * do not allow guest to change the MSR.DE.
>>>>> +     */
>>>>> +    vcpu->arch.shadow_msrp |= MSRP_DEP; #endif
>>>> 
>>>> guest_may_set_msr_de(vcpu, false);
>>>> 
>>>>> +    vcpu->guest_debug = dbg->control;
>>>>> +    vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +    /* Set DBCR0_EDM in guest visible DBCR0 register. */
>>>>> +    vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>>>> +
>>>>> +    if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>>>> +        vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>>>> +
>>>>> +    if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
>>>>> +        return 0;
>>>>> +
>>>>> +    /* Code below handles only HW breakpoints */
>>>>> +    dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>>>> +
>>>>> +    /*
>>>>> +     * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>>>> +     * to occur when MSR.PR is set.
>>>>> +     * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>>>> +     * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
>>>>> +     * that debug events will not come in hypervisor (GS = 0).
>>> 
>>> Will rework the above comment as discussed.
>>> 
>>>> 
>>>> This is still wrong. We want to trap in PR=1. It's what PR KVM does,
>>>> it's what TCG does. There is no point in making HV KVM behave differently.
>>> 
>>> This comment is not valid.
>>> 
>>>> 
>>>>> +     */
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +    dbg_reg->dbcr1 = 0;
>>>>> +    dbg_reg->dbcr2 = 0;
>>>>> +#else
>>>>> +    dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
>>>>> +              DBCR1_IAC4US;
>>>>> +    dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
>>>>> +
>>>>> +    for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
>>>>> +        uint64_t addr = dbg->arch.bp[n].addr;
>>>>> +        uint32_t type = dbg->arch.bp[n].type;
>>>>> +
>>>>> +        if (type == KVMPPC_DEBUG_NONE)
>>>>> +            continue;
>>>>> +
>>>>> +        if (type & !(KVMPPC_DEBUG_WATCH_READ |
>>>>> +                 KVMPPC_DEBUG_WATCH_WRITE |
>>>>> +                 KVMPPC_DEBUG_BREAKPOINT))
>>>>> +            return -EINVAL;
>>>>> +
>>>>> +        if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>>>> +            /* Setting H/W breakpoint */
>>>>> +            if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
>>>> 
>>>> Please pass dbg_reg into the function
>>>> 
>>>>> +                return -EINVAL;
>>>>> +        } else {
>>>>> +            /* Setting H/W watchpoint */
>>>>> +            if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
>>>> 
>>>> here too
>>>> 
>>>>> +                return -EINVAL;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
>>>>>    vcpu->cpu = smp_processor_id();
>>>>> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu
>>>>> *vcpu) {
>>>>>    current->thread.kvm_vcpu = NULL;
>>>>>    vcpu->cpu = -1;
>>>>> +
>>>>> +    /* Disable all debug events */
>>>>> +    mtspr(SPRN_DBCR0, 0x0);
>>>> 
>>>> Why? Wouldn't normal preemption handling take care of this already?
>>> 
>>> Yes, normal preemption will take care. On vcpu_put we do not clear EPCR.DUVD
>> and DBCR1/2.
>>> So debug event will not be taken in host.
>>> 
>>> So yes, it is not needed here.
>>> 
>>>> 
>>>>> +    /* Clear pending debug event in DBSR */
>>>>> +    kvmppc_clear_dbsr();
>>>> 
>>>> Is there a chance we will ever have to do this? On debug exits from
>>>> guest context we already clear dbsr.
>>> 
>>> DBSR can have events captured but not delivered as interrupt if MSR.DE is
>> clear. I know with qemu debug stub we do not allow msr.de to be cleared by
>> guest.
>> 
>> I see. In that case we'd have to save the original DBSR value into vcpu->arch
>> though, no? Otherwise we might lose guest debug events on preemption if we
>> simply clear it.
> 
> We do not want to keep the delayed debug events as they does not carry much value. Example: taking h/w breakpoint at some other execution address.

That's for the guest to decide, no? If the guest wants to poll debug events rather than receive interrupts, we should enable it to do so.

Alex

> 
> -Bharat
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood May 13, 2013, 2:33 p.m. UTC | #6
On 05/11/2013 08:18:50 AM, Alexander Graf wrote:
> 
> 
> Am 11.05.2013 um 12:24 schrieb Bhushan Bharat-R65777  
> <R65777@freescale.com>:
> > We do not want to keep the delayed debug events as they does not  
> carry much value. Example: taking h/w breakpoint at some other  
> execution address.
> 
> That's for the guest to decide, no?

No more so than with the various other things that don't get fully  
emulated.

> If the guest wants to poll debug events rather than receive  
> interrupts, we should enable it to do so.

Someone who cares can enable it to do so.  Note that the value of  
deferred debug events is so low that it got ripped out of the hardware  
starting with e6500.  Didn't we discuss this a while back?

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 838a577..1b29945 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -524,7 +524,10 @@  struct kvm_vcpu_arch {
 	u32 eptcfg;
 	u32 epr;
 	u32 crit_save;
+	/* guest debug registers*/
 	struct debug_reg dbg_reg;
+	/* shadow debug registers */
+	struct debug_reg shadow_dbg_reg;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index ded0607..f5077c2 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@ 
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
 	__u64 pc;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ef99536..6a44ad4 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,29 @@  static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_msr &= ~MSR_DE;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+	/* Force enable debug interrupts when user space wants to debug */
+	if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+		/*
+		 * Since there is no shadow MSR, sync MSR_DE into the guest
+		 * visible MSR.
+		 */
+		vcpu->arch.shared->msr |= MSR_DE;
+#else
+		vcpu->arch.shadow_msr |= MSR_DE;
+		vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+	}
+}
+
 /*
  * Helper function for "full" MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +173,7 @@  void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 	kvmppc_mmu_msr_notify(vcpu, old_msr);
 	kvmppc_vcpu_sync_spe(vcpu);
 	kvmppc_vcpu_sync_fpu(vcpu);
+	kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -655,6 +679,7 @@  int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret, s;
+	struct debug_reg debug;
 #ifdef CONFIG_PPC_FPU
 	unsigned int fpscr;
 	int fpexc_mode;
@@ -699,11 +724,27 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_load_guest_fp(vcpu);
 #endif
 
+	/*
+	 * Clear current->thread.dbcr0 so that kernel does not
+	 * restore h/w registers on context switch in vcpu running state.
+	 */
+	/* Save thread->debug context on stack */
+	memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
+	/* Load vcpu debug context in thread->debug */
+	memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
+	       sizeof(struct debug_reg));
+
+	switch_booke_debug_regs(&current->thread);
+
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
 	/* No need for kvm_guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
+	/* Restore userspace context in thread->dbcr0 from stack*/
+	memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
+	switch_booke_debug_regs(&current->thread);
+
 #ifdef CONFIG_PPC_FPU
 	kvmppc_save_guest_fp(vcpu);
 
@@ -759,6 +800,36 @@  static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Currently we do not support debug resource emulation to guest,
+ * so always exit to user space irrespective of user space is
+ * expecting the debug exception or not. This is unexpected event
+ * and let us leave the action on user space.
+ */
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+	u32 dbsr = vcpu->arch.dbsr;
+
+	run->debug.arch.status = 0;
+	run->debug.arch.address = vcpu->arch.pc;
+
+	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+	} else {
+		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+			run->debug.arch.address = dbg_reg->dac1;
+		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+			run->debug.arch.address = dbg_reg->dac2;
+	}
+
+	return RESUME_HOST;
+}
+
 static void kvmppc_fill_pt_regs(struct pt_regs *regs)
 {
 	ulong r1, ip, msr, lr;
@@ -819,6 +890,11 @@  static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 	case BOOKE_INTERRUPT_CRITICAL:
 		unknown_exception(&regs);
 		break;
+	case BOOKE_INTERRUPT_DEBUG:
+		/* Save DBSR before preemption is enabled */
+		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+		kvmppc_clear_dbsr();
+		break;
 	}
 }
 
@@ -1118,18 +1194,10 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
+		r = kvmppc_handle_debug(run, vcpu);
+		if (r == RESUME_HOST)
+			run->exit_reason = KVM_EXIT_DEBUG;
 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
-		r = RESUME_HOST;
 		break;
 	}
 
@@ -1180,7 +1248,7 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, 0);
 
 #ifndef CONFIG_KVM_BOOKE_HV
-	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 	vcpu->arch.shadow_pid = 1;
 	vcpu->arch.shared->msr = 0;
 #endif
@@ -1557,12 +1625,6 @@  int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					 struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
@@ -1668,6 +1730,145 @@  void kvmppc_decrementer_func(unsigned long data)
 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
 }
 
+static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu, uint64_t addr,
+				       int index)
+{
+	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+	switch (index) {
+	case 0:
+		dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+		dbg_reg->iac1 = addr;
+		break;
+	case 1:
+		dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+		dbg_reg->iac2 = addr;
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case 2:
+		dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+		dbg_reg->iac3 = addr;
+		break;
+	case 3:
+		dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+		dbg_reg->iac4 = addr;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu, uint64_t addr,
+				       int type, int index)
+{
+	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+	switch (index) {
+	case 0:
+		if (type & KVMPPC_DEBUG_WATCH_READ)
+			dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
+		if (type & KVMPPC_DEBUG_WATCH_WRITE)
+			dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
+		dbg_reg->dac1 = addr;
+		break;
+	case 1:
+		if (type & KVMPPC_DEBUG_WATCH_READ)
+			dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
+		if (type & KVMPPC_DEBUG_WATCH_WRITE)
+			dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
+		dbg_reg->dac2 = addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	struct debug_reg *dbg_reg;
+	int n, b = 0, w = 0;
+
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+		/* Clear All debug events */
+		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+		vcpu->guest_debug = 0;
+#ifdef CONFIG_KVM_BOOKE_HV
+		/*
+		 * When user space is not using the debug resources
+		 * then allow guest to change the MSR.DE.
+		 */
+		vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+#endif
+		return 0;
+	}
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	/*
+	 * When user space is using the debug resource then
+	 * do not allow guest to change the MSR.DE.
+	 */
+	vcpu->arch.shadow_msrp |= MSRP_DEP;
+#endif
+	vcpu->guest_debug = dbg->control;
+	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+	/* Set DBCR0_EDM in guest visible DBCR0 register. */
+	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+		return 0;
+
+	/* Code below handles only HW breakpoints */
+	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+	/*
+	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
+	 * to occur when MSR.PR is set.
+	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
+	 * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
+	 * that debug events will not come in hypervisor (GS = 0).
+	 */
+#ifdef CONFIG_KVM_BOOKE_HV
+	dbg_reg->dbcr1 = 0;
+	dbg_reg->dbcr2 = 0;
+#else
+	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+			  DBCR1_IAC4US;
+	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+		uint64_t addr = dbg->arch.bp[n].addr;
+		uint32_t type = dbg->arch.bp[n].type;
+
+		if (type == KVMPPC_DEBUG_NONE)
+			continue;
+
+		if (type & !(KVMPPC_DEBUG_WATCH_READ |
+			     KVMPPC_DEBUG_WATCH_WRITE |
+			     KVMPPC_DEBUG_BREAKPOINT))
+			return -EINVAL;
+
+		if (type & KVMPPC_DEBUG_BREAKPOINT) {
+			/* Setting H/W breakpoint */
+			if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
+				return -EINVAL;
+		} else {
+			/* Setting H/W watchpoint */
+			if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = smp_processor_id();
@@ -1678,6 +1879,11 @@  void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	current->thread.kvm_vcpu = NULL;
 	vcpu->cpu = -1;
+
+	/* Disable all debug events */
+	mtspr(SPRN_DBCR0, 0x0);
+	/* Clear pending debug event in DBSR */
+	kvmppc_clear_dbsr();
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 5fd1ba6..a1ff67d 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -129,4 +129,9 @@  static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
 		giveup_fpu(current);
 #endif
 }
+
+static inline void kvmppc_clear_dbsr(void)
+{
+	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+}
 #endif /* __KVM_BOOKE_H__ */