Patchwork [1/2] KVM: PPC: booke: Add watchdog emulation

login
register
mail settings
Submitter Bharat Bhushan
Date Aug. 3, 2012, 5:30 a.m.
Message ID <1343971845-17995-1-git-send-email-Bharat.Bhushan@freescale.com>
Download mbox | patch
Permalink /patch/174887/
State New
Headers show

Comments

Bharat Bhushan - Aug. 3, 2012, 5:30 a.m.
This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
[bharat.bhushan@freescale.com: reworked patch]
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
v7:
 - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog()
   are made staic
 - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared
   rather than checking these bits on handling the request.
   Below changes (pasted for quick understanding)
Alexander Graf - Aug. 7, 2012, 10:08 a.m.
On 03.08.2012, at 07:30, Bharat Bhushan wrote:

> This patch adds the watchdog emulation in KVM. The watchdog
> emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
> The kernel timer are used for watchdog emulation and emulates
> h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
> if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
> it is configured.
> 
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> [bharat.bhushan@freescale.com: reworked patch]
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> v7:
> - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog()
>   are made staic
> - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared
>   rather than checking these bits on handling the request.
>   Below changes (pasted for quick understanding)
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 7eedaeb..a0f5922 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -629,8 +629,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>                goto out;
>        }
> 
> -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>                kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
>                ret = 0;
>                goto out;
> @@ -1098,8 +1097,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>                }
>        }
> 
> -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>                run->exit_reason = KVM_EXIT_WATCHDOG;
>                r = RESUME_HOST | (r & RESUME_FLAG_NV);
>        }
> @@ -1251,8 +1249,10 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> 
>                vcpu->arch.tsr = sregs->u.e.tsr;
> 
> -               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
> +               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
> +                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>                        arm_next_watchdog(vcpu);
> +               }
> 
>                update_timer_ints(vcpu);
>        }
> @@ -1434,8 +1434,10 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
>         * We may have stopped the watchdog due to
>         * being stuck on final expiration.
>         */
> -       if (tsr_bits & (TSR_ENW | TSR_WIS))
> +       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
> +               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>                arm_next_watchdog(vcpu);
> +       }
> ----------
> 
> v6:
> - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
> - Using spin_lock_irqsave()
> - Using del_timer_sync().
> 
> v5:
> - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
> - Moved watchdog_next_timeout() in lock.
> 
> arch/powerpc/include/asm/kvm_host.h  |    3 +
> arch/powerpc/include/asm/kvm_ppc.h   |    2 +
> arch/powerpc/include/asm/reg_booke.h |    7 ++
> arch/powerpc/kvm/book3s.c            |    9 ++
> arch/powerpc/kvm/booke.c             |  158 ++++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/booke_emulate.c     |    8 ++
> arch/powerpc/kvm/powerpc.c           |   14 +++-
> include/linux/kvm.h                  |    2 +
> include/linux/kvm_host.h             |    1 +
> 9 files changed, 202 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 572ad01..873ec11 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -469,6 +469,8 @@ struct kvm_vcpu_arch {
> 	ulong fault_esr;
> 	ulong queued_dear;
> 	ulong queued_esr;
> +	spinlock_t wdt_lock;
> +	struct timer_list wdt_timer;
> 	u32 tlbcfg[4];
> 	u32 mmucfg;
> 	u32 epr;
> @@ -484,6 +486,7 @@ struct kvm_vcpu_arch {
> 	u8 osi_needed;
> 	u8 osi_enabled;
> 	u8 papr_enabled;
> +	u8 watchdog_enabled;
> 	u8 sane;
> 	u8 cpu_type;
> 	u8 hcall_needed;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..1438a5e 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> extern void kvmppc_decrementer_func(unsigned long data);
> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
> 
> /* Core-specific hooks */
> 
> diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
> index 2d916c4..e07e6af 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -539,6 +539,13 @@
> #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
> #define TCR_ARE		0x00400000	/* Auto Reload Enable */
> 
> +#ifdef CONFIG_E500
> +#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
> +			      (((tcr) & 0x1E0000) >> 15))
> +#else
> +#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
> +#endif
> +
> /* Bit definitions for the TSR. */
> #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
> #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 3f2a836..e946665 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> 	return 0;
> }
> 
> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
> +
> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> 	int i;
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index d25a097..a0f5922 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
> }
> 
> +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
> +{
> +	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
> +}
> +
> +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
> +{
> +	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
> +}
> +
> static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
> {
> #ifdef CONFIG_KVM_BOOKE_HV
> @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
> 		int_class = INT_CLASS_NONCRIT;
> 		break;
> +	case BOOKE_IRQPRIO_WATCHDOG:
> 	case BOOKE_IRQPRIO_CRITICAL:
> 	case BOOKE_IRQPRIO_DBELL_CRIT:
> 		allowed = vcpu->arch.shared->msr & MSR_CE;
> @@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> 	return allowed;
> }
> 
> +/*
> + * Return the number of jiffies until the next timeout.  If the timeout is
> + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
> + * because the larger value can break the timer APIs.
> + */
> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
> +{
> +	u64 tb, wdt_tb, wdt_ticks = 0;
> +	u64 nr_jiffies = 0;
> +	u32 period = TCR_GET_WP(vcpu->arch.tcr);
> +
> +	wdt_tb = 1ULL << (63 - period);
> +	tb = get_tb();
> +	/*
> +	 * The watchdog timeout will hapeen when TB bit corresponding
> +	 * to watchdog will toggle from 0 to 1.
> +	 */
> +	if (tb & wdt_tb)
> +		wdt_ticks = wdt_tb;
> +
> +	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> +
> +	/* Convert timebase ticks to jiffies */
> +	nr_jiffies = wdt_ticks;
> +
> +	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> +		nr_jiffies++;
> +
> +	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
> +}
> +
> +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long nr_jiffies;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
> +	nr_jiffies = watchdog_next_timeout(vcpu);
> +	/*
> +	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
> +	 * then do not run the watchdog timer as this can break timer APIs.
> +	 */
> +	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> +		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> +	else
> +		del_timer(&vcpu->arch.wdt_timer);
> +	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
> +}
> +
> +void kvmppc_watchdog_func(unsigned long data)
> +{
> +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> +	u32 tsr, new_tsr;
> +	int final;
> +
> +	do {
> +		new_tsr = tsr = vcpu->arch.tsr;
> +		final = 0;
> +
> +		/* Time out event */
> +		if (tsr & TSR_ENW) {
> +			if (tsr & TSR_WIS)
> +				final = 1;
> +			else
> +				new_tsr = tsr | TSR_WIS;
> +		} else {
> +			new_tsr = tsr | TSR_ENW;
> +		}
> +	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> +
> +	if (new_tsr & TSR_WIS) {
> +		smp_wmb();
> +		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> +		kvm_vcpu_kick(vcpu);
> +	}
> +
> +	/*
> +	 * If this is final watchdog expiry and some action is required
> +	 * then exit to userspace.
> +	 */
> +	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
> +	    vcpu->arch.watchdog_enabled) {
> +		smp_wmb();
> +		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
> +		kvm_vcpu_kick(vcpu);
> +	}
> +
> +	/*
> +	 * Stop running the watchdog timer after final expiration to
> +	 * prevent the host from being flooded with timers if the
> +	 * guest sets a short period.
> +	 * Timers will resume when TSR/TCR is updated next time.
> +	 */
> +	if (!final)
> +		arm_next_watchdog(vcpu);
> +}
> +
> static void update_timer_ints(struct kvm_vcpu *vcpu)
> {
> 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
> 		kvmppc_core_queue_dec(vcpu);
> 	else
> 		kvmppc_core_dequeue_dec(vcpu);
> +
> +	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> +		kvmppc_core_queue_watchdog(vcpu);
> +	else
> +		kvmppc_core_dequeue_watchdog(vcpu);
> }
> 
> static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
> @@ -516,6 +629,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> 		goto out;
> 	}
> 
> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> +		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
> +		ret = 0;
> +		goto out;
> +	}
> +
> 	kvm_guest_enter();
> 
> #ifdef CONFIG_PPC_FPU
> @@ -978,6 +1097,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 		}
> 	}
> 
> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> +		run->exit_reason = KVM_EXIT_WATCHDOG;
> +		r = RESUME_HOST | (r & RESUME_FLAG_NV);
> +	}
> +
> 	return r;
> }
> 
> @@ -1011,6 +1135,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> 	return r;
> }
> 
> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	/* setup watchdog timer once */
> +	spin_lock_init(&vcpu->arch.wdt_lock);
> +	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
> +		    (unsigned long)vcpu);
> +
> +	return 0;
> +}
> +
> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +	del_timer_sync(&vcpu->arch.wdt_timer);
> +}
> +
> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> 	int i;
> @@ -1106,7 +1245,15 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> 	}
> 
> 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> +		u32 old_tsr = vcpu->arch.tsr;
> +
> 		vcpu->arch.tsr = sregs->u.e.tsr;
> +
> +		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
> +			clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> +			arm_next_watchdog(vcpu);
> +		}
> +
> 		update_timer_ints(vcpu);
> 	}
> 
> @@ -1267,6 +1414,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
> void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> {
> 	vcpu->arch.tcr = new_tcr;
> +	arm_next_watchdog(vcpu);
> 	update_timer_ints(vcpu);
> }
> 
> @@ -1281,6 +1429,16 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> {
> 	clear_bits(tsr_bits, &vcpu->arch.tsr);
> +
> +	/*
> +	 * We may have stopped the watchdog due to
> +	 * being stuck on final expiration.
> +	 */
> +	if (tsr_bits & (TSR_ENW | TSR_WIS)) {
> +		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> +		arm_next_watchdog(vcpu);

Just a thought: Would it make sense to fold the clear_bit into arm_next_watchdog()?

Otherwise looks good to me :)


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan - Aug. 7, 2012, 3:59 p.m.
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Tuesday, August 07, 2012 3:38 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 1/2] KVM: PPC: booke: Add watchdog emulation
> 
> 
> On 03.08.2012, at 07:30, Bharat Bhushan wrote:
> 
> > This patch adds the watchdog emulation in KVM. The watchdog
> > emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
> > The kernel timer are used for watchdog emulation and emulates
> > h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
> > if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
> > it is configured.
> >
> > Signed-off-by: Liu Yu <yu.liu@freescale.com>
> > Signed-off-by: Scott Wood <scottwood@freescale.com>
> > [bharat.bhushan@freescale.com: reworked patch]
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> > v7:
> > - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog()
> >   are made staic
> > - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared
> >   rather than checking these bits on handling the request.
> >   Below changes (pasted for quick understanding)
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index 7eedaeb..a0f5922 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -629,8 +629,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
> kvm_vcpu *vcpu)
> >                goto out;
> >        }
> >
> > -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> > -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> > +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> >                kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
> >                ret = 0;
> >                goto out;
> > @@ -1098,8 +1097,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> >                }
> >        }
> >
> > -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> > -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> > +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> >                run->exit_reason = KVM_EXIT_WATCHDOG;
> >                r = RESUME_HOST | (r & RESUME_FLAG_NV);
> >        }
> > @@ -1251,8 +1249,10 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> >
> >                vcpu->arch.tsr = sregs->u.e.tsr;
> >
> > -               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
> > +               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
> > +                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> >                        arm_next_watchdog(vcpu);
> > +               }
> >
> >                update_timer_ints(vcpu);
> >        }
> > @@ -1434,8 +1434,10 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32
> tsr_bits)
> >         * We may have stopped the watchdog due to
> >         * being stuck on final expiration.
> >         */
> > -       if (tsr_bits & (TSR_ENW | TSR_WIS))
> > +       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
> > +               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> >                arm_next_watchdog(vcpu);
> > +       }
> > ----------
> >
> > v6:
> > - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
> > - Using spin_lock_irqsave()
> > - Using del_timer_sync().
> >
> > v5:
> > - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
> > - Moved watchdog_next_timeout() in lock.
> >
> > arch/powerpc/include/asm/kvm_host.h  |    3 +
> > arch/powerpc/include/asm/kvm_ppc.h   |    2 +
> > arch/powerpc/include/asm/reg_booke.h |    7 ++
> > arch/powerpc/kvm/book3s.c            |    9 ++
> > arch/powerpc/kvm/booke.c             |  158 ++++++++++++++++++++++++++++++++++
> > arch/powerpc/kvm/booke_emulate.c     |    8 ++
> > arch/powerpc/kvm/powerpc.c           |   14 +++-
> > include/linux/kvm.h                  |    2 +
> > include/linux/kvm_host.h             |    1 +
> > 9 files changed, 202 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> > index 572ad01..873ec11 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -469,6 +469,8 @@ struct kvm_vcpu_arch {
> > 	ulong fault_esr;
> > 	ulong queued_dear;
> > 	ulong queued_esr;
> > +	spinlock_t wdt_lock;
> > +	struct timer_list wdt_timer;
> > 	u32 tlbcfg[4];
> > 	u32 mmucfg;
> > 	u32 epr;
> > @@ -484,6 +486,7 @@ struct kvm_vcpu_arch {
> > 	u8 osi_needed;
> > 	u8 osi_enabled;
> > 	u8 papr_enabled;
> > +	u8 watchdog_enabled;
> > 	u8 sane;
> > 	u8 cpu_type;
> > 	u8 hcall_needed;
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> > index 0124937..1438a5e 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> > extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> > extern void kvmppc_decrementer_func(unsigned long data);
> > extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> > +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> > +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
> >
> > /* Core-specific hooks */
> >
> > diff --git a/arch/powerpc/include/asm/reg_booke.h
> b/arch/powerpc/include/asm/reg_booke.h
> > index 2d916c4..e07e6af 100644
> > --- a/arch/powerpc/include/asm/reg_booke.h
> > +++ b/arch/powerpc/include/asm/reg_booke.h
> > @@ -539,6 +539,13 @@
> > #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
> > #define TCR_ARE		0x00400000	/* Auto Reload Enable */
> >
> > +#ifdef CONFIG_E500
> > +#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
> > +			      (((tcr) & 0x1E0000) >> 15))
> > +#else
> > +#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
> > +#endif
> > +
> > /* Bit definitions for the TSR. */
> > #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
> > #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
> > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> > index 3f2a836..e946665 100644
> > --- a/arch/powerpc/kvm/book3s.c
> > +++ b/arch/powerpc/kvm/book3s.c
> > @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> > 	return 0;
> > }
> >
> > +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> > +{
> > +	return 0;
> > +}
> > +
> > +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > +
> > int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> > {
> > 	int i;
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index d25a097..a0f5922 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> > 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
> > }
> >
> > +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
> > +{
> > +	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
> > +}
> > +
> > +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
> > +{
> > +	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
> > +}
> > +
> > static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
> > {
> > #ifdef CONFIG_KVM_BOOKE_HV
> > @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu
> *vcpu,
> > 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
> > 		int_class = INT_CLASS_NONCRIT;
> > 		break;
> > +	case BOOKE_IRQPRIO_WATCHDOG:
> > 	case BOOKE_IRQPRIO_CRITICAL:
> > 	case BOOKE_IRQPRIO_DBELL_CRIT:
> > 		allowed = vcpu->arch.shared->msr & MSR_CE;
> > @@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu
> *vcpu,
> > 	return allowed;
> > }
> >
> > +/*
> > + * Return the number of jiffies until the next timeout.  If the timeout is
> > + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
> > + * because the larger value can break the timer APIs.
> > + */
> > +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
> > +{
> > +	u64 tb, wdt_tb, wdt_ticks = 0;
> > +	u64 nr_jiffies = 0;
> > +	u32 period = TCR_GET_WP(vcpu->arch.tcr);
> > +
> > +	wdt_tb = 1ULL << (63 - period);
> > +	tb = get_tb();
> > +	/*
> > +	 * The watchdog timeout will hapeen when TB bit corresponding
> > +	 * to watchdog will toggle from 0 to 1.
> > +	 */
> > +	if (tb & wdt_tb)
> > +		wdt_ticks = wdt_tb;
> > +
> > +	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> > +
> > +	/* Convert timebase ticks to jiffies */
> > +	nr_jiffies = wdt_ticks;
> > +
> > +	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> > +		nr_jiffies++;
> > +
> > +	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
> > +}
> > +
> > +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
> > +{
> > +	unsigned long nr_jiffies;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
> > +	nr_jiffies = watchdog_next_timeout(vcpu);
> > +	/*
> > +	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
> > +	 * then do not run the watchdog timer as this can break timer APIs.
> > +	 */
> > +	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> > +		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> > +	else
> > +		del_timer(&vcpu->arch.wdt_timer);
> > +	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
> > +}
> > +
> > +void kvmppc_watchdog_func(unsigned long data)
> > +{
> > +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> > +	u32 tsr, new_tsr;
> > +	int final;
> > +
> > +	do {
> > +		new_tsr = tsr = vcpu->arch.tsr;
> > +		final = 0;
> > +
> > +		/* Time out event */
> > +		if (tsr & TSR_ENW) {
> > +			if (tsr & TSR_WIS)
> > +				final = 1;
> > +			else
> > +				new_tsr = tsr | TSR_WIS;
> > +		} else {
> > +			new_tsr = tsr | TSR_ENW;
> > +		}
> > +	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> > +
> > +	if (new_tsr & TSR_WIS) {
> > +		smp_wmb();
> > +		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> > +		kvm_vcpu_kick(vcpu);
> > +	}
> > +
> > +	/*
> > +	 * If this is final watchdog expiry and some action is required
> > +	 * then exit to userspace.
> > +	 */
> > +	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
> > +	    vcpu->arch.watchdog_enabled) {
> > +		smp_wmb();
> > +		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
> > +		kvm_vcpu_kick(vcpu);
> > +	}
> > +
> > +	/*
> > +	 * Stop running the watchdog timer after final expiration to
> > +	 * prevent the host from being flooded with timers if the
> > +	 * guest sets a short period.
> > +	 * Timers will resume when TSR/TCR is updated next time.
> > +	 */
> > +	if (!final)
> > +		arm_next_watchdog(vcpu);
> > +}
> > +
> > static void update_timer_ints(struct kvm_vcpu *vcpu)
> > {
> > 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
> > 		kvmppc_core_queue_dec(vcpu);
> > 	else
> > 		kvmppc_core_dequeue_dec(vcpu);
> > +
> > +	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> > +		kvmppc_core_queue_watchdog(vcpu);
> > +	else
> > +		kvmppc_core_dequeue_watchdog(vcpu);
> > }
> >
> > static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
> > @@ -516,6 +629,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
> kvm_vcpu *vcpu)
> > 		goto out;
> > 	}
> >
> > +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> > +		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
> > +		ret = 0;
> > +		goto out;
> > +	}
> > +
> > 	kvm_guest_enter();
> >
> > #ifdef CONFIG_PPC_FPU
> > @@ -978,6 +1097,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 		}
> > 	}
> >
> > +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
> > +		run->exit_reason = KVM_EXIT_WATCHDOG;
> > +		r = RESUME_HOST | (r & RESUME_FLAG_NV);
> > +	}
> > +
> > 	return r;
> > }
> >
> > @@ -1011,6 +1135,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> > 	return r;
> > }
> >
> > +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> > +{
> > +	/* setup watchdog timer once */
> > +	spin_lock_init(&vcpu->arch.wdt_lock);
> > +	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
> > +		    (unsigned long)vcpu);
> > +
> > +	return 0;
> > +}
> > +
> > +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> > +{
> > +	del_timer_sync(&vcpu->arch.wdt_timer);
> > +}
> > +
> > int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> > {
> > 	int i;
> > @@ -1106,7 +1245,15 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> > 	}
> >
> > 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> > +		u32 old_tsr = vcpu->arch.tsr;
> > +
> > 		vcpu->arch.tsr = sregs->u.e.tsr;
> > +
> > +		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
> > +			clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> > +			arm_next_watchdog(vcpu);
> > +		}
> > +
> > 		update_timer_ints(vcpu);
> > 	}
> >
> > @@ -1267,6 +1414,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
> > void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> > {
> > 	vcpu->arch.tcr = new_tcr;
> > +	arm_next_watchdog(vcpu);
> > 	update_timer_ints(vcpu);
> > }
> >
> > @@ -1281,6 +1429,16 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32
> tsr_bits)
> > void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> > {
> > 	clear_bits(tsr_bits, &vcpu->arch.tsr);
> > +
> > +	/*
> > +	 * We may have stopped the watchdog due to
> > +	 * being stuck on final expiration.
> > +	 */
> > +	if (tsr_bits & (TSR_ENW | TSR_WIS)) {
> > +		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
> > +		arm_next_watchdog(vcpu);
> 
> Just a thought: Would it make sense to fold the clear_bit into
> arm_next_watchdog()?

We want to clear_bit(REQ_WATCHDOG) only when ENW or WIS are cleared. Arm_next_watchdog() is called from some more places where these bit are not cleared. 

We can do in arm_next_watchdog() in something like this:

if (vcpu->arch.tsr & (TSR_ENW | TSR_WIS) != TSR_ENW | TSR_WIS)
      clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);

Thanks
-Bharat

> 
> Otherwise looks good to me :)
> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Aug. 8, 2012, 11:09 a.m.
On 07.08.2012, at 17:59, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Tuesday, August 07, 2012 3:38 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 1/2] KVM: PPC: booke: Add watchdog emulation
>> 
>> 
>> On 03.08.2012, at 07:30, Bharat Bhushan wrote:
>> 
>>> This patch adds the watchdog emulation in KVM. The watchdog
>>> emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
>>> The kernel timer are used for watchdog emulation and emulates
>>> h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
>>> if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
>>> it is configured.
>>> 
>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
>>> [bharat.bhushan@freescale.com: reworked patch]
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>> v7:
>>> - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog()
>>>  are made staic
>>> - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared
>>>  rather than checking these bits on handling the request.
>>>  Below changes (pasted for quick understanding)
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>> index 7eedaeb..a0f5922 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -629,8 +629,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
>> kvm_vcpu *vcpu)
>>>               goto out;
>>>       }
>>> 
>>> -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
>>> -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
>>> +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>>>               kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
>>>               ret = 0;
>>>               goto out;
>>> @@ -1098,8 +1097,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>>               }
>>>       }
>>> 
>>> -       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
>>> -           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
>>> +       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>>>               run->exit_reason = KVM_EXIT_WATCHDOG;
>>>               r = RESUME_HOST | (r & RESUME_FLAG_NV);
>>>       }
>>> @@ -1251,8 +1249,10 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
>>> 
>>>               vcpu->arch.tsr = sregs->u.e.tsr;
>>> 
>>> -               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
>>> +               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
>>> +                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>>>                       arm_next_watchdog(vcpu);
>>> +               }
>>> 
>>>               update_timer_ints(vcpu);
>>>       }
>>> @@ -1434,8 +1434,10 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32
>> tsr_bits)
>>>        * We may have stopped the watchdog due to
>>>        * being stuck on final expiration.
>>>        */
>>> -       if (tsr_bits & (TSR_ENW | TSR_WIS))
>>> +       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
>>> +               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>>>               arm_next_watchdog(vcpu);
>>> +       }
>>> ----------
>>> 
>>> v6:
>>> - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
>>> - Using spin_lock_irqsave()
>>> - Using del_timer_sync().
>>> 
>>> v5:
>>> - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
>>> - Moved watchdog_next_timeout() in lock.
>>> 
>>> arch/powerpc/include/asm/kvm_host.h  |    3 +
>>> arch/powerpc/include/asm/kvm_ppc.h   |    2 +
>>> arch/powerpc/include/asm/reg_booke.h |    7 ++
>>> arch/powerpc/kvm/book3s.c            |    9 ++
>>> arch/powerpc/kvm/booke.c             |  158 ++++++++++++++++++++++++++++++++++
>>> arch/powerpc/kvm/booke_emulate.c     |    8 ++
>>> arch/powerpc/kvm/powerpc.c           |   14 +++-
>>> include/linux/kvm.h                  |    2 +
>>> include/linux/kvm_host.h             |    1 +
>>> 9 files changed, 202 insertions(+), 2 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>> b/arch/powerpc/include/asm/kvm_host.h
>>> index 572ad01..873ec11 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -469,6 +469,8 @@ struct kvm_vcpu_arch {
>>> 	ulong fault_esr;
>>> 	ulong queued_dear;
>>> 	ulong queued_esr;
>>> +	spinlock_t wdt_lock;
>>> +	struct timer_list wdt_timer;
>>> 	u32 tlbcfg[4];
>>> 	u32 mmucfg;
>>> 	u32 epr;
>>> @@ -484,6 +486,7 @@ struct kvm_vcpu_arch {
>>> 	u8 osi_needed;
>>> 	u8 osi_enabled;
>>> 	u8 papr_enabled;
>>> +	u8 watchdog_enabled;
>>> 	u8 sane;
>>> 	u8 cpu_type;
>>> 	u8 hcall_needed;
>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>> b/arch/powerpc/include/asm/kvm_ppc.h
>>> index 0124937..1438a5e 100644
>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>>> @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>>> extern void kvmppc_decrementer_func(unsigned long data);
>>> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
>>> +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
>>> +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
>>> 
>>> /* Core-specific hooks */
>>> 
>>> diff --git a/arch/powerpc/include/asm/reg_booke.h
>> b/arch/powerpc/include/asm/reg_booke.h
>>> index 2d916c4..e07e6af 100644
>>> --- a/arch/powerpc/include/asm/reg_booke.h
>>> +++ b/arch/powerpc/include/asm/reg_booke.h
>>> @@ -539,6 +539,13 @@
>>> #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
>>> #define TCR_ARE		0x00400000	/* Auto Reload Enable */
>>> 
>>> +#ifdef CONFIG_E500
>>> +#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
>>> +			      (((tcr) & 0x1E0000) >> 15))
>>> +#else
>>> +#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
>>> +#endif
>>> +
>>> /* Bit definitions for the TSR. */
>>> #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
>>> #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
>>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
>>> index 3f2a836..e946665 100644
>>> --- a/arch/powerpc/kvm/book3s.c
>>> +++ b/arch/powerpc/kvm/book3s.c
>>> @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>> 	return 0;
>>> }
>>> 
>>> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
>>> +{
>>> +	return 0;
>>> +}
>>> +
>>> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
>>> +{
>>> +}
>>> +
>>> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>>> {
>>> 	int i;
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>> index d25a097..a0f5922 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>>> 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
>>> }
>>> 
>>> +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
>>> +{
>>> +	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
>>> +}
>>> +
>>> +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
>>> +{
>>> +	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
>>> +}
>>> +
>>> static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
>>> {
>>> #ifdef CONFIG_KVM_BOOKE_HV
>>> @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu
>> *vcpu,
>>> 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
>>> 		int_class = INT_CLASS_NONCRIT;
>>> 		break;
>>> +	case BOOKE_IRQPRIO_WATCHDOG:
>>> 	case BOOKE_IRQPRIO_CRITICAL:
>>> 	case BOOKE_IRQPRIO_DBELL_CRIT:
>>> 		allowed = vcpu->arch.shared->msr & MSR_CE;
>>> @@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu
>> *vcpu,
>>> 	return allowed;
>>> }
>>> 
>>> +/*
>>> + * Return the number of jiffies until the next timeout.  If the timeout is
>>> + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
>>> + * because the larger value can break the timer APIs.
>>> + */
>>> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
>>> +{
>>> +	u64 tb, wdt_tb, wdt_ticks = 0;
>>> +	u64 nr_jiffies = 0;
>>> +	u32 period = TCR_GET_WP(vcpu->arch.tcr);
>>> +
>>> +	wdt_tb = 1ULL << (63 - period);
>>> +	tb = get_tb();
>>> +	/*
>>> +	 * The watchdog timeout will hapeen when TB bit corresponding
>>> +	 * to watchdog will toggle from 0 to 1.
>>> +	 */
>>> +	if (tb & wdt_tb)
>>> +		wdt_ticks = wdt_tb;
>>> +
>>> +	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
>>> +
>>> +	/* Convert timebase ticks to jiffies */
>>> +	nr_jiffies = wdt_ticks;
>>> +
>>> +	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
>>> +		nr_jiffies++;
>>> +
>>> +	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
>>> +}
>>> +
>>> +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
>>> +{
>>> +	unsigned long nr_jiffies;
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
>>> +	nr_jiffies = watchdog_next_timeout(vcpu);
>>> +	/*
>>> +	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
>>> +	 * then do not run the watchdog timer as this can break timer APIs.
>>> +	 */
>>> +	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
>>> +		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
>>> +	else
>>> +		del_timer(&vcpu->arch.wdt_timer);
>>> +	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
>>> +}
>>> +
>>> +void kvmppc_watchdog_func(unsigned long data)
>>> +{
>>> +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>> +	u32 tsr, new_tsr;
>>> +	int final;
>>> +
>>> +	do {
>>> +		new_tsr = tsr = vcpu->arch.tsr;
>>> +		final = 0;
>>> +
>>> +		/* Time out event */
>>> +		if (tsr & TSR_ENW) {
>>> +			if (tsr & TSR_WIS)
>>> +				final = 1;
>>> +			else
>>> +				new_tsr = tsr | TSR_WIS;
>>> +		} else {
>>> +			new_tsr = tsr | TSR_ENW;
>>> +		}
>>> +	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
>>> +
>>> +	if (new_tsr & TSR_WIS) {
>>> +		smp_wmb();
>>> +		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
>>> +		kvm_vcpu_kick(vcpu);
>>> +	}
>>> +
>>> +	/*
>>> +	 * If this is final watchdog expiry and some action is required
>>> +	 * then exit to userspace.
>>> +	 */
>>> +	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
>>> +	    vcpu->arch.watchdog_enabled) {
>>> +		smp_wmb();
>>> +		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
>>> +		kvm_vcpu_kick(vcpu);
>>> +	}
>>> +
>>> +	/*
>>> +	 * Stop running the watchdog timer after final expiration to
>>> +	 * prevent the host from being flooded with timers if the
>>> +	 * guest sets a short period.
>>> +	 * Timers will resume when TSR/TCR is updated next time.
>>> +	 */
>>> +	if (!final)
>>> +		arm_next_watchdog(vcpu);
>>> +}
>>> +
>>> static void update_timer_ints(struct kvm_vcpu *vcpu)
>>> {
>>> 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
>>> 		kvmppc_core_queue_dec(vcpu);
>>> 	else
>>> 		kvmppc_core_dequeue_dec(vcpu);
>>> +
>>> +	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
>>> +		kvmppc_core_queue_watchdog(vcpu);
>>> +	else
>>> +		kvmppc_core_dequeue_watchdog(vcpu);
>>> }
>>> 
>>> static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
>>> @@ -516,6 +629,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
>> kvm_vcpu *vcpu)
>>> 		goto out;
>>> 	}
>>> 
>>> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>>> +		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
>>> +		ret = 0;
>>> +		goto out;
>>> +	}
>>> +
>>> 	kvm_guest_enter();
>>> 
>>> #ifdef CONFIG_PPC_FPU
>>> @@ -978,6 +1097,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 		}
>>> 	}
>>> 
>>> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
>>> +		run->exit_reason = KVM_EXIT_WATCHDOG;
>>> +		r = RESUME_HOST | (r & RESUME_FLAG_NV);
>>> +	}
>>> +
>>> 	return r;
>>> }
>>> 
>>> @@ -1011,6 +1135,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>> 	return r;
>>> }
>>> 
>>> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
>>> +{
>>> +	/* setup watchdog timer once */
>>> +	spin_lock_init(&vcpu->arch.wdt_lock);
>>> +	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
>>> +		    (unsigned long)vcpu);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
>>> +{
>>> +	del_timer_sync(&vcpu->arch.wdt_timer);
>>> +}
>>> +
>>> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>>> {
>>> 	int i;
>>> @@ -1106,7 +1245,15 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
>>> 	}
>>> 
>>> 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
>>> +		u32 old_tsr = vcpu->arch.tsr;
>>> +
>>> 		vcpu->arch.tsr = sregs->u.e.tsr;
>>> +
>>> +		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
>>> +			clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>>> +			arm_next_watchdog(vcpu);
>>> +		}
>>> +
>>> 		update_timer_ints(vcpu);
>>> 	}
>>> 
>>> @@ -1267,6 +1414,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
>>> void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
>>> {
>>> 	vcpu->arch.tcr = new_tcr;
>>> +	arm_next_watchdog(vcpu);
>>> 	update_timer_ints(vcpu);
>>> }
>>> 
>>> @@ -1281,6 +1429,16 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32
>> tsr_bits)
>>> void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
>>> {
>>> 	clear_bits(tsr_bits, &vcpu->arch.tsr);
>>> +
>>> +	/*
>>> +	 * We may have stopped the watchdog due to
>>> +	 * being stuck on final expiration.
>>> +	 */
>>> +	if (tsr_bits & (TSR_ENW | TSR_WIS)) {
>>> +		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
>>> +		arm_next_watchdog(vcpu);
>> 
>> Just a thought: Would it make sense to fold the clear_bit into
>> arm_next_watchdog()?
> 
> We want to clear_bit(REQ_WATCHDOG) only when ENW or WIS are cleared. Arm_next_watchdog() is called from some more places where these bit are not cleared. 
> 
> We can do in arm_next_watchdog() in something like this:
> 
> if (vcpu->arch.tsr & (TSR_ENW | TSR_WIS) != TSR_ENW | TSR_WIS)
>      clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);

If that also works, it'd make things easier, yes. I like the QEMU principle of having functions that you can basically call any time and they always recalculate all their state. That makes it less complex to find out when they are ok to be called.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7eedaeb..a0f5922 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -629,8 +629,7 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                goto out;
        }
 
-       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
-           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
                kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
                ret = 0;
                goto out;
@@ -1098,8 +1097,7 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
        }
 
-       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
-           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
                run->exit_reason = KVM_EXIT_WATCHDOG;
                r = RESUME_HOST | (r & RESUME_FLAG_NV);
        }
@@ -1251,8 +1249,10 @@  static int set_sregs_base(struct kvm_vcpu *vcpu,

                vcpu->arch.tsr = sregs->u.e.tsr;
 
-               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
+                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
                        arm_next_watchdog(vcpu);
+               }
        
                update_timer_ints(vcpu);
        }
@@ -1434,8 +1434,10 @@  void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
         * We may have stopped the watchdog due to
         * being stuck on final expiration.
         */
-       if (tsr_bits & (TSR_ENW | TSR_WIS))
+       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
+               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
                arm_next_watchdog(vcpu);
+       }
----------

v6:
 - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
 - Using spin_lock_irqsave()
 - Using del_timer_sync().

v5:
 - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
 - Moved watchdog_next_timeout() in lock.

 arch/powerpc/include/asm/kvm_host.h  |    3 +
 arch/powerpc/include/asm/kvm_ppc.h   |    2 +
 arch/powerpc/include/asm/reg_booke.h |    7 ++
 arch/powerpc/kvm/book3s.c            |    9 ++
 arch/powerpc/kvm/booke.c             |  158 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke_emulate.c     |    8 ++
 arch/powerpc/kvm/powerpc.c           |   14 +++-
 include/linux/kvm.h                  |    2 +
 include/linux/kvm_host.h             |    1 +
 9 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 572ad01..873ec11 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -469,6 +469,8 @@  struct kvm_vcpu_arch {
 	ulong fault_esr;
 	ulong queued_dear;
 	ulong queued_esr;
+	spinlock_t wdt_lock;
+	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
 	u32 mmucfg;
 	u32 epr;
@@ -484,6 +486,7 @@  struct kvm_vcpu_arch {
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
+	u8 watchdog_enabled;
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..1438a5e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -68,6 +68,8 @@  extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@ 
 #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
 #define TCR_ARE		0x00400000	/* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+			      (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
 #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a836..e946665 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..a0f5922 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@  void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@  static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
 		int_class = INT_CLASS_NONCRIT;
 		break;
+	case BOOKE_IRQPRIO_WATCHDOG:
 	case BOOKE_IRQPRIO_CRITICAL:
 	case BOOKE_IRQPRIO_DBELL_CRIT:
 		allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,114 @@  static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+	u64 tb, wdt_tb, wdt_ticks = 0;
+	u64 nr_jiffies = 0;
+	u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+	wdt_tb = 1ULL << (63 - period);
+	tb = get_tb();
+	/*
+	 * The watchdog timeout will hapeen when TB bit corresponding
+	 * to watchdog will toggle from 0 to 1.
+	 */
+	if (tb & wdt_tb)
+		wdt_ticks = wdt_tb;
+
+	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+	/* Convert timebase ticks to jiffies */
+	nr_jiffies = wdt_ticks;
+
+	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+		nr_jiffies++;
+
+	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr_jiffies;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+	nr_jiffies = watchdog_next_timeout(vcpu);
+	/*
+	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+	 * then do not run the watchdog timer as this can break timer APIs.
+	 */
+	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+	else
+		del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	u32 tsr, new_tsr;
+	int final;
+
+	do {
+		new_tsr = tsr = vcpu->arch.tsr;
+		final = 0;
+
+		/* Time out event */
+		if (tsr & TSR_ENW) {
+			if (tsr & TSR_WIS)
+				final = 1;
+			else
+				new_tsr = tsr | TSR_WIS;
+		} else {
+			new_tsr = tsr | TSR_ENW;
+		}
+	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+	if (new_tsr & TSR_WIS) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * If this is final watchdog expiry and some action is required
+	 * then exit to userspace.
+	 */
+	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+	    vcpu->arch.watchdog_enabled) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * Stop running the watchdog timer after final expiration to
+	 * prevent the host from being flooded with timers if the
+	 * guest sets a short period.
+	 * Timers will resume when TSR/TCR is updated next time.
+	 */
+	if (!final)
+		arm_next_watchdog(vcpu);
+}
+
 static void update_timer_ints(struct kvm_vcpu *vcpu)
 {
 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
 		kvmppc_core_queue_dec(vcpu);
 	else
 		kvmppc_core_dequeue_dec(vcpu);
+
+	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+		kvmppc_core_queue_watchdog(vcpu);
+	else
+		kvmppc_core_dequeue_watchdog(vcpu);
 }
 
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -516,6 +629,12 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
+		ret = 0;
+		goto out;
+	}
+
 	kvm_guest_enter();
 
 #ifdef CONFIG_PPC_FPU
@@ -978,6 +1097,11 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+		run->exit_reason = KVM_EXIT_WATCHDOG;
+		r = RESUME_HOST | (r & RESUME_FLAG_NV);
+	}
+
 	return r;
 }
 
@@ -1011,6 +1135,21 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	/* setup watchdog timer once */
+	spin_lock_init(&vcpu->arch.wdt_lock);
+	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+		    (unsigned long)vcpu);
+
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
@@ -1106,7 +1245,15 @@  static int set_sregs_base(struct kvm_vcpu *vcpu,
 	}
 
 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		u32 old_tsr = vcpu->arch.tsr;
+
 		vcpu->arch.tsr = sregs->u.e.tsr;
+
+		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
+			clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+			arm_next_watchdog(vcpu);
+		}
+
 		update_timer_ints(vcpu);
 	}
 
@@ -1267,6 +1414,7 @@  void kvmppc_core_commit_memory_region(struct kvm *kvm,
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
 	vcpu->arch.tcr = new_tcr;
+	arm_next_watchdog(vcpu);
 	update_timer_ints(vcpu);
 }
 
@@ -1281,6 +1429,16 @@  void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 {
 	clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+	/*
+	 * We may have stopped the watchdog due to
+	 * being stuck on final expiration.
+	 */
+	if (tsr_bits & (TSR_ENW | TSR_WIS)) {
+		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+		arm_next_watchdog(vcpu);
+	}
+
 	update_timer_ints(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb..5a66ade 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@  int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		kvmppc_clr_tsr_bits(vcpu, spr_val);
 		break;
 	case SPRN_TCR:
+		/*
+		 * WRC is a 2-bit field that is supposed to preserve its
+		 * value once written to non-zero.
+		 */
+		if (vcpu->arch.tcr & TCR_WRC_MASK) {
+			spr_val &= ~TCR_WRC_MASK;
+			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+		}
 		kvmppc_set_tcr(vcpu, spr_val);
 		break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a69ca4a..da484a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -224,6 +224,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
 	case KVM_CAP_PPC_HIOR:
@@ -382,6 +383,8 @@  enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -390,13 +393,14 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
 #endif
-
-	return 0;
+	ret = kvmppc_subarch_vcpu_init(vcpu);
+	return ret;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_destroy(vcpu);
+	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -641,6 +645,12 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB: {
 		struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c03e59e..f4f5be8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@  struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL	  19
 #define KVM_EXIT_S390_UCONTROL	  20
+#define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -620,6 +621,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 81
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbc65f9..b38adc2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -69,6 +69,7 @@ 
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_WATCHDOG          18
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0