diff mbox

[2/2] KVM: PPC: booke: Add watchdog emulation

Message ID 1342760362-10822-1-git-send-email-Bharat.Bhushan@freescale.com
State New, archived
Headers show

Commit Message

Bharat Bhushan July 20, 2012, 4:59 a.m. UTC
This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
[bharat.bhushan@freescale.com: reworked patch]

---
v5:
 - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
 - Moved watchdog_next_timeout() in lock.

v4:
 - in v3 i forgot to add Scott Wood and Liu Yu signoff

v3:
 - Using KVM_REQ_WATCHDOG for userspace exit.
 - TSR changes left for vcpu thread.
 - Other review comments on v2

 arch/powerpc/include/asm/kvm_host.h  |    3 +
 arch/powerpc/include/asm/kvm_ppc.h   |    3 +
 arch/powerpc/include/asm/reg_booke.h |    7 ++
 arch/powerpc/kvm/booke.c             |  140 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke_emulate.c     |    8 ++
 arch/powerpc/kvm/powerpc.c           |   19 +++++-
 include/linux/kvm.h                  |    2 +
 include/linux/kvm_host.h             |    1 +
 8 files changed, 182 insertions(+), 1 deletions(-)

Comments

Bharat Bhushan July 20, 2012, 4:59 a.m. UTC | #1
Please ignore this as I forget to get [v5] is subject.. 

Thanks
-Bharat

> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Friday, July 20, 2012 10:29 AM
> To: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de
> Cc: Bhushan Bharat-R65777
> Subject: [PATCH 2/2] KVM: PPC: booke: Add watchdog emulation
> 
> This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled
> by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
> The kernel timer are used for watchdog emulation and emulates h/w watchdog state
> machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU
> can reset/shutdown etc depending upon how it is configured.
> 
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> [bharat.bhushan@freescale.com: reworked patch]
> 
> ---
> v5:
>  - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
>  - Moved watchdog_next_timeout() in lock.
> 
> v4:
>  - in v3 i forgot to add Scott Wood and Liu Yu signoff
> 
> v3:
>  - Using KVM_REQ_WATCHDOG for userspace exit.
>  - TSR changes left for vcpu thread.
>  - Other review comments on v2
> 
>  arch/powerpc/include/asm/kvm_host.h  |    3 +
>  arch/powerpc/include/asm/kvm_ppc.h   |    3 +
>  arch/powerpc/include/asm/reg_booke.h |    7 ++
>  arch/powerpc/kvm/booke.c             |  140 ++++++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/booke_emulate.c     |    8 ++
>  arch/powerpc/kvm/powerpc.c           |   19 +++++-
>  include/linux/kvm.h                  |    2 +
>  include/linux/kvm_host.h             |    1 +
>  8 files changed, 182 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> index 50ea12f..43cac48 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
>  	ulong fault_esr;
>  	ulong queued_dear;
>  	ulong queued_esr;
> +	spinlock_t wdt_lock;
> +	struct timer_list wdt_timer;
>  	u32 tlbcfg[4];
>  	u32 mmucfg;
>  	u32 epr;
> @@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
>  	u8 osi_needed;
>  	u8 osi_enabled;
>  	u8 papr_enabled;
> +	u8 watchdog_enabled;
>  	u8 sane;
>  	u8 cpu_type;
>  	u8 hcall_needed;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..e5cf4b9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
> kvm_vcpu *vcpu);  extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);  extern
> u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);  extern void
> kvmppc_decrementer_func(unsigned long data);
> +extern void kvmppc_watchdog_func(unsigned long data);
>  extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> 
>  /* Core-specific hooks */
> @@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu
> *vcpu,
>                                         struct kvm_interrupt *irq);  extern void
> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>                                           struct kvm_interrupt *irq);
> +extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu); extern
> +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
> 
>  extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
>                                    unsigned int op, int *advance); diff --git
> a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
> index 2d916c4..e07e6af 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -539,6 +539,13 @@
>  #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
>  #define TCR_ARE		0x00400000	/* Auto Reload Enable */
> 
> +#ifdef CONFIG_E500
> +#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
> +			      (((tcr) & 0x1E0000) >> 15))
> +#else
> +#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30) #endif
> +
>  /* Bit definitions for the TSR. */
>  #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
>  #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
> d25a097..71b4ce9 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>  	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
> }
> 
> +void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) {
> +	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); }
> +
> +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) {
> +	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); }
> +
>  static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
> {  #ifdef CONFIG_KVM_BOOKE_HV @@ -325,6 +335,7 @@ static int
> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>  		msr_mask = MSR_CE | MSR_ME | MSR_DE;
>  		int_class = INT_CLASS_NONCRIT;
>  		break;
> +	case BOOKE_IRQPRIO_WATCHDOG:
>  	case BOOKE_IRQPRIO_CRITICAL:
>  	case BOOKE_IRQPRIO_DBELL_CRIT:
>  		allowed = vcpu->arch.shared->msr & MSR_CE; @@ -404,12 +415,113 @@
> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>  	return allowed;
>  }
> 
> +/*
> + * Return the number of jiffies until the next timeout.  If the timeout
> +is
> + * longer than the NEXT_TIMER_MAX_DELTA, then return
> +NEXT_TIMER_MAX_DELTA
> + * because the larger value can break the timer APIs.
> + */
> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) {
> +	u64 tb, wdt_tb, wdt_ticks = 0;
> +	u64 nr_jiffies = 0;
> +	u32 period = TCR_GET_WP(vcpu->arch.tcr);
> +
> +	wdt_tb = 1ULL << (63 - period);
> +	tb = get_tb();
> +	/*
> +	 * The watchdog timeout will hapeen when TB bit corresponding
> +	 * to watchdog will toggle from 0 to 1.
> +	 */
> +	if (tb & wdt_tb)
> +		wdt_ticks = wdt_tb;
> +
> +	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> +
> +	/* Convert timebase ticks to jiffies */
> +	nr_jiffies = wdt_ticks;
> +
> +	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> +		nr_jiffies++;
> +
> +	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); }
> +
> +static void arm_next_watchdog(struct kvm_vcpu *vcpu) {
> +	unsigned long nr_jiffies;
> +
> +	spin_lock(&vcpu->arch.wdt_lock);
> +	nr_jiffies = watchdog_next_timeout(vcpu);
> +	/*
> +	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
> +	 * then do not run the watchdog timer as this can break timer APIs.
> +	 */
> +	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> +		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> +	else
> +		del_timer(&vcpu->arch.wdt_timer);
> +	spin_unlock(&vcpu->arch.wdt_lock);
> +}
> +
> +void kvmppc_watchdog_func(unsigned long data) {
> +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> +	u32 tsr, new_tsr;
> +	int final;
> +
> +	do {
> +		new_tsr = tsr = vcpu->arch.tsr;
> +		final = 0;
> +
> +		/* Time out event */
> +		if (tsr & TSR_ENW) {
> +			if (tsr & TSR_WIS)
> +				final = 1;
> +			else
> +				new_tsr = tsr | TSR_WIS;
> +		} else {
> +			new_tsr = tsr | TSR_ENW;
> +		}
> +	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> +
> +	if (new_tsr & TSR_WIS) {
> +		smp_wmb();
> +		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> +		kvm_vcpu_kick(vcpu);
> +	}
> +
> +	/*
> +	 * If this is final watchdog expiry and some action is required
> +	 * then exit to userspace.
> +	 */
> +	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
> +	    vcpu->arch.watchdog_enabled) {
> +		smp_wmb();
> +		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
> +		kvm_vcpu_kick(vcpu);
> +	}
> +
> +	/*
> +	 * Stop running the watchdog timer after final expiration to
> +	 * prevent the host from being flooded with timers if the
> +	 * guest sets a short period.
> +	 * Timers will resume when TSR/TCR is updated next time.
> +	 */
> +	if (!final)
> +		arm_next_watchdog(vcpu);
> +}
> +
>  static void update_timer_ints(struct kvm_vcpu *vcpu)  {
>  	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
>  		kvmppc_core_queue_dec(vcpu);
>  	else
>  		kvmppc_core_dequeue_dec(vcpu);
> +
> +	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> +		kvmppc_core_queue_watchdog(vcpu);
> +	else
> +		kvmppc_core_dequeue_watchdog(vcpu);
>  }
> 
>  static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) @@ -516,6
> +628,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  		goto out;
>  	}
> 
> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> +	    (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> +		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
> +		ret = 0;
> +		goto out;
> +	}
> +
>  	kvm_guest_enter();
> 
>  #ifdef CONFIG_PPC_FPU
> @@ -978,6 +1097,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
>  		}
>  	}
> 
> +	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> +	    (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> +		run->exit_reason = KVM_EXIT_WATCHDOG;
> +		r = RESUME_HOST | (r & RESUME_FLAG_NV);
> +	}
> +
>  	return r;
>  }
> 
> @@ -1106,7 +1231,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
>  	}
> 
>  	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> +		u32 old_tsr = vcpu->arch.tsr;
> +
>  		vcpu->arch.tsr = sregs->u.e.tsr;
> +
> +		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
> +			arm_next_watchdog(vcpu);
> +
>  		update_timer_ints(vcpu);
>  	}
> 
> @@ -1267,6 +1398,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
> void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)  {
>  	vcpu->arch.tcr = new_tcr;
> +	arm_next_watchdog(vcpu);
>  	update_timer_ints(vcpu);
>  }
> 
> @@ -1281,6 +1413,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32
> tsr_bits)  void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)  {
>  	clear_bits(tsr_bits, &vcpu->arch.tsr);
> +
> +	/*
> +	 * We may have stopped the watchdog due to
> +	 * being stuck on final expiration.
> +	 */
> +	if (tsr_bits & (TSR_ENW | TSR_WIS))
> +		arm_next_watchdog(vcpu);
> +
>  	update_timer_ints(vcpu);
>  }
> 
> diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
> index 12834bb..5a66ade 100644
> --- a/arch/powerpc/kvm/booke_emulate.c
> +++ b/arch/powerpc/kvm/booke_emulate.c
> @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int
> sprn, ulong spr_val)
>  		kvmppc_clr_tsr_bits(vcpu, spr_val);
>  		break;
>  	case SPRN_TCR:
> +		/*
> +		 * WRC is a 2-bit field that is supposed to preserve its
> +		 * value once written to non-zero.
> +		 */
> +		if (vcpu->arch.tcr & TCR_WRC_MASK) {
> +			spr_val &= ~TCR_WRC_MASK;
> +			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
> +		}
>  		kvmppc_set_tcr(vcpu, spr_val);
>  		break;
> 
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index
> 87f4dc8..63457cb 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -220,6 +220,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	switch (ext) {
>  #ifdef CONFIG_BOOKE
>  	case KVM_CAP_PPC_BOOKE_SREGS:
> +	case KVM_CAP_PPC_BOOKE_WATCHDOG:
>  #else
>  	case KVM_CAP_PPC_SEGSTATE:
>  	case KVM_CAP_PPC_HIOR:
> @@ -386,13 +387,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)  #ifdef
> CONFIG_KVM_EXIT_TIMING
>  	mutex_init(&vcpu->arch.exit_timing_lock);
>  #endif
> -
> +#ifdef CONFIG_BOOKE
> +	spin_lock_init(&vcpu->arch.wdt_lock);
> +	/* setup watchdog timer once */
> +	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
> +		    (unsigned long)vcpu);
> +#endif
>  	return 0;
>  }
> 
>  void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)  {
>  	kvmppc_mmu_destroy(vcpu);
> +#ifdef CONFIG_BOOKE
> +	spin_lock(&vcpu->arch.wdt_lock);
> +	del_timer(&vcpu->arch.wdt_timer);
> +	spin_unlock(&vcpu->arch.wdt_lock);
> +#endif
>  }
> 
>  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -637,6 +648,12 @@
> static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>  		r = 0;
>  		vcpu->arch.papr_enabled = true;
>  		break;
> +#ifdef CONFIG_BOOKE
> +	case KVM_CAP_PPC_BOOKE_WATCHDOG:
> +		r = 0;
> +		vcpu->arch.watchdog_enabled = true;
> +		break;
> +#endif
>  #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
>  	case KVM_CAP_SW_TLB: {
>  		struct kvm_config_tlb cfg;
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2ce09aa..9ff5aa5
> 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>  #define KVM_EXIT_OSI              18
>  #define KVM_EXIT_PAPR_HCALL	  19
>  #define KVM_EXIT_S390_UCONTROL	  20
> +#define KVM_EXIT_WATCHDOG         21
> 
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  #define KVM_INTERNAL_ERROR_EMULATION 1
> @@ -618,6 +619,7 @@ struct kvm_ppc_smmu_info {  #define
> KVM_CAP_PPC_GET_SMMU_INFO 78  #define KVM_CAP_S390_COW 79  #define
> KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_PPC_BOOKE_WATCHDOG 81
> 
>  #ifdef KVM_CAP_IRQ_ROUTING
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index
> 27ac8a4..41d32be 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -69,6 +69,7 @@
>  #define KVM_REQ_IMMEDIATE_EXIT    15
>  #define KVM_REQ_PMU               16
>  #define KVM_REQ_PMI               17
> +#define KVM_REQ_WATCHDOG          18
> 
>  #define KVM_USERSPACE_IRQ_SOURCE_ID	0
> 
> --
> 1.7.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12f..43cac48 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -467,6 +467,8 @@  struct kvm_vcpu_arch {
 	ulong fault_esr;
 	ulong queued_dear;
 	ulong queued_esr;
+	spinlock_t wdt_lock;
+	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
 	u32 mmucfg;
 	u32 epr;
@@ -482,6 +484,7 @@  struct kvm_vcpu_arch {
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
+	u8 watchdog_enabled;
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..e5cf4b9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -67,6 +67,7 @@  extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_watchdog_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
@@ -104,6 +105,8 @@  extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
                                          struct kvm_interrupt *irq);
+extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                   unsigned int op, int *advance);
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@ 
 #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
 #define TCR_ARE		0x00400000	/* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+			      (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
 #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..71b4ce9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@  void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@  static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
 		int_class = INT_CLASS_NONCRIT;
 		break;
+	case BOOKE_IRQPRIO_WATCHDOG:
 	case BOOKE_IRQPRIO_CRITICAL:
 	case BOOKE_IRQPRIO_DBELL_CRIT:
 		allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,113 @@  static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+	u64 tb, wdt_tb, wdt_ticks = 0;
+	u64 nr_jiffies = 0;
+	u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+	wdt_tb = 1ULL << (63 - period);
+	tb = get_tb();
+	/*
+	 * The watchdog timeout will hapeen when TB bit corresponding
+	 * to watchdog will toggle from 0 to 1.
+	 */
+	if (tb & wdt_tb)
+		wdt_ticks = wdt_tb;
+
+	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+	/* Convert timebase ticks to jiffies */
+	nr_jiffies = wdt_ticks;
+
+	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+		nr_jiffies++;
+
+	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr_jiffies;
+
+	spin_lock(&vcpu->arch.wdt_lock);
+	nr_jiffies = watchdog_next_timeout(vcpu);
+	/*
+	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+	 * then do not run the watchdog timer as this can break timer APIs.
+	 */
+	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+	else
+		del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock(&vcpu->arch.wdt_lock);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	u32 tsr, new_tsr;
+	int final;
+
+	do {
+		new_tsr = tsr = vcpu->arch.tsr;
+		final = 0;
+
+		/* Time out event */
+		if (tsr & TSR_ENW) {
+			if (tsr & TSR_WIS)
+				final = 1;
+			else
+				new_tsr = tsr | TSR_WIS;
+		} else {
+			new_tsr = tsr | TSR_ENW;
+		}
+	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+	if (new_tsr & TSR_WIS) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * If this is final watchdog expiry and some action is required
+	 * then exit to userspace.
+	 */
+	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+	    vcpu->arch.watchdog_enabled) { 
+		smp_wmb();
+		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * Stop running the watchdog timer after final expiration to
+	 * prevent the host from being flooded with timers if the
+	 * guest sets a short period.
+	 * Timers will resume when TSR/TCR is updated next time.
+	 */
+	if (!final)
+		arm_next_watchdog(vcpu);
+}
+
 static void update_timer_ints(struct kvm_vcpu *vcpu)
 {
 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
 		kvmppc_core_queue_dec(vcpu);
 	else
 		kvmppc_core_dequeue_dec(vcpu);
+
+	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+		kvmppc_core_queue_watchdog(vcpu);
+	else
+		kvmppc_core_dequeue_watchdog(vcpu);
 }
 
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -516,6 +628,13 @@  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
+	    (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+		kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
+		ret = 0;
+		goto out;
+	}
+
 	kvm_guest_enter();
 
 #ifdef CONFIG_PPC_FPU
@@ -978,6 +1097,12 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
+	    (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+		run->exit_reason = KVM_EXIT_WATCHDOG;
+		r = RESUME_HOST | (r & RESUME_FLAG_NV);
+	}
+
 	return r;
 }
 
@@ -1106,7 +1231,13 @@  static int set_sregs_base(struct kvm_vcpu *vcpu,
 	}
 
 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		u32 old_tsr = vcpu->arch.tsr;
+
 		vcpu->arch.tsr = sregs->u.e.tsr;
+
+		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+			arm_next_watchdog(vcpu);
+
 		update_timer_ints(vcpu);
 	}
 
@@ -1267,6 +1398,7 @@  void kvmppc_core_commit_memory_region(struct kvm *kvm,
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
 	vcpu->arch.tcr = new_tcr;
+	arm_next_watchdog(vcpu);
 	update_timer_ints(vcpu);
 }
 
@@ -1281,6 +1413,14 @@  void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 {
 	clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+	/*
+	 * We may have stopped the watchdog due to
+	 * being stuck on final expiration.
+	 */
+	if (tsr_bits & (TSR_ENW | TSR_WIS))
+		arm_next_watchdog(vcpu);
+
 	update_timer_ints(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb..5a66ade 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@  int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		kvmppc_clr_tsr_bits(vcpu, spr_val);
 		break;
 	case SPRN_TCR:
+		/*
+		 * WRC is a 2-bit field that is supposed to preserve its
+		 * value once written to non-zero.
+		 */
+		if (vcpu->arch.tcr & TCR_WRC_MASK) {
+			spr_val &= ~TCR_WRC_MASK;
+			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+		}
 		kvmppc_set_tcr(vcpu, spr_val);
 		break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc8..63457cb 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -220,6 +220,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
 	case KVM_CAP_PPC_HIOR:
@@ -386,13 +387,23 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
 #endif
-
+#ifdef CONFIG_BOOKE
+	spin_lock_init(&vcpu->arch.wdt_lock);
+	/* setup watchdog timer once */
+	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+		    (unsigned long)vcpu);
+#endif
 	return 0;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_destroy(vcpu);
+#ifdef CONFIG_BOOKE
+	spin_lock(&vcpu->arch.wdt_lock);
+	del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock(&vcpu->arch.wdt_lock);
+#endif
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -637,6 +648,12 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB: {
 		struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2ce09aa..9ff5aa5 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@  struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL	  19
 #define KVM_EXIT_S390_UCONTROL	  20
+#define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -618,6 +619,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 81
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 27ac8a4..41d32be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -69,6 +69,7 @@ 
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_WATCHDOG          18
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0