diff mbox

Bug inkvm_set_irq

Message ID 20110308111320.GA8002@redhat.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Michael S. Tsirkin March 8, 2011, 11:13 a.m. UTC
On Fri, Mar 04, 2011 at 10:39:05AM +0100, Jean-Philippe Menil wrote:
> Yes, it's a 2.6.37.2 kernel.

OK, here's a debugging patch.
Please run with slab debugging as previously until you see
'eventfd bug detected!' in dmesg or until there is a crash.
It might be also useful to enable timestampts on printk with
 Symbol: PRINTK_TIME [=y]
  │ Type  : boolean
  │ Prompt: Show timing information on printks
 
once you see the error, please upload the
full dmesg output somewhere to we can track what
goes on.

Hopefully there won't be an oops this time which
should make it easier for you to test (no need to
reboot).


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jean-Philippe Menil March 9, 2011, 12:28 p.m. UTC | #1
Le 08/03/2011 12:13, Michael S. Tsirkin a écrit :
> On Fri, Mar 04, 2011 at 10:39:05AM +0100, Jean-Philippe Menil wrote:
>> Yes, it's a 2.6.37.2 kernel.
> OK, here's a debugging patch.
> Please run with slab debugging as previously until you see
> 'eventfd bug detected!' in dmesg or until there is a crash.
> It might be also useful to enable timestampts on printk with
>   Symbol: PRINTK_TIME [=y]
>    │ Type  : boolean
>    │ Prompt: Show timing information on printks
>
> once you see the error, please upload the
> full dmesg output somewhere to we can track what
> goes on.
>
> Hopefully there won't be an oops this time which
> should make it easier for you to test (no need to
> reboot).
>
>
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index c1f1e3c..3cb679b 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -32,6 +32,7 @@
>   #include<linux/eventfd.h>
>   #include<linux/kernel.h>
>   #include<linux/slab.h>
> +#include<linux/nmi.h>
>
>   #include "iodev.h"
>
> @@ -43,6 +44,8 @@
>    * --------------------------------------------------------------------
>    */
>
> +#define KVM_BAD_PTR ((void*)(long)(0x6b6b6b6b6b6b6b6bull))
> +
>   struct _irqfd {
>   	struct kvm               *kvm;
>   	struct eventfd_ctx       *eventfd;
> @@ -61,6 +64,13 @@ irqfd_inject(struct work_struct *work)
>   {
>   	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
>   	struct kvm *kvm = irqfd->kvm;
> +	if (kvm == KVM_BAD_PTR) {
> +		printk(KERN_ERR "Eventfd bug detected!\n");
> +		printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p,gsi=%d)\n", __func__,
> +			work, irqfd, kvm, irqfd->gsi);
> +		trigger_all_cpu_backtrace();
> +		return;
> +	}
>
>   	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
>   	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> @@ -75,6 +85,8 @@ irqfd_shutdown(struct work_struct *work)
>   	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
>   	u64 cnt;
>
> +	printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> +	       work, irqfd, irqfd->kvm, irqfd->gsi);
>   	/*
>   	 * Synchronize with the wait-queue and unhook ourselves to prevent
>   	 * further events.
> @@ -91,6 +103,8 @@ irqfd_shutdown(struct work_struct *work)
>   	 * It is now safe to release the object's resources
>   	 */
>   	eventfd_ctx_put(irqfd->eventfd);
> +	printk(KERN_ERR "kfree at %s(work=%p,irqfd=%p)\n", __func__,
> +	       work, irqfd);
>   	kfree(irqfd);
>   }
>
> @@ -111,6 +125,8 @@ static void
>   irqfd_deactivate(struct _irqfd *irqfd)
>   {
>   	BUG_ON(!irqfd_is_active(irqfd));
> +	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> +	       irqfd, irqfd->kvm, irqfd->gsi);
>
>   	list_del_init(&irqfd->list);
>
> @@ -178,6 +194,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>
>   	irqfd->kvm = kvm;
>   	irqfd->gsi = gsi;
> +	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> +	       irqfd, kvm, gsi);
>   	INIT_LIST_HEAD(&irqfd->list);
>   	INIT_WORK(&irqfd->inject, irqfd_inject);
>   	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
> @@ -264,6 +282,8 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
>   	struct _irqfd *irqfd, *tmp;
>   	struct eventfd_ctx *eventfd;
>
> +	printk(KERN_ERR "%s(kvm=%p, gsi=%d)\n", __func__,
> +	       kvm, gsi);
>   	eventfd = eventfd_ctx_fdget(fd);
>   	if (IS_ERR(eventfd))
>   		return PTR_ERR(eventfd);
> @@ -305,6 +325,7 @@ void
>   kvm_irqfd_release(struct kvm *kvm)
>   {
>   	struct _irqfd *irqfd, *tmp;
> +	printk(KERN_ERR "%s(kvm=%p)\n", __func__, kvm);
>
>   	spin_lock_irq(&kvm->irqfds.lock);
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hi,

I boot the host with the patched kernel yesterday.
No crach until now, but two "Eventfd bug detected!" in the log at "Mar  
9 02:04:31" and "Mar  9 02:15:17"
You can find part of the log at the following adress:
http://filex.univ-nantes.fr/get?k=jL4Fe7yfSMN57toAH7V

It a split file of the kern.log (1,4G), so if you need another part of 
the log, let me know.

Thanks for all.

Regards.
Michael S. Tsirkin March 9, 2011, 1 p.m. UTC | #2
On Wed, Mar 09, 2011 at 01:28:43PM +0100, Jean-Philippe Menil wrote:
> Le 08/03/2011 12:13, Michael S. Tsirkin a écrit :
> >On Fri, Mar 04, 2011 at 10:39:05AM +0100, Jean-Philippe Menil wrote:
> >>Yes, it's a 2.6.37.2 kernel.
> >OK, here's a debugging patch.
> >Please run with slab debugging as previously until you see
> >'eventfd bug detected!' in dmesg or until there is a crash.
> >It might be also useful to enable timestampts on printk with
> >  Symbol: PRINTK_TIME [=y]
> >   │ Type  : boolean
> >   │ Prompt: Show timing information on printks
> >
> >once you see the error, please upload the
> >full dmesg output somewhere to we can track what
> >goes on.
> >
> >Hopefully there won't be an oops this time which
> >should make it easier for you to test (no need to
> >reboot).
> >
> >
> >diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> >index c1f1e3c..3cb679b 100644
> >--- a/virt/kvm/eventfd.c
> >+++ b/virt/kvm/eventfd.c
> >@@ -32,6 +32,7 @@
> >  #include<linux/eventfd.h>
> >  #include<linux/kernel.h>
> >  #include<linux/slab.h>
> >+#include<linux/nmi.h>
> >
> >  #include "iodev.h"
> >
> >@@ -43,6 +44,8 @@
> >   * --------------------------------------------------------------------
> >   */
> >
> >+#define KVM_BAD_PTR ((void*)(long)(0x6b6b6b6b6b6b6b6bull))
> >+
> >  struct _irqfd {
> >  	struct kvm               *kvm;
> >  	struct eventfd_ctx       *eventfd;
> >@@ -61,6 +64,13 @@ irqfd_inject(struct work_struct *work)
> >  {
> >  	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> >  	struct kvm *kvm = irqfd->kvm;
> >+	if (kvm == KVM_BAD_PTR) {
> >+		printk(KERN_ERR "Eventfd bug detected!\n");
> >+		printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p,gsi=%d)\n", __func__,
> >+			work, irqfd, kvm, irqfd->gsi);
> >+		trigger_all_cpu_backtrace();
> >+		return;
> >+	}
> >
> >  	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> >  	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> >@@ -75,6 +85,8 @@ irqfd_shutdown(struct work_struct *work)
> >  	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
> >  	u64 cnt;
> >
> >+	printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> >+	       work, irqfd, irqfd->kvm, irqfd->gsi);
> >  	/*
> >  	 * Synchronize with the wait-queue and unhook ourselves to prevent
> >  	 * further events.
> >@@ -91,6 +103,8 @@ irqfd_shutdown(struct work_struct *work)
> >  	 * It is now safe to release the object's resources
> >  	 */
> >  	eventfd_ctx_put(irqfd->eventfd);
> >+	printk(KERN_ERR "kfree at %s(work=%p,irqfd=%p)\n", __func__,
> >+	       work, irqfd);
> >  	kfree(irqfd);
> >  }
> >
> >@@ -111,6 +125,8 @@ static void
> >  irqfd_deactivate(struct _irqfd *irqfd)
> >  {
> >  	BUG_ON(!irqfd_is_active(irqfd));
> >+	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> >+	       irqfd, irqfd->kvm, irqfd->gsi);
> >
> >  	list_del_init(&irqfd->list);
> >
> >@@ -178,6 +194,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> >
> >  	irqfd->kvm = kvm;
> >  	irqfd->gsi = gsi;
> >+	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
> >+	       irqfd, kvm, gsi);
> >  	INIT_LIST_HEAD(&irqfd->list);
> >  	INIT_WORK(&irqfd->inject, irqfd_inject);
> >  	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
> >@@ -264,6 +282,8 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> >  	struct _irqfd *irqfd, *tmp;
> >  	struct eventfd_ctx *eventfd;
> >
> >+	printk(KERN_ERR "%s(kvm=%p, gsi=%d)\n", __func__,
> >+	       kvm, gsi);
> >  	eventfd = eventfd_ctx_fdget(fd);
> >  	if (IS_ERR(eventfd))
> >  		return PTR_ERR(eventfd);
> >@@ -305,6 +325,7 @@ void
> >  kvm_irqfd_release(struct kvm *kvm)
> >  {
> >  	struct _irqfd *irqfd, *tmp;
> >+	printk(KERN_ERR "%s(kvm=%p)\n", __func__, kvm);
> >
> >  	spin_lock_irq(&kvm->irqfds.lock);
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe netdev" in
> >the body of a message to majordomo@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Hi,
> 
> I boot the host with the patched kernel yesterday.
> No crach until now, but two "Eventfd bug detected!" in the log at
> "Mar  9 02:04:31" and "Mar  9 02:15:17"
> You can find part of the log at the following adress:
> http://filex.univ-nantes.fr/get?k=jL4Fe7yfSMN57toAH7V
> 
> It a split file of the kern.log (1,4G), so if you need another part
> of the log, let me know.
> 
> Thanks for all.
> 
> Regards.

Downloading, it's big :)
What about some 1000 lines before and after Eventfd bug detected! line?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jean-Philippe Menil March 9, 2011, 1:12 p.m. UTC | #3
Le 09/03/2011 14:00, Michael S. Tsirkin a écrit :
> On Wed, Mar 09, 2011 at 01:28:43PM +0100, Jean-Philippe Menil wrote:
>> Le 08/03/2011 12:13, Michael S. Tsirkin a écrit :
>>> On Fri, Mar 04, 2011 at 10:39:05AM +0100, Jean-Philippe Menil wrote:
>>>> Yes, it's a 2.6.37.2 kernel.
>>> OK, here's a debugging patch.
>>> Please run with slab debugging as previously until you see
>>> 'eventfd bug detected!' in dmesg or until there is a crash.
>>> It might be also useful to enable timestampts on printk with
>>>   Symbol: PRINTK_TIME [=y]
>>>    │ Type  : boolean
>>>    │ Prompt: Show timing information on printks
>>>
>>> once you see the error, please upload the
>>> full dmesg output somewhere to we can track what
>>> goes on.
>>>
>>> Hopefully there won't be an oops this time which
>>> should make it easier for you to test (no need to
>>> reboot).
>>>
>>>
>>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>>> index c1f1e3c..3cb679b 100644
>>> --- a/virt/kvm/eventfd.c
>>> +++ b/virt/kvm/eventfd.c
>>> @@ -32,6 +32,7 @@
>>>   #include<linux/eventfd.h>
>>>   #include<linux/kernel.h>
>>>   #include<linux/slab.h>
>>> +#include<linux/nmi.h>
>>>
>>>   #include "iodev.h"
>>>
>>> @@ -43,6 +44,8 @@
>>>    * --------------------------------------------------------------------
>>>    */
>>>
>>> +#define KVM_BAD_PTR ((void*)(long)(0x6b6b6b6b6b6b6b6bull))
>>> +
>>>   struct _irqfd {
>>>   	struct kvm               *kvm;
>>>   	struct eventfd_ctx       *eventfd;
>>> @@ -61,6 +64,13 @@ irqfd_inject(struct work_struct *work)
>>>   {
>>>   	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
>>>   	struct kvm *kvm = irqfd->kvm;
>>> +	if (kvm == KVM_BAD_PTR) {
>>> +		printk(KERN_ERR "Eventfd bug detected!\n");
>>> +		printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p,gsi=%d)\n", __func__,
>>> +			work, irqfd, kvm, irqfd->gsi);
>>> +		trigger_all_cpu_backtrace();
>>> +		return;
>>> +	}
>>>
>>>   	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
>>>   	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
>>> @@ -75,6 +85,8 @@ irqfd_shutdown(struct work_struct *work)
>>>   	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
>>>   	u64 cnt;
>>>
>>> +	printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
>>> +	       work, irqfd, irqfd->kvm, irqfd->gsi);
>>>   	/*
>>>   	 * Synchronize with the wait-queue and unhook ourselves to prevent
>>>   	 * further events.
>>> @@ -91,6 +103,8 @@ irqfd_shutdown(struct work_struct *work)
>>>   	 * It is now safe to release the object's resources
>>>   	 */
>>>   	eventfd_ctx_put(irqfd->eventfd);
>>> +	printk(KERN_ERR "kfree at %s(work=%p,irqfd=%p)\n", __func__,
>>> +	       work, irqfd);
>>>   	kfree(irqfd);
>>>   }
>>>
>>> @@ -111,6 +125,8 @@ static void
>>>   irqfd_deactivate(struct _irqfd *irqfd)
>>>   {
>>>   	BUG_ON(!irqfd_is_active(irqfd));
>>> +	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
>>> +	       irqfd, irqfd->kvm, irqfd->gsi);
>>>
>>>   	list_del_init(&irqfd->list);
>>>
>>> @@ -178,6 +194,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>>>
>>>   	irqfd->kvm = kvm;
>>>   	irqfd->gsi = gsi;
>>> +	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
>>> +	       irqfd, kvm, gsi);
>>>   	INIT_LIST_HEAD(&irqfd->list);
>>>   	INIT_WORK(&irqfd->inject, irqfd_inject);
>>>   	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
>>> @@ -264,6 +282,8 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
>>>   	struct _irqfd *irqfd, *tmp;
>>>   	struct eventfd_ctx *eventfd;
>>>
>>> +	printk(KERN_ERR "%s(kvm=%p, gsi=%d)\n", __func__,
>>> +	       kvm, gsi);
>>>   	eventfd = eventfd_ctx_fdget(fd);
>>>   	if (IS_ERR(eventfd))
>>>   		return PTR_ERR(eventfd);
>>> @@ -305,6 +325,7 @@ void
>>>   kvm_irqfd_release(struct kvm *kvm)
>>>   {
>>>   	struct _irqfd *irqfd, *tmp;
>>> +	printk(KERN_ERR "%s(kvm=%p)\n", __func__, kvm);
>>>
>>>   	spin_lock_irq(&kvm->irqfds.lock);
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Hi,
>>
>> I boot the host with the patched kernel yesterday.
>> No crach until now, but two "Eventfd bug detected!" in the log at
>> "Mar  9 02:04:31" and "Mar  9 02:15:17"
>> You can find part of the log at the following adress:
>> http://filex.univ-nantes.fr/get?k=jL4Fe7yfSMN57toAH7V
>>
>> It a split file of the kern.log (1,4G), so if you need another part
>> of the log, let me know.
>>
>> Thanks for all.
>>
>> Regards.
> Downloading, it's big :)
> What about some 1000 lines before and after Eventfd bug detected! line?
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yes, sorry about that.
I could have split my log into a smaller file.

I was a little afraid of not transmit enough informations, and i was a 
bit wide.
I hope you can find usefull trace anyway.

Regards.
diff mbox

Patch

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c..3cb679b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@ 
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/nmi.h>
 
 #include "iodev.h"
 
@@ -43,6 +44,8 @@ 
  * --------------------------------------------------------------------
  */
 
+#define KVM_BAD_PTR ((void*)(long)(0x6b6b6b6b6b6b6b6bull))
+
 struct _irqfd {
 	struct kvm               *kvm;
 	struct eventfd_ctx       *eventfd;
@@ -61,6 +64,13 @@  irqfd_inject(struct work_struct *work)
 {
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
+	if (kvm == KVM_BAD_PTR) {
+		printk(KERN_ERR "Eventfd bug detected!\n");
+		printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p,gsi=%d)\n", __func__,
+			work, irqfd, kvm, irqfd->gsi);
+		trigger_all_cpu_backtrace();
+		return;
+	}
 
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
@@ -75,6 +85,8 @@  irqfd_shutdown(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
 	u64 cnt;
 
+	printk(KERN_ERR "%s(work=%p,irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
+	       work, irqfd, irqfd->kvm, irqfd->gsi);
 	/*
 	 * Synchronize with the wait-queue and unhook ourselves to prevent
 	 * further events.
@@ -91,6 +103,8 @@  irqfd_shutdown(struct work_struct *work)
 	 * It is now safe to release the object's resources
 	 */
 	eventfd_ctx_put(irqfd->eventfd);
+	printk(KERN_ERR "kfree at %s(work=%p,irqfd=%p)\n", __func__,
+	       work, irqfd);
 	kfree(irqfd);
 }
 
@@ -111,6 +125,8 @@  static void
 irqfd_deactivate(struct _irqfd *irqfd)
 {
 	BUG_ON(!irqfd_is_active(irqfd));
+	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
+	       irqfd, irqfd->kvm, irqfd->gsi);
 
 	list_del_init(&irqfd->list);
 
@@ -178,6 +194,8 @@  kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 
 	irqfd->kvm = kvm;
 	irqfd->gsi = gsi;
+	printk(KERN_ERR "%s(irqfd=%p,kvm=%p, gsi=%d)\n", __func__,
+	       irqfd, kvm, gsi);
 	INIT_LIST_HEAD(&irqfd->list);
 	INIT_WORK(&irqfd->inject, irqfd_inject);
 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
@@ -264,6 +282,8 @@  kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 	struct _irqfd *irqfd, *tmp;
 	struct eventfd_ctx *eventfd;
 
+	printk(KERN_ERR "%s(kvm=%p, gsi=%d)\n", __func__,
+	       kvm, gsi);
 	eventfd = eventfd_ctx_fdget(fd);
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
@@ -305,6 +325,7 @@  void
 kvm_irqfd_release(struct kvm *kvm)
 {
 	struct _irqfd *irqfd, *tmp;
+	printk(KERN_ERR "%s(kvm=%p)\n", __func__, kvm);
 
 	spin_lock_irq(&kvm->irqfds.lock);