Patchwork [PATCHv3] report that QEMU process was killed by a signal

login
register
mail settings
Submitter Gleb Natapov
Date March 15, 2011, 11:56 a.m.
Message ID <20110315115604.GY10151@redhat.com>
Download mbox | patch
Permalink /patch/86966/
State New
Headers show

Comments

Gleb Natapov - March 15, 2011, 11:56 a.m.
Currently when rogue script kills QEMU process (using TERM/INT/HUP
signal) it looks indistinguishable from system shutdown. Lets report
that QEMU was killed and leave some clues about the killer identity.
  
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
  v1->v2:
    - print message from a main loop instead of signal handler
  v2->v3
    - use pid_t to store pid instead of int 

--
			Gleb.
Gleb Natapov - March 25, 2011, 12:04 p.m.
Ping?

On Tue, Mar 15, 2011 at 01:56:04PM +0200, Gleb Natapov wrote:
> Currently when rogue script kills QEMU process (using TERM/INT/HUP
> signal) it looks indistinguishable from system shutdown. Lets report
> that QEMU was killed and leave some clues about the killer identity.
>   
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>   v1->v2:
>     - print message from a main loop instead of signal handler
>   v2->v3
>     - use pid_t to store pid instead of int 
> 
> diff --git a/os-posix.c b/os-posix.c
> index 38c29d1..36d937c 100644
> --- a/os-posix.c
> +++ b/os-posix.c
> @@ -61,9 +61,9 @@ void os_setup_early_signal_handling(void)
>      sigaction(SIGPIPE, &act, NULL);
>  }
>  
> -static void termsig_handler(int signal)
> +static void termsig_handler(int signal, siginfo_t *info, void *c)
>  {
> -    qemu_system_shutdown_request();
> +    qemu_system_killed(info->si_signo, info->si_pid);
>  }
>  
>  static void sigchld_handler(int signal)
> @@ -76,7 +76,8 @@ void os_setup_signal_handling(void)
>      struct sigaction act;
>  
>      memset(&act, 0, sizeof(act));
> -    act.sa_handler = termsig_handler;
> +    act.sa_sigaction = termsig_handler;
> +    act.sa_flags = SA_SIGINFO;
>      sigaction(SIGINT,  &act, NULL);
>      sigaction(SIGHUP,  &act, NULL);
>      sigaction(SIGTERM, &act, NULL);
> diff --git a/sysemu.h b/sysemu.h
> index 0a83ab9..f868500 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -66,6 +66,8 @@ void qemu_system_vmstop_request(int reason);
>  int qemu_shutdown_requested(void);
>  int qemu_reset_requested(void);
>  int qemu_powerdown_requested(void);
> +void qemu_system_killed(int signal, pid_t pid);
> +void qemu_kill_report(void);
>  extern qemu_irq qemu_system_powerdown;
>  void qemu_system_reset(void);
>  
> diff --git a/vl.c b/vl.c
> index 5e007a7..000c845 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -1213,7 +1213,8 @@ typedef struct QEMUResetEntry {
>  static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
>      QTAILQ_HEAD_INITIALIZER(reset_handlers);
>  static int reset_requested;
> -static int shutdown_requested;
> +static int shutdown_requested, shutdown_signal = -1;
> +static pid_t shutdown_pid;
>  static int powerdown_requested;
>  static int debug_requested;
>  static int vmstop_requested;
> @@ -1225,6 +1226,15 @@ int qemu_shutdown_requested(void)
>      return r;
>  }
>  
> +void qemu_kill_report(void)
> +{
> +    if (shutdown_signal != -1) {
> +        fprintf(stderr, "Got signal %d from pid %d\n",
> +                         shutdown_signal, shutdown_pid);
> +        shutdown_signal = -1;
> +    }
> +}
> +
>  int qemu_reset_requested(void)
>  {
>      int r = reset_requested;
> @@ -1298,6 +1308,13 @@ void qemu_system_reset_request(void)
>      qemu_notify_event();
>  }
>  
> +void qemu_system_killed(int signal, pid_t pid)
> +{
> +    shutdown_signal = signal;
> +    shutdown_pid = pid;
> +    qemu_system_shutdown_request();
> +}
> +
>  void qemu_system_shutdown_request(void)
>  {
>      shutdown_requested = 1;
> @@ -1441,6 +1458,7 @@ static void main_loop(void)
>              vm_stop(VMSTOP_DEBUG);
>          }
>          if (qemu_shutdown_requested()) {
> +            qemu_kill_report();
>              monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
>              if (no_shutdown) {
>                  vm_stop(VMSTOP_SHUTDOWN);
> --
> 			Gleb.

--
			Gleb.
Blue Swirl - March 26, 2011, 1:50 p.m.
On Fri, Mar 25, 2011 at 2:04 PM, Gleb Natapov <gleb@redhat.com> wrote:
> Ping?

Does not work:
INT:
Got signal 951049944 from pid 0
TERM:
Got signal -1553068904 from pid 0
HUP:
Got signal 1 from pid 16185
Even here the pid is not correct, it should be 3098.
Gleb Natapov - March 26, 2011, 1:55 p.m.
On Sat, Mar 26, 2011 at 03:50:46PM +0200, Blue Swirl wrote:
> On Fri, Mar 25, 2011 at 2:04 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > Ping?
> 
> Does not work:
> INT:
> Got signal 951049944 from pid 0
> TERM:
> Got signal -1553068904 from pid 0
You use SDL correct? This is SDL problem and I fixed it in SDL upstream.

> HUP:
> Got signal 1 from pid 16185
> Even here the pid is not correct, it should be 3098.
HUP should work. Why do you think that pid should be 3098? Bash has its
own build in kill command IIRC.

--
			Gleb.
Blue Swirl - March 26, 2011, 2:12 p.m.
On Sat, Mar 26, 2011 at 3:55 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sat, Mar 26, 2011 at 03:50:46PM +0200, Blue Swirl wrote:
>> On Fri, Mar 25, 2011 at 2:04 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > Ping?
>>
>> Does not work:
>> INT:
>> Got signal 951049944 from pid 0
>> TERM:
>> Got signal -1553068904 from pid 0
> You use SDL correct? This is SDL problem and I fixed it in SDL upstream.

OK, with VNC it works.

>> HUP:
>> Got signal 1 from pid 16185
>> Even here the pid is not correct, it should be 3098.
> HUP should work. Why do you think that pid should be 3098? Bash has its
> own build in kill command IIRC.

Right, I used killall which isn't a builtin, sorry. Thanks, applied.
Peter Maydell - March 30, 2011, 6:39 p.m.
On 15 March 2011 11:56, Gleb Natapov <gleb@redhat.com> wrote:
> Currently when rogue script kills QEMU process (using TERM/INT/HUP
> signal) it looks indistinguishable from system shutdown. Lets report
> that QEMU was killed and leave some clues about the killer identity.

Unfortunately this patch causes qemu to segfault when killed
via ^C (at least on my Ubuntu maverick system). This is because
it registers a signal handler with sigaction, but then later
the SDL library is initialised and it reinstalls our handler
with plain old signal:

        ohandler = signal(SIGINT, SDL_HandleSIG);
        if ( ohandler != SIG_DFL )
                signal(SIGINT, ohandler);

This is clearly buggy but on the other hand SDL is pretty widely
deployed and it's the default QEMU video output method, so I think
we need to work around it :-(

The most straightforward fix is to get the signal number from
argument one and not to bother printing the PID that killed us.

-- PMM
Gleb Natapov - March 30, 2011, 6:43 p.m.
On Wed, Mar 30, 2011 at 07:39:31PM +0100, Peter Maydell wrote:
> On 15 March 2011 11:56, Gleb Natapov <gleb@redhat.com> wrote:
> > Currently when rogue script kills QEMU process (using TERM/INT/HUP
> > signal) it looks indistinguishable from system shutdown. Lets report
> > that QEMU was killed and leave some clues about the killer identity.
> 
> Unfortunately this patch causes qemu to segfault when killed
> via ^C (at least on my Ubuntu maverick system). This is because
> it registers a signal handler with sigaction, but then later
> the SDL library is initialised and it reinstalls our handler
> with plain old signal:
> 
>         ohandler = signal(SIGINT, SDL_HandleSIG);
>         if ( ohandler != SIG_DFL )
>                 signal(SIGINT, ohandler);
> 
I fixed this in SDL upstream.

> This is clearly buggy but on the other hand SDL is pretty widely
> deployed and it's the default QEMU video output method, so I think
> we need to work around it :-(
> 
> The most straightforward fix is to get the signal number from
> argument one and not to bother printing the PID that killed us.
> 
For debugging purposes pid is useful. We cam register signal handler
after SDL is initialized though (if waiting for SDL update is not an
option).

--
			Gleb.
Anthony Liguori - March 30, 2011, 6:49 p.m.
On 03/30/2011 01:39 PM, Peter Maydell wrote:
> On 15 March 2011 11:56, Gleb Natapov<gleb@redhat.com>  wrote:
>> Currently when rogue script kills QEMU process (using TERM/INT/HUP
>> signal) it looks indistinguishable from system shutdown. Lets report
>> that QEMU was killed and leave some clues about the killer identity.
> Unfortunately this patch causes qemu to segfault when killed
> via ^C (at least on my Ubuntu maverick system). This is because
> it registers a signal handler with sigaction, but then later
> the SDL library is initialised and it reinstalls our handler
> with plain old signal:
>
>          ohandler = signal(SIGINT, SDL_HandleSIG);
>          if ( ohandler != SIG_DFL )
>                  signal(SIGINT, ohandler);
>
> This is clearly buggy but on the other hand SDL is pretty widely
> deployed and it's the default QEMU video output method, so I think
> we need to work around it :-(
>
> The most straightforward fix is to get the signal number from
> argument one and not to bother printing the PID that killed us.

Or just #ifdefing this to 0 for now and then once an SDL version is 
released that works correctly, adding an appropriate guard.

Regards,

Anthony Liguori

> -- PMM
>
Gleb Natapov - March 30, 2011, 6:51 p.m.
On Wed, Mar 30, 2011 at 01:49:10PM -0500, Anthony Liguori wrote:
> On 03/30/2011 01:39 PM, Peter Maydell wrote:
> >On 15 March 2011 11:56, Gleb Natapov<gleb@redhat.com>  wrote:
> >>Currently when rogue script kills QEMU process (using TERM/INT/HUP
> >>signal) it looks indistinguishable from system shutdown. Lets report
> >>that QEMU was killed and leave some clues about the killer identity.
> >Unfortunately this patch causes qemu to segfault when killed
> >via ^C (at least on my Ubuntu maverick system). This is because
> >it registers a signal handler with sigaction, but then later
> >the SDL library is initialised and it reinstalls our handler
> >with plain old signal:
> >
> >         ohandler = signal(SIGINT, SDL_HandleSIG);
> >         if ( ohandler != SIG_DFL )
> >                 signal(SIGINT, ohandler);
> >
> >This is clearly buggy but on the other hand SDL is pretty widely
> >deployed and it's the default QEMU video output method, so I think
> >we need to work around it :-(
> >
> >The most straightforward fix is to get the signal number from
> >argument one and not to bother printing the PID that killed us.
> 
> Or just #ifdefing this to 0 for now and then once an SDL version is
> released that works correctly, adding an appropriate guard.
> 
I prefer to move signal init after SDL init.

--
			Gleb.
Peter Maydell - March 30, 2011, 6:53 p.m.
On 30 March 2011 19:43, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Mar 30, 2011 at 07:39:31PM +0100, Peter Maydell wrote:
>> Unfortunately this patch causes qemu to segfault when killed
>> via ^C (at least on my Ubuntu maverick system). This is because
>> it registers a signal handler with sigaction, but then later
>> the SDL library is initialised and it reinstalls our handler
>> with plain old signal:
>>
>>         ohandler = signal(SIGINT, SDL_HandleSIG);
>>         if ( ohandler != SIG_DFL )
>>                 signal(SIGINT, ohandler);
>>
> I fixed this in SDL upstream.

Cool, but we can't rely on the SDL we're linked against being
a bugfixed version.

>> This is clearly buggy but on the other hand SDL is pretty widely
>> deployed and it's the default QEMU video output method, so I think
>> we need to work around it :-(
>>
>> The most straightforward fix is to get the signal number from
>> argument one and not to bother printing the PID that killed us.
>>
> For debugging purposes pid is useful. We cam register signal handler
> after SDL is initialized though (if waiting for SDL update is not an
> option).

I'm not convinced about the utility of printing the pid, personally.
Most programs get along fine without printing anything when
they receive a terminal signal. However you're right that it should
be straightforward enough to move the signal init. In fact it
looks like this got broken somewhere along the line, the
call to os_setup_signal_handling() is already preceded with a comment:

    /* must be after terminal init, SDL library changes signal handlers */
    os_setup_signal_handling();

...we just aren't actually after the terminal init any more :-(

-- PMM
malc - March 30, 2011, 9:35 p.m.
On Wed, 30 Mar 2011, Peter Maydell wrote:

> On 15 March 2011 11:56, Gleb Natapov <gleb@redhat.com> wrote:
> > Currently when rogue script kills QEMU process (using TERM/INT/HUP
> > signal) it looks indistinguishable from system shutdown. Lets report
> > that QEMU was killed and leave some clues about the killer identity.
> 
> Unfortunately this patch causes qemu to segfault when killed
> via ^C (at least on my Ubuntu maverick system). This is because
> it registers a signal handler with sigaction, but then later
> the SDL library is initialised and it reinstalls our handler
> with plain old signal:
> 
>         ohandler = signal(SIGINT, SDL_HandleSIG);
>         if ( ohandler != SIG_DFL )
>                 signal(SIGINT, ohandler);
> 
> This is clearly buggy but on the other hand SDL is pretty widely
> deployed and it's the default QEMU video output method, so I think
> we need to work around it :-(
> 
> The most straightforward fix is to get the signal number from
> argument one and not to bother printing the PID that killed us.
> 

Maybe using SDL_INIT_NOPARACHUTE is worth doing?
Gleb Natapov - March 31, 2011, 5:35 a.m.
On Thu, Mar 31, 2011 at 01:35:31AM +0400, malc wrote:
> On Wed, 30 Mar 2011, Peter Maydell wrote:
> 
> > On 15 March 2011 11:56, Gleb Natapov <gleb@redhat.com> wrote:
> > > Currently when rogue script kills QEMU process (using TERM/INT/HUP
> > > signal) it looks indistinguishable from system shutdown. Lets report
> > > that QEMU was killed and leave some clues about the killer identity.
> > 
> > Unfortunately this patch causes qemu to segfault when killed
> > via ^C (at least on my Ubuntu maverick system). This is because
> > it registers a signal handler with sigaction, but then later
> > the SDL library is initialised and it reinstalls our handler
> > with plain old signal:
> > 
> >         ohandler = signal(SIGINT, SDL_HandleSIG);
> >         if ( ohandler != SIG_DFL )
> >                 signal(SIGINT, ohandler);
> > 
> > This is clearly buggy but on the other hand SDL is pretty widely
> > deployed and it's the default QEMU video output method, so I think
> > we need to work around it :-(
> > 
> > The most straightforward fix is to get the signal number from
> > argument one and not to bother printing the PID that killed us.
> > 
> 
> Maybe using SDL_INIT_NOPARACHUTE is worth doing?
> 
We do it already, but SDL mangles signal handlers anyway. The funny
thing is that parachute code actually correctly use sigaction when
available.

--
			Gleb.

Patch

diff --git a/os-posix.c b/os-posix.c
index 38c29d1..36d937c 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -61,9 +61,9 @@  void os_setup_early_signal_handling(void)
     sigaction(SIGPIPE, &act, NULL);
 }
 
-static void termsig_handler(int signal)
+static void termsig_handler(int signal, siginfo_t *info, void *c)
 {
-    qemu_system_shutdown_request();
+    qemu_system_killed(info->si_signo, info->si_pid);
 }
 
 static void sigchld_handler(int signal)
@@ -76,7 +76,8 @@  void os_setup_signal_handling(void)
     struct sigaction act;
 
     memset(&act, 0, sizeof(act));
-    act.sa_handler = termsig_handler;
+    act.sa_sigaction = termsig_handler;
+    act.sa_flags = SA_SIGINFO;
     sigaction(SIGINT,  &act, NULL);
     sigaction(SIGHUP,  &act, NULL);
     sigaction(SIGTERM, &act, NULL);
diff --git a/sysemu.h b/sysemu.h
index 0a83ab9..f868500 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -66,6 +66,8 @@  void qemu_system_vmstop_request(int reason);
 int qemu_shutdown_requested(void);
 int qemu_reset_requested(void);
 int qemu_powerdown_requested(void);
+void qemu_system_killed(int signal, pid_t pid);
+void qemu_kill_report(void);
 extern qemu_irq qemu_system_powerdown;
 void qemu_system_reset(void);
 
diff --git a/vl.c b/vl.c
index 5e007a7..000c845 100644
--- a/vl.c
+++ b/vl.c
@@ -1213,7 +1213,8 @@  typedef struct QEMUResetEntry {
 static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
     QTAILQ_HEAD_INITIALIZER(reset_handlers);
 static int reset_requested;
-static int shutdown_requested;
+static int shutdown_requested, shutdown_signal = -1;
+static pid_t shutdown_pid;
 static int powerdown_requested;
 static int debug_requested;
 static int vmstop_requested;
@@ -1225,6 +1226,15 @@  int qemu_shutdown_requested(void)
     return r;
 }
 
+void qemu_kill_report(void)
+{
+    if (shutdown_signal != -1) {
+        fprintf(stderr, "Got signal %d from pid %d\n",
+                         shutdown_signal, shutdown_pid);
+        shutdown_signal = -1;
+    }
+}
+
 int qemu_reset_requested(void)
 {
     int r = reset_requested;
@@ -1298,6 +1308,13 @@  void qemu_system_reset_request(void)
     qemu_notify_event();
 }
 
+void qemu_system_killed(int signal, pid_t pid)
+{
+    shutdown_signal = signal;
+    shutdown_pid = pid;
+    qemu_system_shutdown_request();
+}
+
 void qemu_system_shutdown_request(void)
 {
     shutdown_requested = 1;
@@ -1441,6 +1458,7 @@  static void main_loop(void)
             vm_stop(VMSTOP_DEBUG);
         }
         if (qemu_shutdown_requested()) {
+            qemu_kill_report();
             monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
             if (no_shutdown) {
                 vm_stop(VMSTOP_SHUTDOWN);