Patchwork GuestAgent: PIDFILE remains when daemon start fails

login
register
mail settings
Submitter MATSUDA, Daiki
Date Jan. 5, 2012, 10:26 p.m.
Message ID <4F062390.6070007@intellilink.co.jp>
Download mbox | patch
Permalink /patch/134551/
State New
Headers show

Comments

MATSUDA, Daiki - Jan. 5, 2012, 10:26 p.m.
Hi, all.

I am trying the QEMU Guest Agent and encountered a small bug: the
PIDFILE remains when the daemon fails to start. A g_free() call may also have been forgotten.

MATSUDA, Daiki
Michael Roth - Jan. 6, 2012, 12:18 a.m.
On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
> Hi, all.
>
> I am trying QEMU Guest Agent and encountered a small bug. It is that the
> PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
>
> MATSUDA, Daiki
>

Thanks for the patch. There was some contention in the past about 
whether or not to clean up pidfiles when there was abnormal termination, 
but personally I like this approach better. Luiz?:

http://www.mail-archive.com/qemu-devel@nongnu.org/msg67889.html

Patch looks good. We're missing some cleanup (gio channels, g_main_loop, 
etc.), but that can be added later, and some of those make more sense to 
have in main(). One suggestion below though:

>
>
> diff -uNrp qemu/qemu-ga.c qemu-c47f3223658119219bbe0b8d09da733d1c06e76f/qemu-ga.c
> --- qemu/qemu-ga.c	2012-01-05 01:06:25.000000000 +0900
> +++ qemu-c47f3223658119219bbe0b8d09da733d1c06e76f/qemu-ga.c	2012-01-06 07:07:03.807872085 +0900
> @@ -49,6 +49,13 @@ struct GAState {
>  };
>
>  static struct GAState *ga_state;
> +const char *pidfile = QGA_PIDFILE_DEFAULT;
> +
> +static void cleanup(void)
> +{
> +    g_free(ga_state);
> +    unlink(pidfile);
> +}
>
>  static void quit_handler(int sig)
>  {
> @@ -70,6 +77,7 @@ static void register_signal_handlers(voi
>      ret = sigaction(SIGINT, &sigact, NULL);
>      if (ret == -1) {
>          g_error("error configuring signal handler: %s", strerror(errno));
> +        cleanup();
>          exit(EXIT_FAILURE);
>      }
>      ret = sigaction(SIGTERM, &sigact, NULL);
> @@ -485,6 +493,7 @@ static void init_guest_agent(GAState *s)
>      if (s->path == NULL) {
>          if (strcmp(s->method, "virtio-serial") != 0) {
>              g_critical("must specify a path for this channel");
> +            cleanup();

we should probably just do a "return false;" or something here, check 
for return value in main(), and call cleanup()/exit() there. Looks a 
little nicer at least, and makes it easier to determine when to clean up 
(hard to tell whether init_guest_agent() was run before/after pidfile 
creation here, for instance, but obvious in main()). Same with 
register_signal_handlers() actually.

>              exit(EXIT_FAILURE);
>          }
>          /* try the default path for the virtio-serial port */
> @@ -496,17 +505,20 @@ static void init_guest_agent(GAState *s)
>          fd = qemu_open(s->path, O_RDWR | O_NONBLOCK | O_ASYNC);
>          if (fd == -1) {
>              g_critical("error opening channel: %s", strerror(errno));
> +            cleanup();
>              exit(EXIT_FAILURE);
>          }
>          ret = conn_channel_add(s, fd);
>          if (ret) {
>              g_critical("error adding channel to main loop");
> +            cleanup();
>              exit(EXIT_FAILURE);
>          }
>      } else if (strcmp(s->method, "isa-serial") == 0) {
>          fd = qemu_open(s->path, O_RDWR | O_NOCTTY);
>          if (fd == -1) {
>              g_critical("error opening channel: %s", strerror(errno));
> +            cleanup();

same here. etc.

>              exit(EXIT_FAILURE);
>          }
>          tcgetattr(fd, &tio);
> @@ -533,15 +545,18 @@ static void init_guest_agent(GAState *s)
>          fd = unix_listen(s->path, NULL, strlen(s->path));
>          if (fd == -1) {
>              g_critical("error opening path: %s", strerror(errno));
> +            cleanup();
>              exit(EXIT_FAILURE);
>          }
>          ret = listen_channel_add(s, fd, true);
>          if (ret) {
>              g_critical("error binding/listening to specified socket");
> +            cleanup();
>              exit(EXIT_FAILURE);
>          }
>      } else {
>          g_critical("unsupported channel method/type: %s", s->method);
> +        cleanup();
>          exit(EXIT_FAILURE);
>      }
>
> @@ -552,7 +567,7 @@ static void init_guest_agent(GAState *s)
>  int main(int argc, char **argv)
>  {
>      const char *sopt = "hVvdm:p:l:f:b:";
> -    const char *method = NULL, *path = NULL, *pidfile = QGA_PIDFILE_DEFAULT;
> +    const char *method = NULL, *path = NULL;
>      const struct option lopt[] = {
>          { "help", 0, NULL, 'h' },
>          { "version", 0, NULL, 'V' },
> @@ -662,7 +677,7 @@ int main(int argc, char **argv)
>      g_main_loop_run(ga_state->main_loop);
>
>      ga_command_state_cleanup_all(ga_state->command_state);
> -    unlink(pidfile);
> +    cleanup();
>
>      return 0;
>  }
Daniel P. Berrange - Jan. 6, 2012, 10:56 a.m.
On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
> On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
> >Hi, all.
> >
> >I am trying QEMU Guest Agent and encountered a small bug. It is that the
> >PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
> >
> >MATSUDA, Daiki
> >
> 
> Thanks for the patch. There was some contention in the past about
> whether or not to clean up pidfiles when there was abnormal
> termination, but personally I like this approach better.

Yep, this still leaves open the problem of pidfile cleanup when the
daemon crashes. For libvirtd we recently switched over to a crash-safe
pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
exclusive access over the pidfile. With this you don't need to worry
about forgetting to unlink() on termination, since the POSIX lock is
automatically released when process exits (or crashes).

If you're interested in copying the algorithm libvirt uses for pidfiles
then look at the virPidFileAcquirePath() function, starting line 308,
and also corresponding virPidFileReleasePath() to (optionally) call
on shutdown:

http://libvirt.org/git/?p=libvirt.git;a=blob;f=src/util/virpidfile.c;hb=HEAD

Regards,
Daniel
Michael Roth - Jan. 6, 2012, 5 p.m.
On 01/06/2012 04:56 AM, Daniel P. Berrange wrote:
> On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
>> On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
>>> Hi, all.
>>>
>>> I am trying QEMU Guest Agent and encountered a small bug. It is that the
>>> PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
>>>
>>> MATSUDA, Daiki
>>>
>>
>> Thanks for the patch. There was some contention in the past about
>> whether or not to clean up pidfiles when there was abnormal
>> termination, but personally I like this approach better.
>
> Yep, this still leaves open the problem of pidfile cleanup when the
> daemon crashes. For libvirtd we recently switched over to a crash-safe
> pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
> exclusive access over the pidfile. With this you don't need to worry
> about forgetting to unlink() on termination, since the POSIX lock is
> automatically released when process exits (or crashes).

Yup, we did the same at some point via lockf(). An argument was made 
that stale PID files from unresolved crashes should stick around, so we 
dropped it. I think we should re-evaluate that decision...libvirt taking 
the same approach is pretty good precedent for me. I don't expect to 
have state from crashed programs interrupting attempts to restart them, 
it's more an unpleasant surprise than a feature, I think.

In any case, we should clean PID files when we know/report the reason 
for the exit, at least. So the patches are good in that regard, but 
unnecessary if we go back to the F_SETLK approach, which I'd prefer.

>
> If you're interested in copying the algorithm libvirt uses for pidfiles
> then look at the virPidFileAcquirePath() function, starting line 308,
> and also corresponding virPidFileReleasePath() to (optionally) call
> on shutdown:
>
> http://libvirt.org/git/?p=libvirt.git;a=blob;f=src/util/virpidfile.c;hb=HEAD
>
> Regards,
> Daniel
Daniel P. Berrange - Jan. 6, 2012, 5:05 p.m.
On Fri, Jan 06, 2012 at 11:00:42AM -0600, Michael Roth wrote:
> On 01/06/2012 04:56 AM, Daniel P. Berrange wrote:
> >On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
> >>On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
> >>>Hi, all.
> >>>
> >>>I am trying QEMU Guest Agent and encountered a small bug. It is that the
> >>>PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
> >>>
> >>>MATSUDA, Daiki
> >>>
> >>
> >>Thanks for the patch. There was some contention in the past about
> >>whether or not to clean up pidfiles when there was abnormal
> >>termination, but personally I like this approach better.
> >
> >Yep, this still leaves open the problem of pidfile cleanup when the
> >daemon crashes. For libvirtd we recently switched over to a crash-safe
> >pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
> >exclusive access over the pidfile. With this you don't need to worry
> >about forgetting to unlink() on termination, since the POSIX lock is
> >automatically released when process exits (or crashes).
> 
> Yup, we did the same at some point via lockf(). An argument was made
> that stale PID files from unresolved crashes should stick around, so
> we dropped it. I think we should re-evaluate that decision...libvirt
> taking the same approach is pretty good precedence for me. I don't
> expect to have state from crashed programs interrupting attempts to
> restart them, it's more an unpleasant surprise than a feature, I
> think.

Yeah, I think that is rather unpleasant, particularly for something
like qemu guest agent, which we want to try to ensure is reliably
running. In any case, if qemu guest agent is being launched by
something like SystemD, then you can configure whether systemd
will auto-restart it when it dies with non-zero exit status, so
I don't think we should deliberately leave stale pidfiles for that
scenario.

Regards,
Daniel
Luiz Capitulino - Jan. 6, 2012, 7:06 p.m.
On Fri, 6 Jan 2012 17:05:53 +0000
"Daniel P. Berrange" <berrange@redhat.com> wrote:

> On Fri, Jan 06, 2012 at 11:00:42AM -0600, Michael Roth wrote:
> > On 01/06/2012 04:56 AM, Daniel P. Berrange wrote:
> > >On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
> > >>On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
> > >>>Hi, all.
> > >>>
> > >>>I am trying QEMU Guest Agent and encountered a small bug. It is that the
> > >>>PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
> > >>>
> > >>>MATSUDA, Daiki
> > >>>
> > >>
> > >>Thanks for the patch. There was some contention in the past about
> > >>whether or not to clean up pidfiles when there was abnormal
> > >>termination, but personally I like this approach better.

Ok, but can't we use atexit() instead then?

> > >
> > >Yep, this still leaves open the problem of pidfile cleanup when the
> > >daemon crashes. For libvirtd we recently switched over to a crash-safe
> > >pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
> > >exclusive access over the pidfile. With this you don't need to worry
> > >about forgetting to unlink() on termination, since the POSIX lock is
> > >automatically released when process exits (or crashes).
> > 
> > Yup, we did the same at some point via lockf(). An argument was made
> > that stale PID files from unresolved crashes should stick around, so
> > we dropped it. I think we should re-evaluate that decision...libvirt
> > taking the same approach is pretty good precedence for me. I don't
> > expect to have state from crashed programs interrupting attempts to
> > restart them, it's more an unpleasant surprise than a feature, I
> > think.

Ok, I'll agree with you this time. Let's do it.

> 
> Yeah, I think that is rather unpleasant, particularly for something
> like qemu guest agent, which we want to try to ensure is reliably
> running. In any case, if qemu guest agent is being launched by
> something like SystemD, then you can configure whether systemd
> will auto-restart it when it dies with non-zero exit status, so
> I don't think we should delibrately leave stale pidfiles for that
> scenario.
> 
> Regards,
> Daniel
Michael Roth - Jan. 6, 2012, 8:09 p.m.
On 01/06/2012 01:06 PM, Luiz Capitulino wrote:
> On Fri, 6 Jan 2012 17:05:53 +0000
> "Daniel P. Berrange"<berrange@redhat.com>  wrote:
>
>> On Fri, Jan 06, 2012 at 11:00:42AM -0600, Michael Roth wrote:
>>> On 01/06/2012 04:56 AM, Daniel P. Berrange wrote:
>>>> On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
>>>>> On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
>>>>>> Hi, all.
>>>>>>
>>>>>> I am trying QEMU Guest Agent and encountered a small bug. It is that the
>>>>>> PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
>>>>>>
>>>>>> MATSUDA, Daiki
>>>>>>
>>>>>
>>>>> Thanks for the patch. There was some contention in the past about
>>>>> whether or not to clean up pidfiles when there was abnormal
>>>>> termination, but personally I like this approach better.
>
> Ok, but can't we use atexit() instead then?

I guess I prefer it to this patch, but I don't believe that covers 
segfaults and the like, so maybe a combination of atexit() and F_SETLK 
would be best (F_SETLK can still leave stale PID files; they just 
wouldn't obstruct subsequent instances, but we should still clean them 
up whenever we can).

>
>>>>
>>>> Yep, this still leaves open the problem of pidfile cleanup when the
>>>> daemon crashes. For libvirtd we recently switched over to a crash-safe
>>>> pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
>>>> exclusive access over the pidfile. With this you don't need to worry
>>>> about forgetting to unlink() on termination, since the POSIX lock is
>>>> automatically released when process exits (or crashes).
>>>
>>> Yup, we did the same at some point via lockf(). An argument was made
>>> that stale PID files from unresolved crashes should stick around, so
>>> we dropped it. I think we should re-evaluate that decision...libvirt
>>> taking the same approach is pretty good precedence for me. I don't
>>> expect to have state from crashed programs interrupting attempts to
>>> restart them, it's more an unpleasant surprise than a feature, I
>>> think.
>
> Ok, I'll agree with you this time. Let's do it.
>
>>
>> Yeah, I think that is rather unpleasant, particularly for something
>> like qemu guest agent, which we want to try to ensure is reliably
>> running. In any case, if qemu guest agent is being launched by
>> something like SystemD, then you can configure whether systemd
>> will auto-restart it when it dies with non-zero exit status, so
>> I don't think we should delibrately leave stale pidfiles for that
>> scenario.
>>
>> Regards,
>> Daniel
>
Luiz Capitulino - Jan. 6, 2012, 8:18 p.m.
On Fri, 06 Jan 2012 14:09:41 -0600
Michael Roth <mdroth@linux.vnet.ibm.com> wrote:

> On 01/06/2012 01:06 PM, Luiz Capitulino wrote:
> > On Fri, 6 Jan 2012 17:05:53 +0000
> > "Daniel P. Berrange"<berrange@redhat.com>  wrote:
> >
> >> On Fri, Jan 06, 2012 at 11:00:42AM -0600, Michael Roth wrote:
> >>> On 01/06/2012 04:56 AM, Daniel P. Berrange wrote:
> >>>> On Thu, Jan 05, 2012 at 06:18:26PM -0600, Michael Roth wrote:
> >>>>> On 01/05/2012 04:26 PM, MATSUDA, Daiki wrote:
> >>>>>> Hi, all.
> >>>>>>
> >>>>>> I am trying QEMU Guest Agent and encountered a small bug. It is that the
> >>>>>> PIDFILE remains when daemon start fails. And maybe forgotton to g_free().
> >>>>>>
> >>>>>> MATSUDA, Daiki
> >>>>>>
> >>>>>
> >>>>> Thanks for the patch. There was some contention in the past about
> >>>>> whether or not to clean up pidfiles when there was abnormal
> >>>>> termination, but personally I like this approach better.
> >
> > Ok, but can't we use atexit() instead then?
> 
> I guess I prefer it to this patch, but I don't believe that covers 
> segfaults and the like, so maybe a combination of atexit() and F_SETLK 
> would be best (as F_SETLK can still leave stale PID files, they just 
> wouldn't obstruct subsequent instances, but we should still clean them 
> up whenever we can)

Agreed.

> 
> >
> >>>>
> >>>> Yep, this still leaves open the problem of pidfile cleanup when the
> >>>> daemon crashes. For libvirtd we recently switched over to a crash-safe
> >>>> pidfile acquisition design, that uses fcntl(F_SETLK) to maintain
> >>>> exclusive access over the pidfile. With this you don't need to worry
> >>>> about forgetting to unlink() on termination, since the POSIX lock is
> >>>> automatically released when process exits (or crashes).
> >>>
> >>> Yup, we did the same at some point via lockf(). An argument was made
> >>> that stale PID files from unresolved crashes should stick around, so
> >>> we dropped it. I think we should re-evaluate that decision...libvirt
> >>> taking the same approach is pretty good precedence for me. I don't
> >>> expect to have state from crashed programs interrupting attempts to
> >>> restart them, it's more an unpleasant surprise than a feature, I
> >>> think.
> >
> > Ok, I'll agree with you this time. Let's do it.
> >
> >>
> >> Yeah, I think that is rather unpleasant, particularly for something
> >> like qemu guest agent, which we want to try to ensure is reliably
> >> running. In any case, if qemu guest agent is being launched by
> >> something like SystemD, then you can configure whether systemd
> >> will auto-restart it when it dies with non-zero exit status, so
> >> I don't think we should delibrately leave stale pidfiles for that
> >> scenario.
> >>
> >> Regards,
> >> Daniel
> >
>

Patch

diff -uNrp qemu/qemu-ga.c qemu-c47f3223658119219bbe0b8d09da733d1c06e76f/qemu-ga.c
--- qemu/qemu-ga.c	2012-01-05 01:06:25.000000000 +0900
+++ qemu-c47f3223658119219bbe0b8d09da733d1c06e76f/qemu-ga.c	2012-01-06 07:07:03.807872085 +0900
@@ -49,6 +49,13 @@  struct GAState {
 };
 
 static struct GAState *ga_state;
+const char *pidfile = QGA_PIDFILE_DEFAULT;
+
+static void cleanup(void)
+{
+    g_free(ga_state);
+    unlink(pidfile);
+}
 
 static void quit_handler(int sig)
 {
@@ -70,6 +77,7 @@  static void register_signal_handlers(voi
     ret = sigaction(SIGINT, &sigact, NULL);
     if (ret == -1) {
         g_error("error configuring signal handler: %s", strerror(errno));
+        cleanup();
         exit(EXIT_FAILURE);
     }
     ret = sigaction(SIGTERM, &sigact, NULL);
@@ -485,6 +493,7 @@  static void init_guest_agent(GAState *s)
     if (s->path == NULL) {
         if (strcmp(s->method, "virtio-serial") != 0) {
             g_critical("must specify a path for this channel");
+            cleanup();
             exit(EXIT_FAILURE);
         }
         /* try the default path for the virtio-serial port */
@@ -496,17 +505,20 @@  static void init_guest_agent(GAState *s)
         fd = qemu_open(s->path, O_RDWR | O_NONBLOCK | O_ASYNC);
         if (fd == -1) {
             g_critical("error opening channel: %s", strerror(errno));
+            cleanup();
             exit(EXIT_FAILURE);
         }
         ret = conn_channel_add(s, fd);
         if (ret) {
             g_critical("error adding channel to main loop");
+            cleanup();
             exit(EXIT_FAILURE);
         }
     } else if (strcmp(s->method, "isa-serial") == 0) {
         fd = qemu_open(s->path, O_RDWR | O_NOCTTY);
         if (fd == -1) {
             g_critical("error opening channel: %s", strerror(errno));
+            cleanup();
             exit(EXIT_FAILURE);
         }
         tcgetattr(fd, &tio);
@@ -533,15 +545,18 @@  static void init_guest_agent(GAState *s)
         fd = unix_listen(s->path, NULL, strlen(s->path));
         if (fd == -1) {
             g_critical("error opening path: %s", strerror(errno));
+            cleanup();
             exit(EXIT_FAILURE);
         }
         ret = listen_channel_add(s, fd, true);
         if (ret) {
             g_critical("error binding/listening to specified socket");
+            cleanup();
             exit(EXIT_FAILURE);
         }
     } else {
         g_critical("unsupported channel method/type: %s", s->method);
+        cleanup();
         exit(EXIT_FAILURE);
     }
 
@@ -552,7 +567,7 @@  static void init_guest_agent(GAState *s)
 int main(int argc, char **argv)
 {
     const char *sopt = "hVvdm:p:l:f:b:";
-    const char *method = NULL, *path = NULL, *pidfile = QGA_PIDFILE_DEFAULT;
+    const char *method = NULL, *path = NULL;
     const struct option lopt[] = {
         { "help", 0, NULL, 'h' },
         { "version", 0, NULL, 'V' },
@@ -662,7 +677,7 @@  int main(int argc, char **argv)
     g_main_loop_run(ga_state->main_loop);
 
     ga_command_state_cleanup_all(ga_state->command_state);
-    unlink(pidfile);
+    cleanup();
 
     return 0;
 }