diff mbox

[v3] async: aio_context_new(): Handle event_notifier_init failure

Message ID 1410890642-6704-2-git-send-email-cnanakos@grnet.gr
State New
Headers show

Commit Message

Chrysostomos Nanakos Sept. 16, 2014, 6:04 p.m. UTC
If event_notifier_init fails QEMU exits without printing
any error information to the user. This commit adds an error
message on failure:

 # qemu [...]
 qemu: Failed to initialize event notifier: Too many open files in system

Signed-off-by: Chrysostomos Nanakos <cnanakos@grnet.gr>
---
 async.c                  |   16 +++++++++++-----
 include/block/aio.h      |    2 +-
 include/qemu/main-loop.h |    2 +-
 iothread.c               |   11 ++++++++++-
 main-loop.c              |    9 +++++++--
 qemu-img.c               |    8 +++++++-
 qemu-io.c                |    7 ++++++-
 qemu-nbd.c               |    6 +++++-
 tests/test-aio.c         |   10 +++++++++-
 tests/test-thread-pool.c |   10 +++++++++-
 tests/test-throttle.c    |   10 +++++++++-
 vl.c                     |    5 +++--
 12 files changed, 78 insertions(+), 18 deletions(-)

Comments

Eric Blake Sept. 16, 2014, 7:40 p.m. UTC | #1
On 09/16/2014 12:04 PM, Chrysostomos Nanakos wrote:
> If event_notifier_init fails QEMU exits without printing
> any error information to the user. This commit adds an error
> message on failure:
> 
>  # qemu [...]

Showing the actual command line you used would be helpful.

>  qemu: Failed to initialize event notifier: Too many open files in system
> 
> Signed-off-by: Chrysostomos Nanakos <cnanakos@grnet.gr>
> ---
>  async.c                  |   16 +++++++++++-----
>  include/block/aio.h      |    2 +-
>  include/qemu/main-loop.h |    2 +-
>  iothread.c               |   11 ++++++++++-
>  main-loop.c              |    9 +++++++--
>  qemu-img.c               |    8 +++++++-
>  qemu-io.c                |    7 ++++++-
>  qemu-nbd.c               |    6 +++++-
>  tests/test-aio.c         |   10 +++++++++-
>  tests/test-thread-pool.c |   10 +++++++++-
>  tests/test-throttle.c    |   10 +++++++++-
>  vl.c                     |    5 +++--
>  12 files changed, 78 insertions(+), 18 deletions(-)
> 

> -AioContext *aio_context_new(void)
> +AioContext *aio_context_new(Error **errp)
>  {
> +    int ret;
>      AioContext *ctx;
>      ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
> +    ret = event_notifier_init(&ctx->notifier, false);
> +    if (ret < 0) {
> +        g_source_destroy(&ctx->source);

Does g_source_destroy() guarantee that errno is unmolested? If not,

> +        error_setg_errno(errp, -ret, "Failed to initialize event notifier");

then this logs the wrong error. Swap the lines to be safe.

> +        return NULL;
> +    }
> +    aio_set_event_notifier(ctx, &ctx->notifier,
> +                           (EventNotifierHandler *)
> +                           event_notifier_test_and_clear);
>      ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
>      ctx->thread_pool = NULL;
>      qemu_mutex_init(&ctx->bh_lock);
>      rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
> -    event_notifier_init(&ctx->notifier, false);

Is hoisting the event notifier init to occur before the mutex init going
to cause any grief?

> +++ b/main-loop.c

> @@ -138,8 +139,12 @@ int qemu_init_main_loop(void)
>          return ret;
>      }
>  
> +    qemu_aio_context = aio_context_new(&local_error);
> +    if (!qemu_aio_context) {
> +        error_propagate(errp, local_error);
> +        return -1;
> +    }

Can the earlier call to qemu_signal_init() consume any resources that
are now leaked?

> @@ -205,10 +206,17 @@ static void test_cancel(void)
>  int main(int argc, char **argv)
>  {
>      int ret;
> +    Error *local_error;
>  
>      init_clocks();
>  
> -    ctx = aio_context_new();
> +    ctx = aio_context_new(&local_error);
> +    if (!ctx) {
> +        error_report("Failed to create AIO Context: \'%s\'",

Use of \' inside "" is unusual. (Multiple times in your patch)
Benoît Canet Sept. 16, 2014, 7:48 p.m. UTC | #2
The Tuesday 16 Sep 2014 à 13:40:24 (-0600), Eric Blake wrote :
> On 09/16/2014 12:04 PM, Chrysostomos Nanakos wrote:
> > If event_notifier_init fails QEMU exits without printing
> > any error information to the user. This commit adds an error
> > message on failure:
> > 
> >  # qemu [...]
> 
> Showing the actual command line you used would be helpful.
> 
> >  qemu: Failed to initialize event notifier: Too many open files in system
> > 
> > Signed-off-by: Chrysostomos Nanakos <cnanakos@grnet.gr>
> > ---
> >  async.c                  |   16 +++++++++++-----
> >  include/block/aio.h      |    2 +-
> >  include/qemu/main-loop.h |    2 +-
> >  iothread.c               |   11 ++++++++++-
> >  main-loop.c              |    9 +++++++--
> >  qemu-img.c               |    8 +++++++-
> >  qemu-io.c                |    7 ++++++-
> >  qemu-nbd.c               |    6 +++++-
> >  tests/test-aio.c         |   10 +++++++++-
> >  tests/test-thread-pool.c |   10 +++++++++-
> >  tests/test-throttle.c    |   10 +++++++++-
> >  vl.c                     |    5 +++--
> >  12 files changed, 78 insertions(+), 18 deletions(-)
> > 
> 
> > -AioContext *aio_context_new(void)
> > +AioContext *aio_context_new(Error **errp)
> >  {
> > +    int ret;
> >      AioContext *ctx;
> >      ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
> > +    ret = event_notifier_init(&ctx->notifier, false);
> > +    if (ret < 0) {
> > +        g_source_destroy(&ctx->source);
> 
> Does g_source_destroy() guarantee that errno is unmolested? If not,
> 
> > +        error_setg_errno(errp, -ret, "Failed to initialize event notifier");

Actually -ret is passed to error_setg_errno.

See.

void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,          
                     const char *fmt, ...) 
and
    if (os_errno != 0) {                                                        
            err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));  

> Use of \' inside "" is unusual. (Multiple times in your patch)

My fault.
Chrysostomos Nanakos Sept. 17, 2014, 9:16 a.m. UTC | #3
On 09/16/2014 10:40 PM, Eric Blake wrote:
> On 09/16/2014 12:04 PM, Chrysostomos Nanakos wrote:
>> If event_notifier_init fails QEMU exits without printing
>> any error information to the user. This commit adds an error
>> message on failure:
>>
>>   # qemu [...]
> Showing the actual command line you used would be helpful.

The problem raised after having a system with a low limit of open files 
and QEMU with 8 iothread objects. Do you believe that we should add such 
a command line in the commit description? The problem can be easily 
reproduced with any combination of low limit of open files and iothread 
objects or even a low limit of open files without the creation of iothreads.

>
>>   qemu: Failed to initialize event notifier: Too many open files in system
>>
>> Signed-off-by: Chrysostomos Nanakos <cnanakos@grnet.gr>
>> ---
>>   async.c                  |   16 +++++++++++-----
>>   include/block/aio.h      |    2 +-
>>   include/qemu/main-loop.h |    2 +-
>>   iothread.c               |   11 ++++++++++-
>>   main-loop.c              |    9 +++++++--
>>   qemu-img.c               |    8 +++++++-
>>   qemu-io.c                |    7 ++++++-
>>   qemu-nbd.c               |    6 +++++-
>>   tests/test-aio.c         |   10 +++++++++-
>>   tests/test-thread-pool.c |   10 +++++++++-
>>   tests/test-throttle.c    |   10 +++++++++-
>>   vl.c                     |    5 +++--
>>   12 files changed, 78 insertions(+), 18 deletions(-)
>>
>> -AioContext *aio_context_new(void)
>> +AioContext *aio_context_new(Error **errp)
>>   {
>> +    int ret;
>>       AioContext *ctx;
>>       ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
>> +    ret = event_notifier_init(&ctx->notifier, false);
>> +    if (ret < 0) {
>> +        g_source_destroy(&ctx->source);
> Does g_source_destroy() guarantee that errno is unmolested? If not,
>
>> +        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
> then this logs the wrong error. Swap the lines to be safe.

Good catch! I believe that Benoit has covered this point.

>
>> +        return NULL;
>> +    }
>> +    aio_set_event_notifier(ctx, &ctx->notifier,
>> +                           (EventNotifierHandler *)
>> +                           event_notifier_test_and_clear);
>>       ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
>>       ctx->thread_pool = NULL;
>>       qemu_mutex_init(&ctx->bh_lock);
>>       rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
>> -    event_notifier_init(&ctx->notifier, false);
> Is hoisting the event notifier init to occur before the mutex init going
> to cause any grief?

No I can't find any problem, none of the functions use the mutex on 
initialization. The mutex is being used for adding/removing BH's to/from 
the BH list. So it's safe at the moment to init mutex after the 
initialization of the event notifier.

>
>> +++ b/main-loop.c
>> @@ -138,8 +139,12 @@ int qemu_init_main_loop(void)
>>           return ret;
>>       }
>>   
>> +    qemu_aio_context = aio_context_new(&local_error);
>> +    if (!qemu_aio_context) {
>> +        error_propagate(errp, local_error);
>> +        return -1;
>> +    }
> Can the earlier call to qemu_signal_init() consume any resources that
> are now leaked?
  I can only see signal set manipulation and the setup of a signal 
handler. Nothing is leaked here.

>
>> @@ -205,10 +206,17 @@ static void test_cancel(void)
>>   int main(int argc, char **argv)
>>   {
>>       int ret;
>> +    Error *local_error;
>>   
>>       init_clocks();
>>   
>> -    ctx = aio_context_new();
>> +    ctx = aio_context_new(&local_error);
>> +    if (!ctx) {
>> +        error_report("Failed to create AIO Context: \'%s\'",
> Use of \' inside "" is unusual. (Multiple times in your patch)

I will fix that and if everyone agrees I will resend the patch.

Regards,
Chrysostomos.
Eric Blake Sept. 17, 2014, 1:01 p.m. UTC | #4
On 09/17/2014 03:16 AM, Chrysostomos Nanakos wrote:
> On 09/16/2014 10:40 PM, Eric Blake wrote:
>> On 09/16/2014 12:04 PM, Chrysostomos Nanakos wrote:
>>> If event_notifier_init fails QEMU exits without printing
>>> any error information to the user. This commit adds an error
>>> message on failure:
>>>
>>>   # qemu [...]
>> Showing the actual command line you used would be helpful.
> 
> The problem raised after having a system with a low limit of open files
> and QEMU with 8 iothread objects. Do you believe that we should add such
> a command line in the commit description? The problem can be easily
> reproduced with any combination of low limit of open files and iothread
> objects or even a low limit of open files without the creation of
> iothreads.

It is ALWAYS valuable to make your commit message give a good recipe for
how to reproduce the problem your commit is fixing.  It's not a
prerequisite (a patch can be accepted on inspection alone), but giving
full details of how to reproduce it makes life easier when revisiting
the patch a year from now or when writing a test case to ensure no
regressions.
Chrysostomos Nanakos Sept. 17, 2014, 1:18 p.m. UTC | #5
On 09/17/2014 04:01 PM, Eric Blake wrote:
> On 09/17/2014 03:16 AM, Chrysostomos Nanakos wrote:
>> On 09/16/2014 10:40 PM, Eric Blake wrote:
>>> On 09/16/2014 12:04 PM, Chrysostomos Nanakos wrote:
>>>> If event_notifier_init fails QEMU exits without printing
>>>> any error information to the user. This commit adds an error
>>>> message on failure:
>>>>
>>>>    # qemu [...]
>>> Showing the actual command line you used would be helpful.
>> The problem raised after having a system with a low limit of open files
>> and QEMU with 8 iothread objects. Do you believe that we should add such
>> a command line in the commit description? The problem can be easily
>> reproduced with any combination of low limit of open files and iothread
>> objects or even a low limit of open files without the creation of
>> iothreads.
> It is ALWAYS valuable to make your commit message give a good recipe for
> how to reproduce the problem your commit is fixing.  It's not a
> prerequisite (a patch can be accepted on inspection alone), but giving
> full details of how to reproduce it makes life easier when revisiting
> the patch a year from now or when writing a test case to ensure no
> regressions.
>

No problem Eric, thanks for the advice. I will make the appropriates 
changes to the commit log
and resend the patch.

Regards,
Chrysostomos.
diff mbox

Patch

diff --git a/async.c b/async.c
index a99e7f6..6e1b282 100644
--- a/async.c
+++ b/async.c
@@ -289,18 +289,24 @@  static void aio_rfifolock_cb(void *opaque)
     aio_notify(opaque);
 }
 
-AioContext *aio_context_new(void)
+AioContext *aio_context_new(Error **errp)
 {
+    int ret;
     AioContext *ctx;
     ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    ret = event_notifier_init(&ctx->notifier, false);
+    if (ret < 0) {
+        g_source_destroy(&ctx->source);
+        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
+        return NULL;
+    }
+    aio_set_event_notifier(ctx, &ctx->notifier,
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear);
     ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     ctx->thread_pool = NULL;
     qemu_mutex_init(&ctx->bh_lock);
     rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
-    event_notifier_init(&ctx->notifier, false);
-    aio_set_event_notifier(ctx, &ctx->notifier, 
-                           (EventNotifierHandler *)
-                           event_notifier_test_and_clear);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
     return ctx;
diff --git a/include/block/aio.h b/include/block/aio.h
index 4603c0f..f3d5517 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -99,7 +99,7 @@  void aio_set_dispatching(AioContext *ctx, bool dispatching);
  * They also provide bottom halves, a service to execute a piece of code
  * as soon as possible.
  */
-AioContext *aio_context_new(void);
+AioContext *aio_context_new(Error **errp);
 
 /**
  * aio_context_ref:
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 6f0200a..62c68c0 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -42,7 +42,7 @@ 
  *
  * In the case of QEMU tools, this will also start/initialize timers.
  */
-int qemu_init_main_loop(void);
+int qemu_init_main_loop(Error **errp);
 
 /**
  * main_loop_wait: Run one iteration of the main loop.
diff --git a/iothread.c b/iothread.c
index d9403cf..6e394a0 100644
--- a/iothread.c
+++ b/iothread.c
@@ -17,6 +17,7 @@ 
 #include "block/aio.h"
 #include "sysemu/iothread.h"
 #include "qmp-commands.h"
+#include "qemu/error-report.h"
 
 #define IOTHREADS_PATH "/objects"
 
@@ -53,6 +54,9 @@  static void iothread_instance_finalize(Object *obj)
 {
     IOThread *iothread = IOTHREAD(obj);
 
+    if (!iothread->ctx) {
+        return;
+    }
     iothread->stopping = true;
     aio_notify(iothread->ctx);
     qemu_thread_join(&iothread->thread);
@@ -63,10 +67,15 @@  static void iothread_instance_finalize(Object *obj)
 
 static void iothread_complete(UserCreatable *obj, Error **errp)
 {
+    Error *local_error = NULL;
     IOThread *iothread = IOTHREAD(obj);
 
     iothread->stopping = false;
-    iothread->ctx = aio_context_new();
+    iothread->ctx = aio_context_new(&local_error);
+    if (!iothread->ctx) {
+        error_propagate(errp, local_error);
+        return;
+    }
     iothread->thread_id = -1;
 
     qemu_mutex_init(&iothread->init_done_lock);
diff --git a/main-loop.c b/main-loop.c
index 3cc79f8..2f83d80 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -126,10 +126,11 @@  void qemu_notify_event(void)
 
 static GArray *gpollfds;
 
-int qemu_init_main_loop(void)
+int qemu_init_main_loop(Error **errp)
 {
     int ret;
     GSource *src;
+    Error *local_error = NULL;
 
     init_clocks();
 
@@ -138,8 +139,12 @@  int qemu_init_main_loop(void)
         return ret;
     }
 
+    qemu_aio_context = aio_context_new(&local_error);
+    if (!qemu_aio_context) {
+        error_propagate(errp, local_error);
+        return -1;
+    }
     gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
-    qemu_aio_context = aio_context_new();
     src = aio_get_g_source(qemu_aio_context);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/qemu-img.c b/qemu-img.c
index 91d1ac3..dbf0904 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2879,6 +2879,7 @@  int main(int argc, char **argv)
 {
     const img_cmd_t *cmd;
     const char *cmdname;
+    Error *local_error = NULL;
     int c;
     static const struct option long_options[] = {
         {"help", no_argument, 0, 'h'},
@@ -2893,7 +2894,12 @@  int main(int argc, char **argv)
     error_set_progname(argv[0]);
     qemu_init_exec_dir(argv[0]);
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_error)) {
+        error_report("%s", error_get_pretty(local_error));
+        error_free(local_error);
+        exit(EXIT_FAILURE);
+    }
+
     bdrv_init();
     if (argc < 2) {
         error_exit("Not enough arguments");
diff --git a/qemu-io.c b/qemu-io.c
index d2ab694..66cf3ef 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -379,6 +379,7 @@  int main(int argc, char **argv)
     int c;
     int opt_index = 0;
     int flags = BDRV_O_UNMAP;
+    Error *local_error = NULL;
 
 #ifdef CONFIG_POSIX
     signal(SIGPIPE, SIG_IGN);
@@ -444,7 +445,11 @@  int main(int argc, char **argv)
         exit(1);
     }
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_error)) {
+        error_report("%s", error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     bdrv_init();
 
     /* initialize commands */
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 9bc152e..de9963f 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -674,7 +674,11 @@  int main(int argc, char **argv)
         snprintf(sockpath, 128, SOCKET_PATH, basename(device));
     }
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_err)) {
+        error_report("%s", error_get_pretty(local_err));
+        error_free(local_err);
+        exit(EXIT_FAILURE);
+    }
     bdrv_init();
     atexit(bdrv_close_all);
 
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c6a8713..e2b8968 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -14,6 +14,7 @@ 
 #include "block/aio.h"
 #include "qemu/timer.h"
 #include "qemu/sockets.h"
+#include "qemu/error-report.h"
 
 static AioContext *ctx;
 
@@ -810,11 +811,18 @@  static void test_source_timer_schedule(void)
 
 int main(int argc, char **argv)
 {
+    Error *local_error;
     GSource *src;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: \'%s\'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index f40b7fc..a066dfc 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -4,6 +4,7 @@ 
 #include "block/thread-pool.h"
 #include "block/block.h"
 #include "qemu/timer.h"
+#include "qemu/error-report.h"
 
 static AioContext *ctx;
 static ThreadPool *pool;
@@ -205,10 +206,17 @@  static void test_cancel(void)
 int main(int argc, char **argv)
 {
     int ret;
+    Error *local_error;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: \'%s\'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     pool = aio_get_thread_pool(ctx);
 
     g_test_init(&argc, &argv, NULL);
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 000ae31..01b765e 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -14,6 +14,7 @@ 
 #include <math.h>
 #include "block/aio.h"
 #include "qemu/throttle.h"
+#include "qemu/error-report.h"
 
 static AioContext     *ctx;
 static LeakyBucket    bkt;
@@ -492,10 +493,17 @@  static void test_accounting(void)
 int main(int argc, char **argv)
 {
     GSource *src;
+    Error *local_error;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: \'%s\'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/vl.c b/vl.c
index 5db0d08..4711d00 100644
--- a/vl.c
+++ b/vl.c
@@ -2968,6 +2968,7 @@  int main(int argc, char **argv, char **envp)
     ram_addr_t maxram_size = default_ram_size;
     uint64_t ram_slots = 0;
     FILE *vmstate_dump_file = NULL;
+    Error *main_loop_err = NULL;
 
     atexit(qemu_run_exit_notifiers);
     error_set_progname(argv[0]);
@@ -3998,8 +3999,8 @@  int main(int argc, char **argv, char **envp)
 
     os_daemonize();
 
-    if (qemu_init_main_loop()) {
-        fprintf(stderr, "qemu_init_main_loop failed\n");
+    if (qemu_init_main_loop(&main_loop_err)) {
+        error_report("%s", error_get_pretty(main_loop_err));
         exit(1);
     }