Message ID | 20190201052551.53359-1-lifei1214@126.com |
---|---|
State | New |
Headers | show |
Series | qemu_thread_create: propagate the error to callers to handle | expand |
It seems that this poor patch is left alone. :( I sent all, but this patch failed to join them, so sorry for that.. Could we just let it be? Have a nice day, thanks Fei 在 2019/2/1 下午1:25, Fei Li 写道: > From: Fei Li <fli@suse.com> > > Supplement the error handling for touch_all_pages: add an Error > parameter for it to propagate the error to its caller to do the > handling in case it fails. > > Cc: Markus Armbruster <armbru@redhat.com> > Signed-off-by: Fei Li <fli@suse.com> > --- > util/oslib-posix.c | 26 ++++++++++++++++---------- > 1 file changed, 16 insertions(+), 10 deletions(-) > > diff --git a/util/oslib-posix.c b/util/oslib-posix.c > index b6c2ee270d..b4dd3d8970 100644 > --- a/util/oslib-posix.c > +++ b/util/oslib-posix.c > @@ -435,7 +435,7 @@ static inline int get_memset_num_threads(int smp_cpus) > } > > static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, > - int smp_cpus) > + int smp_cpus, Error **errp) > { > size_t numpages_per_thread; > size_t size_per_thread; > @@ -452,20 +452,29 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, > memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? > numpages : numpages_per_thread; > memset_thread[i].hpagesize = hpagesize; > - /* TODO: let the callers handle the error instead of abort() here */ > - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", > - do_touch_pages, &memset_thread[i], > - QEMU_THREAD_JOINABLE, &error_abort); > + if (qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", > + do_touch_pages, &memset_thread[i], > + QEMU_THREAD_JOINABLE, errp) < 0) { > + memset_thread_failed = true; > + break; > + } > addr += size_per_thread; > numpages -= numpages_per_thread; > } > + > + memset_num_threads = i; > for (i = 0; i < memset_num_threads; i++) { > qemu_thread_join(&memset_thread[i].pgthread); > } > g_free(memset_thread); > memset_thread = NULL; > > - return memset_thread_failed; > + if (memset_thread_failed) { > + error_append_hint(errp, "os_mem_prealloc: Insufficient free host " > + "memory pages available to allocate guest RAM"); > + return false; > + } > + return true; > } > > void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, > @@ -488,10 +497,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, > } > > /* touch pages simultaneously */ > - if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { > - error_setg(errp, "os_mem_prealloc: Insufficient free host memory " > - "pages available to allocate guest RAM"); > - } > + touch_all_pages(area, hpagesize, numpages, smp_cpus, errp); > > ret = sigaction(SIGBUS, &oldact, NULL); > if (ret) {
Fei Li <shirley17fei@gmail.com> writes: > It seems that this poor patch is left alone. :( > I sent all, but this patch failed to join them, so sorry for that.. > Could we just let it be? It'll do, thanks.
Cc: Paolo for preexisting issues due to commit 1e356fc14be. Fei Li <lifei1214@126.com> writes: > From: Fei Li <fli@suse.com> > > Supplement the error handling for touch_all_pages: add an Error > parameter for it to propagate the error to its caller to do the > handling in case it fails. > > Cc: Markus Armbruster <armbru@redhat.com> > Signed-off-by: Fei Li <fli@suse.com> > --- > util/oslib-posix.c | 26 ++++++++++++++++---------- > 1 file changed, 16 insertions(+), 10 deletions(-) > > diff --git a/util/oslib-posix.c b/util/oslib-posix.c > index b6c2ee270d..b4dd3d8970 100644 > --- a/util/oslib-posix.c > +++ b/util/oslib-posix.c Some context we'll need below: static void sigbus_handler(int signal) { int i; if (memset_thread) { for (i = 0; i < memset_num_threads; i++) { if (qemu_thread_is_self(&memset_thread[i].pgthread)) { siglongjmp(memset_thread[i].env, 1); } } } } Preexisting, not this patch's business: touch_all_pages() sets @memset_num_threads to the number of desired threads, then leisurely populates memset_thread[], starting threads as it goes. If one of these threads catches SIGBUS before memset_thread[] is completely populated, memset_thread[i].pgthread can be null here. qemu_thread_is_self() dereferences it. Oops. Note there's a time window between qemu_thread_create() starting the thread and storing it in memset_thread[i].pgthread. Any fix will have to work even when the thread catches SIGBUS within that time window. static void *do_touch_pages(void *arg) { MemsetThread *memset_args = (MemsetThread *)arg; sigset_t set, oldset; /* unblock SIGBUS */ sigemptyset(&set); sigaddset(&set, SIGBUS); pthread_sigmask(SIG_UNBLOCK, &set, &oldset); if (sigsetjmp(memset_args->env, 1)) { memset_thread_failed = true; } else { [...] } pthread_sigmask(SIG_SETMASK, &oldset, NULL); return NULL; } We set @memset_thread_failed on SIGBUS in a memset thread. > @@ -435,7 +435,7 @@ static inline int get_memset_num_threads(int smp_cpus) > } > > static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, > - int smp_cpus) > + int smp_cpus, Error **errp) > { > size_t numpages_per_thread; > size_t size_per_thread; char *addr = area; int i = 0; memset_thread_failed = false; We clear @memset_thread_failed before we create the memset threads. Preexisting, not this patch's business: use of global state without synchronization leaves touch_all_pages() and thus os_mem_prealloc() not thread-safe. Even when that's just fine, it warrants at least a big fat comment, and ideally assertions to catch misuse. memset_num_threads = get_memset_num_threads(smp_cpus); memset_thread = g_new0(MemsetThread, memset_num_threads); numpages_per_thread = (numpages / memset_num_threads); size_per_thread = (hpagesize * numpages_per_thread); for (i = 0; i < memset_num_threads; i++) { memset_thread[i].addr = addr; > @@ -452,20 +452,29 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, > memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? > numpages : numpages_per_thread; > memset_thread[i].hpagesize = hpagesize; > - /* TODO: let the callers handle the error instead of abort() here */ > - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", > - do_touch_pages, &memset_thread[i], > - QEMU_THREAD_JOINABLE, &error_abort); > + if (qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", > + do_touch_pages, &memset_thread[i], > + QEMU_THREAD_JOINABLE, errp) < 0) { > + memset_thread_failed = true; We set @memset_thread_failed when creating a memset thread fails. @errp contains an error then. > + break; > + } > addr += size_per_thread; > numpages -= numpages_per_thread; > } > + > + memset_num_threads = i; > for (i = 0; i < memset_num_threads; i++) { > qemu_thread_join(&memset_thread[i].pgthread); > } > g_free(memset_thread); > memset_thread = NULL; > > - return memset_thread_failed; > + if (memset_thread_failed) { > + error_append_hint(errp, "os_mem_prealloc: Insufficient free host " > + "memory pages available to allocate guest RAM"); > + return false; @memset_thread_failed is true when creating a memset thread failed, or a memset thread caught SIGBUS. @errp contains an error only if the former is the case. If we succeeded in creating all threads, but got SIGBUS, @errp is null, and error_append_hint() does nothing. touch_all_pages() then fails without setting an error. Suggested fix: drop the memset_thread_failed = true on thread creation failure, and do something like for (j = 0; j < i; j++) { qemu_thread_join(&memset_thread[i].pgthread); } g_free(memset_thread); memset_thread = NULL; if (i < memset_num_threads) { /* qemu_thread_create() has set @errp */ return false; } if (memset_thread_failed) { error_setg(errp, "os_mem_prealloc: Insufficient free host " "memory pages available to allocate guest RAM"); return false; > + } > + return true; > } One more remark: when creating the N-th thread fails, we patiently wait for thread 0..N-1 to complete their job. I guess that's tolerable. If we want it to fail quickly, we'd need a way to tell the threads to abandon the job. Perhaps sending a SIGBUS would do. > > void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, > @@ -488,10 +497,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, > } > > /* touch pages simultaneously */ > - if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { > - error_setg(errp, "os_mem_prealloc: Insufficient free host memory " > - "pages available to allocate guest RAM"); > - } > + touch_all_pages(area, hpagesize, numpages, smp_cpus, errp); > > ret = sigaction(SIGBUS, &oldact, NULL); > if (ret) {
在 2019/2/1 下午11:03, Markus Armbruster 写道: > Cc: Paolo for preexisting issues due to commit 1e356fc14be. > > Fei Li <lifei1214@126.com> writes: > >> From: Fei Li <fli@suse.com> >> >> Supplement the error handling for touch_all_pages: add an Error >> parameter for it to propagate the error to its caller to do the >> handling in case it fails. >> >> Cc: Markus Armbruster <armbru@redhat.com> >> Signed-off-by: Fei Li <fli@suse.com> >> --- >> util/oslib-posix.c | 26 ++++++++++++++++---------- >> 1 file changed, 16 insertions(+), 10 deletions(-) >> >> diff --git a/util/oslib-posix.c b/util/oslib-posix.c >> index b6c2ee270d..b4dd3d8970 100644 >> --- a/util/oslib-posix.c >> +++ b/util/oslib-posix.c > Some context we'll need below: > > static void sigbus_handler(int signal) > { > int i; > if (memset_thread) { > for (i = 0; i < memset_num_threads; i++) { > if (qemu_thread_is_self(&memset_thread[i].pgthread)) { > siglongjmp(memset_thread[i].env, 1); > } > } > } > } > > Preexisting, not this patch's business: touch_all_pages() sets > @memset_num_threads to the number of desired threads, then leisurely > populates memset_thread[], starting threads as it goes. If one of these > threads catches SIGBUS before memset_thread[] is completely populated, > memset_thread[i].pgthread can be null here. qemu_thread_is_self() > dereferences it. Oops. Ok, > > Note there's a time window between qemu_thread_create() starting the > thread and storing it in memset_thread[i].pgthread. Any fix will have > to work even when the thread catches SIGBUS within that time window. > > static void *do_touch_pages(void *arg) > { > MemsetThread *memset_args = (MemsetThread *)arg; > sigset_t set, oldset; > > /* unblock SIGBUS */ > sigemptyset(&set); > sigaddset(&set, SIGBUS); > pthread_sigmask(SIG_UNBLOCK, &set, &oldset); > > if (sigsetjmp(memset_args->env, 1)) { > memset_thread_failed = true; > } else { > [...] > } > pthread_sigmask(SIG_SETMASK, &oldset, NULL); > return NULL; > } > > We set @memset_thread_failed on SIGBUS in a memset thread. Ok, thanks for the detail explanation. > >> @@ -435,7 +435,7 @@ static inline int get_memset_num_threads(int smp_cpus) >> } >> >> static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, >> - int smp_cpus) >> + int smp_cpus, Error **errp) >> { >> size_t numpages_per_thread; >> size_t size_per_thread; > char *addr = area; > int i = 0; > > memset_thread_failed = false; > > We clear @memset_thread_failed before we create the memset threads. > > Preexisting, not this patch's business: use of global state without > synchronization leaves touch_all_pages() and thus os_mem_prealloc() not > thread-safe. Even when that's just fine, it warrants at least a big fat > comment, and ideally assertions to catch misuse. > > memset_num_threads = get_memset_num_threads(smp_cpus); > memset_thread = g_new0(MemsetThread, memset_num_threads); > numpages_per_thread = (numpages / memset_num_threads); > size_per_thread = (hpagesize * numpages_per_thread); > for (i = 0; i < memset_num_threads; i++) { > memset_thread[i].addr = addr; >> @@ -452,20 +452,29 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, >> memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? >> numpages : numpages_per_thread; >> memset_thread[i].hpagesize = hpagesize; >> - /* TODO: let the callers handle the error instead of abort() here */ >> - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", >> - do_touch_pages, &memset_thread[i], >> - QEMU_THREAD_JOINABLE, &error_abort); >> + if (qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", >> + do_touch_pages, &memset_thread[i], >> + QEMU_THREAD_JOINABLE, errp) < 0) { >> + memset_thread_failed = true; > We set @memset_thread_failed when creating a memset thread fails. @errp > contains an error then. > >> + break; >> + } >> addr += size_per_thread; >> numpages -= numpages_per_thread; >> } >> + >> + memset_num_threads = i; >> for (i = 0; i < memset_num_threads; i++) { >> qemu_thread_join(&memset_thread[i].pgthread); >> } >> g_free(memset_thread); >> memset_thread = NULL; >> >> - return memset_thread_failed; >> + if (memset_thread_failed) { >> + error_append_hint(errp, "os_mem_prealloc: Insufficient free host " >> + "memory pages available to allocate guest RAM"); >> + return false; > @memset_thread_failed is true when creating a memset thread failed, or a > memset thread caught SIGBUS. > > @errp contains an error only if the former is the case. > > If we succeeded in creating all threads, but got SIGBUS, @errp is null, > and error_append_hint() does nothing. touch_all_pages() then fails > without setting an error. > > Suggested fix: drop the memset_thread_failed = true on thread creation > failure, and do something like > > for (j = 0; j < i; j++) { > qemu_thread_join(&memset_thread[i].pgthread); [j] > } > g_free(memset_thread); > memset_thread = NULL; > > if (i < memset_num_threads) { > /* qemu_thread_create() has set @errp */ > return false; > } > > if (memset_thread_failed) { > error_setg(errp, "os_mem_prealloc: Insufficient free host " > "memory pages available to allocate guest RAM"); > return false; Agree. >> + } >> + return true; >> } > One more remark: when creating the N-th thread fails, we patiently wait > for thread 0..N-1 to complete their job. I guess that's tolerable. If > we want it to fail quickly, we'd need a way to tell the threads to > abandon the job. Perhaps sending a SIGBUS would do. Have a nice day, thanks Fei >> >> void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, >> @@ -488,10 +497,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, >> } >> >> /* touch pages simultaneously */ >> - if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { >> - error_setg(errp, "os_mem_prealloc: Insufficient free host memory " >> - "pages available to allocate guest RAM"); >> - } >> + touch_all_pages(area, hpagesize, numpages, smp_cpus, errp); >> >> ret = sigaction(SIGBUS, &oldact, NULL); >> if (ret) {
diff --git a/util/oslib-posix.c b/util/oslib-posix.c index b6c2ee270d..b4dd3d8970 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -435,7 +435,7 @@ static inline int get_memset_num_threads(int smp_cpus) } static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, - int smp_cpus) + int smp_cpus, Error **errp) { size_t numpages_per_thread; size_t size_per_thread; @@ -452,20 +452,29 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? numpages : numpages_per_thread; memset_thread[i].hpagesize = hpagesize; - /* TODO: let the callers handle the error instead of abort() here */ - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", - do_touch_pages, &memset_thread[i], - QEMU_THREAD_JOINABLE, &error_abort); + if (qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", + do_touch_pages, &memset_thread[i], + QEMU_THREAD_JOINABLE, errp) < 0) { + memset_thread_failed = true; + break; + } addr += size_per_thread; numpages -= numpages_per_thread; } + + memset_num_threads = i; for (i = 0; i < memset_num_threads; i++) { qemu_thread_join(&memset_thread[i].pgthread); } g_free(memset_thread); memset_thread = NULL; - return memset_thread_failed; + if (memset_thread_failed) { + error_append_hint(errp, "os_mem_prealloc: Insufficient free host " + "memory pages available to allocate guest RAM"); + return false; + } + return true; } void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, @@ -488,10 +497,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } /* touch pages simultaneously */ - if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { - error_setg(errp, "os_mem_prealloc: Insufficient free host memory " - "pages available to allocate guest RAM"); - } + touch_all_pages(area, hpagesize, numpages, smp_cpus, errp); ret = sigaction(SIGBUS, &oldact, NULL); if (ret) {