| Field | Value |
|---|---|
| Message ID | 1392994280-9675-2-git-send-email-stefanha@redhat.com |
| State | New |
| Headers | show |
Il 21/02/2014 15:51, Stefan Hajnoczi ha scritto: > Keep the thread ID around so we can report it via QMP. > > There's only one problem: qemu_get_thread_id() (gettid() wrapper on > Linux) must be called from the thread itself. There is no way to get > the thread ID outside the thread. > > This patch uses a condvar to wait for iothread_run() to populate the > thread_id inside the thread. > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > --- > iothread.c | 34 ++++++++++++++++++++++++++++++++-- > 1 file changed, 32 insertions(+), 2 deletions(-) > > diff --git a/iothread.c b/iothread.c > index 033de7f..1a0d1ec 100644 > --- a/iothread.c > +++ b/iothread.c > @@ -26,8 +26,15 @@ struct IOThread { > QemuThread thread; > AioContext *ctx; > bool stopping; > + int thread_id; > }; > > +typedef struct { > + QemuMutex init_done_lock; > + QemuCond init_done_cond; /* is thread initialization done? */ > + IOThread *iothread; > +} ThreadInitInfo; > + > #define IOTHREAD_GET_CLASS(obj) \ > OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) > #define IOTHREAD_CLASS(klass) \ > @@ -35,7 +42,15 @@ struct IOThread { > > static void *iothread_run(void *opaque) > { > - IOThread *iothread = opaque; > + ThreadInitInfo *init_info = opaque; > + IOThread *iothread = init_info->iothread; > + > + iothread->thread_id = qemu_get_thread_id(); > + > + /* Signal that initialization is done */ > + qemu_mutex_lock(&init_info->init_done_lock); > + qemu_cond_signal(&init_info->init_done_cond); > + qemu_mutex_unlock(&init_info->init_done_lock); > > while (!iothread->stopping) { > aio_context_acquire(iothread->ctx); > @@ -60,15 +75,30 @@ static void iothread_instance_finalize(Object *obj) > static void iothread_complete(UserCreatable *obj, Error **errp) > { > IOThread *iothread = IOTHREAD(obj); > + ThreadInitInfo init_info = { > + .iothread = iothread, > + }; > > iothread->stopping = false; > iothread->ctx = aio_context_new(); > > + qemu_mutex_init(&init_info.init_done_lock); > + 
qemu_cond_init(&init_info.init_done_cond); > + > /* This assumes we are called from a thread with useful CPU affinity for us > * to inherit. > */ > qemu_thread_create(&iothread->thread, iothread_run, > - iothread, QEMU_THREAD_JOINABLE); > + &init_info, QEMU_THREAD_JOINABLE); > + > + /* Wait for initialization to complete */ > + qemu_mutex_lock(&init_info.init_done_lock); > + qemu_cond_wait(&init_info.init_done_cond, > + &init_info.init_done_lock); > + qemu_mutex_unlock(&init_info.init_done_lock); > + > + qemu_cond_destroy(&init_info.init_done_cond); > + qemu_mutex_destroy(&init_info.init_done_lock); Destroying the mutex here is racy. You need to keep it until the iothread is destroyed. Paolo > } > > static void iothread_class_init(ObjectClass *klass, void *class_data) >
On Fri, Feb 21, 2014 at 04:18:30PM +0100, Paolo Bonzini wrote: > Il 21/02/2014 15:51, Stefan Hajnoczi ha scritto: > >Keep the thread ID around so we can report it via QMP. > > > >There's only one problem: qemu_get_thread_id() (gettid() wrapper on > >Linux) must be called from the thread itself. There is no way to get > >the thread ID outside the thread. > > > >This patch uses a condvar to wait for iothread_run() to populate the > >thread_id inside the thread. > > > >Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > >--- > > iothread.c | 34 ++++++++++++++++++++++++++++++++-- > > 1 file changed, 32 insertions(+), 2 deletions(-) > > > >diff --git a/iothread.c b/iothread.c > >index 033de7f..1a0d1ec 100644 > >--- a/iothread.c > >+++ b/iothread.c > >@@ -26,8 +26,15 @@ struct IOThread { > > QemuThread thread; > > AioContext *ctx; > > bool stopping; > >+ int thread_id; > > }; > > > >+typedef struct { > >+ QemuMutex init_done_lock; > >+ QemuCond init_done_cond; /* is thread initialization done? 
*/ > >+ IOThread *iothread; > >+} ThreadInitInfo; > >+ > > #define IOTHREAD_GET_CLASS(obj) \ > > OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) > > #define IOTHREAD_CLASS(klass) \ > >@@ -35,7 +42,15 @@ struct IOThread { > > > > static void *iothread_run(void *opaque) > > { > >- IOThread *iothread = opaque; > >+ ThreadInitInfo *init_info = opaque; > >+ IOThread *iothread = init_info->iothread; > >+ > >+ iothread->thread_id = qemu_get_thread_id(); > >+ > >+ /* Signal that initialization is done */ > >+ qemu_mutex_lock(&init_info->init_done_lock); > >+ qemu_cond_signal(&init_info->init_done_cond); > >+ qemu_mutex_unlock(&init_info->init_done_lock); > > > > while (!iothread->stopping) { > > aio_context_acquire(iothread->ctx); > >@@ -60,15 +75,30 @@ static void iothread_instance_finalize(Object *obj) > > static void iothread_complete(UserCreatable *obj, Error **errp) > > { > > IOThread *iothread = IOTHREAD(obj); > >+ ThreadInitInfo init_info = { > >+ .iothread = iothread, > >+ }; > > > > iothread->stopping = false; > > iothread->ctx = aio_context_new(); > > > >+ qemu_mutex_init(&init_info.init_done_lock); > >+ qemu_cond_init(&init_info.init_done_cond); > >+ > > /* This assumes we are called from a thread with useful CPU affinity for us > > * to inherit. > > */ > > qemu_thread_create(&iothread->thread, iothread_run, > >- iothread, QEMU_THREAD_JOINABLE); > >+ &init_info, QEMU_THREAD_JOINABLE); > >+ > >+ /* Wait for initialization to complete */ > >+ qemu_mutex_lock(&init_info.init_done_lock); > >+ qemu_cond_wait(&init_info.init_done_cond, > >+ &init_info.init_done_lock); > >+ qemu_mutex_unlock(&init_info.init_done_lock); > >+ > >+ qemu_cond_destroy(&init_info.init_done_cond); > >+ qemu_mutex_destroy(&init_info.init_done_lock); > > Destroying the mutex here is racy. You need to keep it until the > iothread is destroyed. I don't think so: qemu_cond_signal() is called with the mutex held. 
Therefore, our qemu_cond_wait() followed by qemu_mutex_unlock() will only complete once the thread has released the mutex. The thread will never touch the mutex again so it is safe to destroy it. There is no race condition. Stefan
Il 24/02/2014 16:53, Stefan Hajnoczi ha scritto: >>> > >+ qemu_cond_destroy(&init_info.init_done_cond); >>> > >+ qemu_mutex_destroy(&init_info.init_done_lock); >> > >> > Destroying the mutex here is racy. You need to keep it until the >> > iothread is destroyed. > I don't think so: > > qemu_cond_signal() is called with the mutex held. Therefore, our > qemu_cond_wait() followed by qemu_mutex_unlock() will only complete once > the thread has released the mutex. > > The thread will never touch the mutex again so it is safe to destroy it. > There is no race condition. Could qemu_mutex_destroy run while the other thread has already released the main thread, but before it returns? As far as I know, the only time when it is safe to destroy the "last" synchronization object (in this case the mutex is the last, the condvar is not) is after pthread_join. Paolo
On Mon, Feb 24, 2014 at 05:48:13PM +0100, Paolo Bonzini wrote: > Il 24/02/2014 16:53, Stefan Hajnoczi ha scritto: > >>>> >+ qemu_cond_destroy(&init_info.init_done_cond); > >>>> >+ qemu_mutex_destroy(&init_info.init_done_lock); > >>> > >>> Destroying the mutex here is racy. You need to keep it until the > >>> iothread is destroyed. > >I don't think so: > > > >qemu_cond_signal() is called with the mutex held. Therefore, our > >qemu_cond_wait() followed by qemu_mutex_unlock() will only complete once > >the thread has released the mutex. > > > >The thread will never touch the mutex again so it is safe to destroy it. > >There is no race condition. > > Could qemu_mutex_destroy run while the other thread has already > released the main thread, but before it returns? As far as I know, > the only time when it is safe to destroy the "last" synchronization > object (in this case the mutex is the last, the condvar is not) is > after pthread_join. I guess you're saying that while unlocking the mutex is atomic, that doesn't guarantee pthread won't access the mutex internal state some more after it has unlocked it. Therefore it's not safe for another thread to destroy the mutex even after it has acquired it. POSIX does say that: "It shall be safe to destroy an initialized mutex that is unlocked." But maybe I am reading too much into that? After poking around glibc a little I think you are right. I can't say for sure but it seems even after a futex call glibc might still mess with internal state. But if anyone knows for certain, please speak up. Stefan
Il 25/02/2014 16:42, Stefan Hajnoczi ha scritto: > I guess you're saying that while unlocking the mutex is atomic, that > doesn't guarantee pthread won't access the mutex internal state some > more after it has unlocked it. Therefore it's not safe for another > thread to destroy the mutex even after it has acquired it. Yes. > POSIX does say that: > > "It shall be safe to destroy an initialized mutex that is unlocked." The question is what "unlocked" means... :) > But maybe I am reading too much into that? > > After poking around glibc a little I think you are right. I can't say > for sure but it seems even after a futex call glibc might still mess > with internal state. But if anyone knows for certain, please speak up. I think other races are possible. Let's look at the simple lock in nptl/lowlevellock.h: /* Mutex lock counter: bit 31 clear means unlocked; bit 31 set means locked. All code that looks at bit 31 first increases the 'number of interested threads' usage counter, which is in bits 0-30. The comment is wrong, there is a fast path that does not do that; I'm not sure if this is why the problem can happen, I'm just pointing this out because it contradicts the code I'm posting now. The file uses C code, but it's simpler to look at it in assembly. Unlocking is very simple: lock; btcl $31, futex jz 2f ... do futex wake ... 2: Locking has a fast path followed by preparing the slow path, re-checking the fastpath condition, and waiting if it fails still: lock; btsl $31, futex jnc 9f lock; incl futex 1: lock; btsl $31, futex jnc 8f ... do futex wait ... jmp 1b 8: lock; decl futex 9: It's possible, if futex is locked by CPU 0 and CPU 1 tries to grab it, that the following happens: CPU 0 CPU 1 lock; btsl $31, futex (fails) lock; incl futex lock; btcl %0 (not zero) lock; btsl $31, futex (succeeds) lock; decl futex destroy lock free(lock) futex wake If you get an EFAULT from the futex wakeup, this could be a problem. Paolo
On Mon, Feb 24, 2014 at 05:48:13PM +0100, Paolo Bonzini wrote: > Il 24/02/2014 16:53, Stefan Hajnoczi ha scritto: > >>>> >+ qemu_cond_destroy(&init_info.init_done_cond); > >>>> >+ qemu_mutex_destroy(&init_info.init_done_lock); > >>> > >>> Destroying the mutex here is racy. You need to keep it until the > >>> iothread is destroyed. > >I don't think so: > > > >qemu_cond_signal() is called with the mutex held. Therefore, our > >qemu_cond_wait() followed by qemu_mutex_unlock() will only complete once > >the thread has released the mutex. > > > >The thread will never touch the mutex again so it is safe to destroy it. > >There is no race condition. > > Could qemu_mutex_destroy run while the other thread has already > released the main thread, but before it returns? As far as I know, > the only time when it is safe to destroy the "last" synchronization > object (in this case the mutex is the last, the condvar is not) is > after pthread_join. For the default mutex type (PTHREAD_MUTEX_TIMED_NP) glibc looks safe to me. The other mutex types are trickier and I haven't audited them. Anyway, I can just move the mutex into the IOThread object and destroy it after the thread is joined :). Stefan
Il 25/02/2014 17:17, Stefan Hajnoczi ha scritto: > For the default mutex type (PTHREAD_MUTEX_TIMED_NP) glibc looks safe to > me. The other mutex types are trickier and I haven't audited them. It also depends on the low-level lock implementation. I looked at the C one and it's not safe, the x86-optimized one is hard to follow. I think I was looking at a different race, namely cases where the locking thread uses a fast path, while the unlocking thread plays it safe and uses the slow path instead. Then the slow path can run "asynchronously" from the locking thread, and the locking thread has time to unlock and destroy the mutex. See the other message. Paolo
diff --git a/iothread.c b/iothread.c index 033de7f..1a0d1ec 100644 --- a/iothread.c +++ b/iothread.c @@ -26,8 +26,15 @@ struct IOThread { QemuThread thread; AioContext *ctx; bool stopping; + int thread_id; }; +typedef struct { + QemuMutex init_done_lock; + QemuCond init_done_cond; /* is thread initialization done? */ + IOThread *iothread; +} ThreadInitInfo; + #define IOTHREAD_GET_CLASS(obj) \ OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) #define IOTHREAD_CLASS(klass) \ @@ -35,7 +42,15 @@ struct IOThread { static void *iothread_run(void *opaque) { - IOThread *iothread = opaque; + ThreadInitInfo *init_info = opaque; + IOThread *iothread = init_info->iothread; + + iothread->thread_id = qemu_get_thread_id(); + + /* Signal that initialization is done */ + qemu_mutex_lock(&init_info->init_done_lock); + qemu_cond_signal(&init_info->init_done_cond); + qemu_mutex_unlock(&init_info->init_done_lock); while (!iothread->stopping) { aio_context_acquire(iothread->ctx); @@ -60,15 +75,30 @@ static void iothread_instance_finalize(Object *obj) static void iothread_complete(UserCreatable *obj, Error **errp) { IOThread *iothread = IOTHREAD(obj); + ThreadInitInfo init_info = { + .iothread = iothread, + }; iothread->stopping = false; iothread->ctx = aio_context_new(); + qemu_mutex_init(&init_info.init_done_lock); + qemu_cond_init(&init_info.init_done_cond); + /* This assumes we are called from a thread with useful CPU affinity for us * to inherit. */ qemu_thread_create(&iothread->thread, iothread_run, - iothread, QEMU_THREAD_JOINABLE); + &init_info, QEMU_THREAD_JOINABLE); + + /* Wait for initialization to complete */ + qemu_mutex_lock(&init_info.init_done_lock); + qemu_cond_wait(&init_info.init_done_cond, + &init_info.init_done_lock); + qemu_mutex_unlock(&init_info.init_done_lock); + + qemu_cond_destroy(&init_info.init_done_cond); + qemu_mutex_destroy(&init_info.init_done_lock); } static void iothread_class_init(ObjectClass *klass, void *class_data)
Keep the thread ID around so we can report it via QMP. There's only one problem: qemu_get_thread_id() (gettid() wrapper on Linux) must be called from the thread itself. There is no way to get the thread ID outside the thread. This patch uses a condvar to wait for iothread_run() to populate the thread_id inside the thread. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> --- iothread.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-)