Patchwork [v1,3/3] : Convert v9fs_stat to threaded model.

login
register
mail settings
Submitter Arun Bharadwaj
Date March 15, 2011, 10:39 a.m.
Message ID <20110315103901.GD23922@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/86938/
State New
Headers show

Comments

Arun Bharadwaj - March 15, 2011, 10:39 a.m.
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2011-03-15 16:04:53]:

Author: Harsh Prateek Bora <harsh@linux.vnet.ibm.com>
Date:   Mon Mar 14 13:55:37 2011 +0530

    Convert v9fs_stat to threaded model.
    
    This patch converts v9fs_stat syscall of 9pfs to threaded
    model by making use of the helper routines provided
    created by the previous patch.
    
    Signed-off-by: Harsh Prateek Bora <harsh@linux.vnet.ibm.com>
    Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Stefan Hajnoczi - March 16, 2011, 10:23 a.m.
On Tue, Mar 15, 2011 at 10:39 AM, Arun R Bharadwaj
<arun@linux.vnet.ibm.com> wrote:
> -static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
> +static void v9fs_stat_post_lstat(void *opaque)
>  {
> -    if (err == -1) {
> -        err = -errno;
> +    V9fsStatState *vs = (V9fsStatState *)opaque;

No need to cast void* in C.

> +    if (vs->err == -1) {
> +        vs->err = -(vs->v9fs_errno);

How about the thread worker function puts the -errno into a vs->ret field:

static void v9fs_stat_do_lstat(V9fsRequest *request)
{
    V9fsStatState *vs = container_of(request, V9fsStatState, request);

    vs->ret = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
    if (vs->ret != 0) {
        vs->ret = -errno;
    }
}

Then v9fs_stat_post_lstat() can use vs->ret directly and does not need
to juggle around the two fields, vs->err and vs->v9fs_errno.

>         goto out;
>     }
>
> -    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
> -    if (err) {
> +    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);

This function can block in v9fs_do_readlink().  Needs to be done
asynchronously to avoid blocking QEMU.

> +    if (vs->err) {
>         goto out;
>     }
>     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
> -    err = vs->offset;
> +    vs->err = vs->offset;
>
>  out:
> -    complete_pdu(s, vs->pdu, err);
> +    complete_pdu(vs->s, vs->pdu, vs->err);
>     v9fs_stat_free(&vs->v9stat);
>     qemu_free(vs);
>  }
>
> +static void v9fs_stat_do_lstat(V9fsRequest *request)
> +{
> +    V9fsStatState *vs = container_of(request, V9fsStatState, request);

Nice.  Could container_of() be used for v9fs_post_lstat() too?  I'm
suggesting making post op functions take the V9fsRequest* instead of a
void* opaque pointer.

> +
> +    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);

This is not threadsafe since rpath still uses a static buffer in
qemu.git.  Please ensure that rpath() is thread-safe before pushing
this patch.

> +    vs->v9fs_errno = errno;
> +}
> +
>  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>  {
>     int32_t fid;
> @@ -1487,6 +1496,10 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>     vs = qemu_malloc(sizeof(*vs));
>     vs->pdu = pdu;
>     vs->offset = 7;
> +    vs->s = s;
> +    vs->request.func = v9fs_stat_do_lstat;
> +    vs->request.post_op.func = v9fs_stat_post_lstat;
> +    vs->request.post_op.arg = vs;
>
>     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
>
> @@ -1498,8 +1511,11 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>         goto out;
>     }
>
> +    /*
>     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
>     v9fs_stat_post_lstat(s, vs, err);
> +    */

Please remove unused code, it quickly becomes out-of-date and confuses readers.

> +    v9fs_qemu_submit_request(&vs->request);

What happens when another PDU is handled next that uses the same fid?
The worst case is if the client sends TCLUNK and fid is freed while
the worker thread and later the post op still access the memory.
There needs to be some kind of guard (like a reference count) to
prevent this.

Stefan
jvrao - March 16, 2011, 2:33 p.m.
On 3/16/2011 3:23 AM, Stefan Hajnoczi wrote:
> On Tue, Mar 15, 2011 at 10:39 AM, Arun R Bharadwaj
> <arun@linux.vnet.ibm.com> wrote:
>> -static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
>> +static void v9fs_stat_post_lstat(void *opaque)
>>  {
>> -    if (err == -1) {
>> -        err = -errno;
>> +    V9fsStatState *vs = (V9fsStatState *)opaque;
> 
> No need to cast void* in C.
> 
>> +    if (vs->err == -1) {
>> +        vs->err = -(vs->v9fs_errno);
> 
> How about the thread worker function puts the -errno into a vs->ret field:
> 
> static void v9fs_stat_do_lstat(V9fsRequest *request)
> {
>     V9fsStatState *vs = container_of(request, V9fsStatState, request);
> 
>     vs->ret = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>     if (vs->ret != 0) {
>         vs->ret = -errno;
>     }
> }
> 
> Then v9fs_stat_post_lstat() can use vs->ret directly and does not need
> to juggle around the two fields, vs->err and vs->v9fs_errno.
> 
>>         goto out;
>>     }
>>
>> -    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>> -    if (err) {
>> +    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
> 
> This function can block in v9fs_do_readlink().  Needs to be done
> asynchronously to avoid blocking QEMU.
> 
>> +    if (vs->err) {
>>         goto out;
>>     }
>>     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
>> -    err = vs->offset;
>> +    vs->err = vs->offset;
>>
>>  out:
>> -    complete_pdu(s, vs->pdu, err);
>> +    complete_pdu(vs->s, vs->pdu, vs->err);
>>     v9fs_stat_free(&vs->v9stat);
>>     qemu_free(vs);
>>  }
>>
>> +static void v9fs_stat_do_lstat(V9fsRequest *request)
>> +{
>> +    V9fsStatState *vs = container_of(request, V9fsStatState, request);
> 
> Nice.  Could container_of() be used for v9fs_post_lstat() too?  I'm
> suggesting making post op functions take the V9fsRequest* instead of a
> void* opaque pointer.
> 
>> +
>> +    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
> 
> This is not threadsafe since rpath still uses a static buffer in
> qemu.git.  Please ensure that rpath() is thread-safe before pushing
> this patch.

There is another patch on the internal list to make rpath thread safe.

> 
>> +    vs->v9fs_errno = errno;
>> +}
>> +
>>  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>  {
>>     int32_t fid;
>> @@ -1487,6 +1496,10 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>     vs = qemu_malloc(sizeof(*vs));
>>     vs->pdu = pdu;
>>     vs->offset = 7;
>> +    vs->s = s;
>> +    vs->request.func = v9fs_stat_do_lstat;
>> +    vs->request.post_op.func = v9fs_stat_post_lstat;
>> +    vs->request.post_op.arg = vs;
>>
>>     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
>>
>> @@ -1498,8 +1511,11 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>         goto out;
>>     }
>>
>> +    /*
>>     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
>>     v9fs_stat_post_lstat(s, vs, err);
>> +    */
> 
> Please remove unused code, it quickly becomes out-of-date and confuses readers.
> 
>> +    v9fs_qemu_submit_request(&vs->request);
> 
> What happens when another PDU is handled next that uses the same fid?
> The worst case is if the client sends TCLUNK and fid is freed while
> the worker thread and later the post op still access the memory.
> There needs to be some kind of guard (like a reference count) to
> prevent this.

As per the protocol this should not happen. Client is the controls the fid,
and the fid is created or destroyed per the directive of the client.
It should not send clunk until the response is received on that fid
based operation(if there is any).

- JV
> 
> Stefan
Stefan Hajnoczi - March 16, 2011, 5:10 p.m.
On Wed, Mar 16, 2011 at 2:33 PM, Venkateswararao Jujjuri (JV)
<jvrao@linux.vnet.ibm.com> wrote:
> On 3/16/2011 3:23 AM, Stefan Hajnoczi wrote:
>> On Tue, Mar 15, 2011 at 10:39 AM, Arun R Bharadwaj
>> <arun@linux.vnet.ibm.com> wrote:
>>> -static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
>>> +static void v9fs_stat_post_lstat(void *opaque)
>>>  {
>>> -    if (err == -1) {
>>> -        err = -errno;
>>> +    V9fsStatState *vs = (V9fsStatState *)opaque;
>>
>> No need to cast void* in C.
>>
>>> +    if (vs->err == -1) {
>>> +        vs->err = -(vs->v9fs_errno);
>>
>> How about the thread worker function puts the -errno into a vs->ret field:
>>
>> static void v9fs_stat_do_lstat(V9fsRequest *request)
>> {
>>     V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>
>>     vs->ret = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>     if (vs->ret != 0) {
>>         vs->ret = -errno;
>>     }
>> }
>>
>> Then v9fs_stat_post_lstat() can use vs->ret directly and does not need
>> to juggle around the two fields, vs->err and vs->v9fs_errno.
>>
>>>         goto out;
>>>     }
>>>
>>> -    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>> -    if (err) {
>>> +    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>
>> This function can block in v9fs_do_readlink().  Needs to be done
>> asynchronously to avoid blocking QEMU.
>>
>>> +    if (vs->err) {
>>>         goto out;
>>>     }
>>>     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
>>> -    err = vs->offset;
>>> +    vs->err = vs->offset;
>>>
>>>  out:
>>> -    complete_pdu(s, vs->pdu, err);
>>> +    complete_pdu(vs->s, vs->pdu, vs->err);
>>>     v9fs_stat_free(&vs->v9stat);
>>>     qemu_free(vs);
>>>  }
>>>
>>> +static void v9fs_stat_do_lstat(V9fsRequest *request)
>>> +{
>>> +    V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>
>> Nice.  Could container_of() be used for v9fs_post_lstat() too?  I'm
>> suggesting making post op functions take the V9fsRequest* instead of a
>> void* opaque pointer.
>>
>>> +
>>> +    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>
>> This is not threadsafe since rpath still uses a static buffer in
>> qemu.git.  Please ensure that rpath() is thread-safe before pushing
>> this patch.
>
> There is another patch on the internal list to make rpath thread safe.
>
>>
>>> +    vs->v9fs_errno = errno;
>>> +}
>>> +
>>>  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>  {
>>>     int32_t fid;
>>> @@ -1487,6 +1496,10 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>     vs = qemu_malloc(sizeof(*vs));
>>>     vs->pdu = pdu;
>>>     vs->offset = 7;
>>> +    vs->s = s;
>>> +    vs->request.func = v9fs_stat_do_lstat;
>>> +    vs->request.post_op.func = v9fs_stat_post_lstat;
>>> +    vs->request.post_op.arg = vs;
>>>
>>>     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
>>>
>>> @@ -1498,8 +1511,11 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>         goto out;
>>>     }
>>>
>>> +    /*
>>>     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>     v9fs_stat_post_lstat(s, vs, err);
>>> +    */
>>
>> Please remove unused code, it quickly becomes out-of-date and confuses readers.
>>
>>> +    v9fs_qemu_submit_request(&vs->request);
>>
>> What happens when another PDU is handled next that uses the same fid?
>> The worst case is if the client sends TCLUNK and fid is freed while
>> the worker thread and later the post op still access the memory.
>> There needs to be some kind of guard (like a reference count) to
>> prevent this.
>
> As per the protocol this should not happen. Client is the controls the fid,
> and the fid is created or destroyed per the directive of the client.
> It should not send clunk until the response is received on that fid
> based operation(if there is any).

Unfortunately it is still possible for a guest to do it.  The model
for emulated hardware is that the guest is untrusted and we cannot
allow things to crash.

It's really important for everyone to keep this in mind otherwise
security vulnerabilities will be introduced for use cases like hosting
and cloud where the guest really cannot be trusted.

An easy fix is to mark a fid busy and reject requests that mess with
it before the existing request has been processed.

Stefan
jvrao - March 17, 2011, 4:26 a.m.
On 3/16/2011 10:10 AM, Stefan Hajnoczi wrote:
> On Wed, Mar 16, 2011 at 2:33 PM, Venkateswararao Jujjuri (JV)
> <jvrao@linux.vnet.ibm.com> wrote:
>> On 3/16/2011 3:23 AM, Stefan Hajnoczi wrote:
>>> On Tue, Mar 15, 2011 at 10:39 AM, Arun R Bharadwaj
>>> <arun@linux.vnet.ibm.com> wrote:
>>>> -static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
>>>> +static void v9fs_stat_post_lstat(void *opaque)
>>>>  {
>>>> -    if (err == -1) {
>>>> -        err = -errno;
>>>> +    V9fsStatState *vs = (V9fsStatState *)opaque;
>>>
>>> No need to cast void* in C.
>>>
>>>> +    if (vs->err == -1) {
>>>> +        vs->err = -(vs->v9fs_errno);
>>>
>>> How about the thread worker function puts the -errno into a vs->ret field:
>>>
>>> static void v9fs_stat_do_lstat(V9fsRequest *request)
>>> {
>>>     V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>>
>>>     vs->ret = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>     if (vs->ret != 0) {
>>>         vs->ret = -errno;
>>>     }
>>> }
>>>
>>> Then v9fs_stat_post_lstat() can use vs->ret directly and does not need
>>> to juggle around the two fields, vs->err and vs->v9fs_errno.
>>>
>>>>         goto out;
>>>>     }
>>>>
>>>> -    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>>> -    if (err) {
>>>> +    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>>
>>> This function can block in v9fs_do_readlink().  Needs to be done
>>> asynchronously to avoid blocking QEMU.
>>>
>>>> +    if (vs->err) {
>>>>         goto out;
>>>>     }
>>>>     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
>>>> -    err = vs->offset;
>>>> +    vs->err = vs->offset;
>>>>
>>>>  out:
>>>> -    complete_pdu(s, vs->pdu, err);
>>>> +    complete_pdu(vs->s, vs->pdu, vs->err);
>>>>     v9fs_stat_free(&vs->v9stat);
>>>>     qemu_free(vs);
>>>>  }
>>>>
>>>> +static void v9fs_stat_do_lstat(V9fsRequest *request)
>>>> +{
>>>> +    V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>>
>>> Nice.  Could container_of() be used for v9fs_post_lstat() too?  I'm
>>> suggesting making post op functions take the V9fsRequest* instead of a
>>> void* opaque pointer.
>>>
>>>> +
>>>> +    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>
>>> This is not threadsafe since rpath still uses a static buffer in
>>> qemu.git.  Please ensure that rpath() is thread-safe before pushing
>>> this patch.
>>
>> There is another patch on the internal list to make rpath thread safe.
>>
>>>
>>>> +    vs->v9fs_errno = errno;
>>>> +}
>>>> +
>>>>  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>  {
>>>>     int32_t fid;
>>>> @@ -1487,6 +1496,10 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>     vs = qemu_malloc(sizeof(*vs));
>>>>     vs->pdu = pdu;
>>>>     vs->offset = 7;
>>>> +    vs->s = s;
>>>> +    vs->request.func = v9fs_stat_do_lstat;
>>>> +    vs->request.post_op.func = v9fs_stat_post_lstat;
>>>> +    vs->request.post_op.arg = vs;
>>>>
>>>>     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
>>>>
>>>> @@ -1498,8 +1511,11 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>         goto out;
>>>>     }
>>>>
>>>> +    /*
>>>>     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>>     v9fs_stat_post_lstat(s, vs, err);
>>>> +    */
>>>
>>> Please remove unused code, it quickly becomes out-of-date and confuses readers.
>>>
>>>> +    v9fs_qemu_submit_request(&vs->request);
>>>
>>> What happens when another PDU is handled next that uses the same fid?
>>> The worst case is if the client sends TCLUNK and fid is freed while
>>> the worker thread and later the post op still access the memory.
>>> There needs to be some kind of guard (like a reference count) to
>>> prevent this.
>>
>> As per the protocol this should not happen. Client is the controls the fid,
>> and the fid is created or destroyed per the directive of the client.
>> It should not send clunk until the response is received on that fid
>> based operation(if there is any).
> 
> Unfortunately it is still possible for a guest to do it.  The model
> for emulated hardware is that the guest is untrusted and we cannot
> allow things to crash.

Well, it can happen only if the guest OS is hacked...and the worst thing
can happen is guest goes down. I am not sure how it is different from
a bare metal system..
> 
> It's really important for everyone to keep this in mind otherwise
> security vulnerabilities will be introduced for use cases like hosting
> and cloud where the guest really cannot be trusted.
> 
> An easy fix is to mark a fid busy and reject requests that mess with
> it before the existing request has been processed.
> 
> Stefan
Stefan Hajnoczi - March 17, 2011, 7:19 a.m.
On Thu, Mar 17, 2011 at 4:26 AM, Venkateswararao Jujjuri (JV)
<jvrao@linux.vnet.ibm.com> wrote:
> On 3/16/2011 10:10 AM, Stefan Hajnoczi wrote:
>> On Wed, Mar 16, 2011 at 2:33 PM, Venkateswararao Jujjuri (JV)
>> <jvrao@linux.vnet.ibm.com> wrote:
>>> On 3/16/2011 3:23 AM, Stefan Hajnoczi wrote:
>>>> On Tue, Mar 15, 2011 at 10:39 AM, Arun R Bharadwaj
>>>> <arun@linux.vnet.ibm.com> wrote:
>>>>> -static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
>>>>> +static void v9fs_stat_post_lstat(void *opaque)
>>>>>  {
>>>>> -    if (err == -1) {
>>>>> -        err = -errno;
>>>>> +    V9fsStatState *vs = (V9fsStatState *)opaque;
>>>>
>>>> No need to cast void* in C.
>>>>
>>>>> +    if (vs->err == -1) {
>>>>> +        vs->err = -(vs->v9fs_errno);
>>>>
>>>> How about the thread worker function puts the -errno into a vs->ret field:
>>>>
>>>> static void v9fs_stat_do_lstat(V9fsRequest *request)
>>>> {
>>>>     V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>>>
>>>>     vs->ret = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>>     if (vs->ret != 0) {
>>>>         vs->ret = -errno;
>>>>     }
>>>> }
>>>>
>>>> Then v9fs_stat_post_lstat() can use vs->ret directly and does not need
>>>> to juggle around the two fields, vs->err and vs->v9fs_errno.
>>>>
>>>>>         goto out;
>>>>>     }
>>>>>
>>>>> -    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>>>> -    if (err) {
>>>>> +    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
>>>>
>>>> This function can block in v9fs_do_readlink().  Needs to be done
>>>> asynchronously to avoid blocking QEMU.
>>>>
>>>>> +    if (vs->err) {
>>>>>         goto out;
>>>>>     }
>>>>>     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
>>>>> -    err = vs->offset;
>>>>> +    vs->err = vs->offset;
>>>>>
>>>>>  out:
>>>>> -    complete_pdu(s, vs->pdu, err);
>>>>> +    complete_pdu(vs->s, vs->pdu, vs->err);
>>>>>     v9fs_stat_free(&vs->v9stat);
>>>>>     qemu_free(vs);
>>>>>  }
>>>>>
>>>>> +static void v9fs_stat_do_lstat(V9fsRequest *request)
>>>>> +{
>>>>> +    V9fsStatState *vs = container_of(request, V9fsStatState, request);
>>>>
>>>> Nice.  Could container_of() be used for v9fs_post_lstat() too?  I'm
>>>> suggesting making post op functions take the V9fsRequest* instead of a
>>>> void* opaque pointer.
>>>>
>>>>> +
>>>>> +    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>>
>>>> This is not threadsafe since rpath still uses a static buffer in
>>>> qemu.git.  Please ensure that rpath() is thread-safe before pushing
>>>> this patch.
>>>
>>> There is another patch on the internal list to make rpath thread safe.
>>>
>>>>
>>>>> +    vs->v9fs_errno = errno;
>>>>> +}
>>>>> +
>>>>>  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>>  {
>>>>>     int32_t fid;
>>>>> @@ -1487,6 +1496,10 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>>     vs = qemu_malloc(sizeof(*vs));
>>>>>     vs->pdu = pdu;
>>>>>     vs->offset = 7;
>>>>> +    vs->s = s;
>>>>> +    vs->request.func = v9fs_stat_do_lstat;
>>>>> +    vs->request.post_op.func = v9fs_stat_post_lstat;
>>>>> +    vs->request.post_op.arg = vs;
>>>>>
>>>>>     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
>>>>>
>>>>> @@ -1498,8 +1511,11 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
>>>>>         goto out;
>>>>>     }
>>>>>
>>>>> +    /*
>>>>>     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
>>>>>     v9fs_stat_post_lstat(s, vs, err);
>>>>> +    */
>>>>
>>>> Please remove unused code, it quickly becomes out-of-date and confuses readers.
>>>>
>>>>> +    v9fs_qemu_submit_request(&vs->request);
>>>>
>>>> What happens when another PDU is handled next that uses the same fid?
>>>> The worst case is if the client sends TCLUNK and fid is freed while
>>>> the worker thread and later the post op still access the memory.
>>>> There needs to be some kind of guard (like a reference count) to
>>>> prevent this.
>>>
>>> As per the protocol this should not happen. Client is the controls the fid,
>>> and the fid is created or destroyed per the directive of the client.
>>> It should not send clunk until the response is received on that fid
>>> based operation(if there is any).
>>
>> Unfortunately it is still possible for a guest to do it.  The model
>> for emulated hardware is that the guest is untrusted and we cannot
>> allow things to crash.
>
> Well, it can happen only if the guest OS is hacked...and the worst thing
> can happen is guest goes down. I am not sure how it is different from
> a bare metal system..

No, use after free can lead to arbitrary code execution or other
effects more severe than the guest going down:
http://en.wikipedia.org/wiki/Malloc#Use_after_free

For example if the same memory is handed out by malloc and used to
store a function pointer, then the function pointer can be corrupted
and cause a jump to an arbitrary place in memory.

Hardware emulation is like implementing system calls in an operating
system.  You cannot crash in the kernel and you cannot allow undefined
things to happen.  Every state needs to be handled, whether a
reasonable process would do it or not.

Also, allowing hardware emulation to take down the guest is not an
option going forward.  People have been working on nested
virtualization.  In that scenario a virtio-9p-pci device can be passed
through to a nested guest.  If that guest is able to take down QEMU
then it can kill its sibling nested guests and its parent guest, which
it should not be able to do.

Stefan

Patch

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index cf61345..a328e97 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -1458,26 +1458,35 @@  out:
     v9fs_string_free(&aname);
 }
 
-static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
+static void v9fs_stat_post_lstat(void *opaque)
 {
-    if (err == -1) {
-        err = -errno;
+    V9fsStatState *vs = (V9fsStatState *)opaque;
+    if (vs->err == -1) {
+        vs->err = -(vs->v9fs_errno);
         goto out;
     }
 
-    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
-    if (err) {
+    vs->err = stat_to_v9stat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
+    if (vs->err) {
         goto out;
     }
     vs->offset += pdu_marshal(vs->pdu, vs->offset, "wS", 0, &vs->v9stat);
-    err = vs->offset;
+    vs->err = vs->offset;
 
 out:
-    complete_pdu(s, vs->pdu, err);
+    complete_pdu(vs->s, vs->pdu, vs->err);
     v9fs_stat_free(&vs->v9stat);
     qemu_free(vs);
 }
 
+static void v9fs_stat_do_lstat(V9fsRequest *request)
+{
+    V9fsStatState *vs = container_of(request, V9fsStatState, request);
+
+    vs->err = v9fs_do_lstat(vs->s, &vs->fidp->fsmap.path, &vs->stbuf);
+    vs->v9fs_errno = errno;
+}
+
 static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
 {
     int32_t fid;
@@ -1487,6 +1496,10 @@  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
     vs = qemu_malloc(sizeof(*vs));
     vs->pdu = pdu;
     vs->offset = 7;
+    vs->s = s;
+    vs->request.func = v9fs_stat_do_lstat;
+    vs->request.post_op.func = v9fs_stat_post_lstat;
+    vs->request.post_op.arg = vs;
 
     memset(&vs->v9stat, 0, sizeof(vs->v9stat));
 
@@ -1498,8 +1511,11 @@  static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
+    /*
     err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
     v9fs_stat_post_lstat(s, vs, err);
+    */
+    v9fs_qemu_submit_request(&vs->request);
     return;
 
 out:
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index e7d2326..1d6c17c 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -271,6 +271,10 @@  typedef struct V9fsStatState {
     V9fsStat v9stat;
     V9fsFidState *fidp;
     struct stat stbuf;
+    V9fsState *s;
+    int err;
+    int v9fs_errno;
+    V9fsRequest request;
 } V9fsStatState;
 
 typedef struct V9fsStatDotl {