diff mbox

[PULL,7/7] nbd-client: Fix regression when server sends garbage

Message ID 20170815150907.21495-8-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake Aug. 15, 2017, 3:09 p.m. UTC
When we switched NBD to use coroutines for qemu 2.9 (in particular,
commit a12a712a), we introduced a regression: if a server sends us
garbage (such as a corrupted magic number), we quit the read loop
but do not stop sending further queued commands, resulting in the
client hanging when it never reads the response to those additional
commands.  In qemu 2.8, we properly detected that the server is no
longer reliable, and cancelled all existing pending commands with
EIO, then tore down the socket so that all further command attempts
get EPIPE.

Restore the proper behavior of quitting (almost) all communication
with a broken server: Once we know we are out of sync or otherwise
can't trust the server, we must assume that any further incoming
data is unreliable and therefore end all pending commands with EIO,
and quit trying to send any further commands.  As an exception, we
still (try to) send NBD_CMD_DISC to let the server know we are going
away (in part, because it is easier to do that than to further
refactor nbd_teardown_connection, and in part because it is the
only command where we do not have to wait for a reply).

Based on a patch by Vladimir Sementsov-Ogievskiy.

A malicious server can be created with the following hack,
followed by setting NBD_SERVER_DEBUG to a non-zero value in the
environment when running qemu-nbd:

| --- a/nbd/server.c
| +++ b/nbd/server.c
| @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
|      stl_be_p(buf + 4, reply->error);
|      stq_be_p(buf + 8, reply->handle);
|
| +    static int debug;
| +    static int count;
| +    if (!count++) {
| +        const char *str = getenv("NBD_SERVER_DEBUG");
| +        if (str) {
| +            debug = atoi(str);
| +        }
| +    }
| +    if (debug && !(count % debug)) {
| +        buf[0] = 0;
| +    }
|      return nbd_write(ioc, buf, sizeof(buf), errp);
|  }

Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170814213426.24681-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/nbd-client.h |  1 +
 block/nbd-client.c | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Aug. 15, 2017, 3:50 p.m. UTC | #1
15.08.2017 18:09, Eric Blake wrote:
> When we switched NBD to use coroutines for qemu 2.9 (in particular,
> commit a12a712a), we introduced a regression: if a server sends us
> garbage (such as a corrupted magic number), we quit the read loop
> but do not stop sending further queued commands, resulting in the
> client hanging when it never reads the response to those additional
> commands.  In qemu 2.8, we properly detected that the server is no
> longer reliable, and cancelled all existing pending commands with
> EIO, then tore down the socket so that all further command attempts
> get EPIPE.
>
> Restore the proper behavior of quitting (almost) all communication
> with a broken server: Once we know we are out of sync or otherwise
> can't trust the server, we must assume that any further incoming
> data is unreliable and therefore end all pending commands with EIO,
> and quit trying to send any further commands.  As an exception, we
> still (try to) send NBD_CMD_DISC to let the server know we are going
> away (in part, because it is easier to do that than to further
> refactor nbd_teardown_connection, and in part because it is the
> only command where we do not have to wait for a reply).
>
> Based on a patch by Vladimir Sementsov-Ogievskiy.
>
> A malicious server can be created with the following hack,
> followed by setting NBD_SERVER_DEBUG to a non-zero value in the
> environment when running qemu-nbd:
>
> | --- a/nbd/server.c
> | +++ b/nbd/server.c
> | @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
> |      stl_be_p(buf + 4, reply->error);
> |      stq_be_p(buf + 8, reply->handle);
> |
> | +    static int debug;
> | +    static int count;
> | +    if (!count++) {
> | +        const char *str = getenv("NBD_SERVER_DEBUG");
> | +        if (str) {
> | +            debug = atoi(str);
> | +        }
> | +    }
> | +    if (debug && !(count % debug)) {
> | +        buf[0] = 0;
> | +    }
> |      return nbd_write(ioc, buf, sizeof(buf), errp);
> |  }
>
> Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> Message-Id: <20170814213426.24681-1-eblake@redhat.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>   block/nbd-client.h |  1 +
>   block/nbd-client.c | 17 +++++++++++++----
>   2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/block/nbd-client.h b/block/nbd-client.h
> index df80771357..1935ffbcaa 100644
> --- a/block/nbd-client.h
> +++ b/block/nbd-client.h
> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {
>
>       Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
>       NBDReply reply;
> +    bool quit;
>   } NBDClientSession;
>
>   NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 25dd28406b..422ecb4307 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>       int ret;
>       Error *local_err = NULL;
>
> -    for (;;) {
> +    while (!s->quit) {
>           assert(s->reply.handle == 0);
>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>           if (ret < 0) {

I think we should check quit here; if it is true, we should not continue
down the normal path of handling the reply.

> @@ -107,6 +107,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>           qemu_coroutine_yield();
>       }
>
> +    if (ret < 0) {
> +        s->quit = true;
> +    }
>       nbd_recv_coroutines_enter_all(s);
>       s->read_reply_co = NULL;
>   }
> @@ -135,6 +138,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       assert(i < MAX_NBD_REQUESTS);
>       request->handle = INDEX_TO_HANDLE(s, i);

It would not be bad to check s->quit at the start of the function, but that
would complicate things, as in my patch.

> +    if (s->quit) {
> +        qemu_co_mutex_unlock(&s->send_mutex);
> +        return -EIO;
> +    }
>       if (!s->ioc) {
>           qemu_co_mutex_unlock(&s->send_mutex);
>           return -EPIPE;
> @@ -143,7 +150,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       if (qiov) {
>           qio_channel_set_cork(s->ioc, true);
>           rc = nbd_send_request(s->ioc, request);
> -        if (rc >= 0) {
> +        if (rc >= 0 && !s->quit) {
>               ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
>                             NULL);
>               if (ret != request->len) {
> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       } else {
>           rc = nbd_send_request(s->ioc, request);
>       }
> +    if (rc < 0) {
> +        s->quit = true;
> +    }
>       qemu_co_mutex_unlock(&s->send_mutex);

and here, if rc == 0 and quit is true, we should not return 0

>       return rc;
>   }
> @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
>       /* Wait until we're woken up by nbd_read_reply_entry.  */
>       qemu_coroutine_yield();
>       *reply = s->reply;
> -    if (reply->handle != request->handle ||
> -        !s->ioc) {
> +    if (reply->handle != request->handle || !s->ioc || s->quit) {
>           reply->error = EIO;

here, if s->quit is false, we should set it to inform other coroutines

>       } else {
>           if (qiov && reply->error == 0) {

and here follows a call to nbd_rwv(), where s->quit should be 
appropriately handled..
Eric Blake Aug. 15, 2017, 4:51 p.m. UTC | #2
On 08/15/2017 10:50 AM, Vladimir Sementsov-Ogievskiy wrote:
> 15.08.2017 18:09, Eric Blake wrote:
>> When we switched NBD to use coroutines for qemu 2.9 (in particular,
>> commit a12a712a), we introduced a regression: if a server sends us
>> garbage (such as a corrupted magic number), we quit the read loop
>> but do not stop sending further queued commands, resulting in the
>> client hanging when it never reads the response to those additional
>> commands.  In qemu 2.8, we properly detected that the server is no
>> longer reliable, and cancelled all existing pending commands with
>> EIO, then tore down the socket so that all further command attempts
>> get EPIPE.
>>

>> +++ b/block/nbd-client.c
>> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void
>> *opaque)
>>       int ret;
>>       Error *local_err = NULL;
>>
>> -    for (;;) {
>> +    while (!s->quit) {
>>           assert(s->reply.handle == 0);
>>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>>           if (ret < 0) {
> 
> I think we should check quit here, if it is true, we should not continue
> normal path of handling reply

I don't think it matters.  If nbd_receive_reply() correctly got data off
the wire for this particular coroutine's request, we might as well act
on that data, regardless of what other coroutines have learned in the
meantime.

This is already in the pull request for -rc3, but if you can come up
with a scenario that still behaves incorrectly, we can do a followup
patch for -rc4 (although I'm hoping we don't have to change it any
further for 2.10).  Otherwise, I'm fine if your refactoring work for
2.11 addresses the issue as part of making the code easier to read.

>> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
>>       } else {
>>           rc = nbd_send_request(s->ioc, request);
>>       }
>> +    if (rc < 0) {
>> +        s->quit = true;
>> +    }
>>       qemu_co_mutex_unlock(&s->send_mutex);
> 
> and here, if rc == 0 and quite is true, we should not return 0
> 
>>       return rc;

We don't - we return rc, which is negative.

>>   }
>> @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
>>       /* Wait until we're woken up by nbd_read_reply_entry.  */
>>       qemu_coroutine_yield();
>>       *reply = s->reply;
>> -    if (reply->handle != request->handle ||
>> -        !s->ioc) {
>> +    if (reply->handle != request->handle || !s->ioc || s->quit) {
>>           reply->error = EIO;
> 
> here, if s->quit is false, we should set it to inform other coroutines

We can't get into nbd_co_receive_reply() unless the two handles were
once equal, and the only code that changes them to be not equal is when
we are shutting down.  Checking s->quit is a safety valve if some other
coroutine detects corruption first, but this coroutine does not need to
set s->quit because it is either already set, or we are already shutting
down.

> 
>>       } else {
>>           if (qiov && reply->error == 0) {
> 
> and here follows a call to nbd_rwv(), where s->quit should be
> appropriately handled..

Reading from a corrupt server is not as bad as writing to the corrupt
server; the patch for 2.10 is solely focused on preventing writes where
we need a followup read (because once we know the server is corrupt, we
can't guarantee the followup reads will come).

Again, if you can prove we have a scenario that is still buggy (client
can crash or hang), then it is -rc4 material; if not, then this is all
the more that 2.10 needs, and your refactoring work for 2.11 should
clean up a lot of this mess in the first place as you make the
coroutines easier to follow.
Vladimir Sementsov-Ogievskiy Aug. 16, 2017, noon UTC | #3
15.08.2017 19:51, Eric Blake wrote:
> On 08/15/2017 10:50 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 15.08.2017 18:09, Eric Blake wrote:
>>> When we switched NBD to use coroutines for qemu 2.9 (in particular,
>>> commit a12a712a), we introduced a regression: if a server sends us
>>> garbage (such as a corrupted magic number), we quit the read loop
>>> but do not stop sending further queued commands, resulting in the
>>> client hanging when it never reads the response to those additional
>>> commands.  In qemu 2.8, we properly detected that the server is no
>>> longer reliable, and cancelled all existing pending commands with
>>> EIO, then tore down the socket so that all further command attempts
>>> get EPIPE.
>>>
>>> +++ b/block/nbd-client.c
>>> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void
>>> *opaque)
>>>        int ret;
>>>        Error *local_err = NULL;
>>>
>>> -    for (;;) {
>>> +    while (!s->quit) {
>>>            assert(s->reply.handle == 0);
>>>            ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>>>            if (ret < 0) {
>> I think we should check quit here, if it is true, we should not continue
>> normal path of handling reply
> I don't think it matters.  If nbd_receive_reply() correctly got data off
> the wire for this particular coroutine's request, we might as well act
> on that data, regardless of what other coroutines have learned in the
> meantime.
>
> This is already in the pull request for -rc3, but if you can come up
> with a scenario that still behaves incorrectly, we can do a followup

It just doesn't correspond to your commit message:
"... therefore end all pending commands with EIO, and quit trying to 
send any further commands"

So we should end this command (the one we just read the reply for) with EIO,
instead of continuing down the success path.

However, I don't think this leads to a scenario that causes a hang or a
crash of the client, so it's OK for me to
handle it in my refactoring for 2.11.

> patch for -rc4 (although I'm hoping we don't have to change it any
> further for 2.10).  Otherwise, I'm fine if your refactoring work for
> 2.11 addresses the issue as part of making the code easier to read.
>
>>> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
>>>        } else {
>>>            rc = nbd_send_request(s->ioc, request);
>>>        }
>>> +    if (rc < 0) {
>>> +        s->quit = true;
>>> +    }
>>>        qemu_co_mutex_unlock(&s->send_mutex);
>> and here, if rc == 0 and quite is true, we should not return 0
>>
>>>        return rc;
> We don't - we return rc, which is negative.

I think it can be zero while quit has been set to true in another coroutine.

>
>>>    }
>>> @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
>>>        /* Wait until we're woken up by nbd_read_reply_entry.  */
>>>        qemu_coroutine_yield();
>>>        *reply = s->reply;
>>> -    if (reply->handle != request->handle ||
>>> -        !s->ioc) {
>>> +    if (reply->handle != request->handle || !s->ioc || s->quit) {
>>>            reply->error = EIO;
>> here, if s->quit is false, we should set it to inform other coroutines
> We can't get into nbd_co_receive_reply() unless the two handles were
> once equal, and the only code that changes them to be not equal is when
> we are shutting down.  Checking s->quit is a safety valve if some other
> coroutine detects corruption first, but this coroutine does not need to
> set s->quit because it is either already set, or we are already shutting
> down.

OK, and since s->quit is set when we are shutting down, we can drop
the handle comparison here.

>
>>>        } else {
>>>            if (qiov && reply->error == 0) {
>> and here follows a call to nbd_rwv(), where s->quit should be
>> appropriately handled..
> Reading from a corrupt server is not as bad as writing to the corrupt
> server; the patch for 2.10 is solely focused on preventing writes where
> we need a followup read (because once we know the server is corrupt, we
> can't guarantee the followup reads will come).
>
> Again, if you can prove we have a scenario that is still buggy (client
> can crash or hang), then it is -rc4 material; if not, then this is all
> the more that 2.10 needs, and your refactoring work for 2.11 should
> clean up a lot of this mess in the first place as you make the
> coroutines easier to follow.

ok.

>
Vladimir Sementsov-Ogievskiy Aug. 21, 2017, 10:11 a.m. UTC | #4
15.08.2017 18:09, Eric Blake wrote:
> When we switched NBD to use coroutines for qemu 2.9 (in particular,
> commit a12a712a), we introduced a regression: if a server sends us
> garbage (such as a corrupted magic number), we quit the read loop
> but do not stop sending further queued commands, resulting in the
> client hanging when it never reads the response to those additional
> commands.  In qemu 2.8, we properly detected that the server is no
> longer reliable, and cancelled all existing pending commands with
> EIO, then tore down the socket so that all further command attempts
> get EPIPE.
>
> Restore the proper behavior of quitting (almost) all communication
> with a broken server: Once we know we are out of sync or otherwise
> can't trust the server, we must assume that any further incoming
> data is unreliable and therefore end all pending commands with EIO,
> and quit trying to send any further commands.  As an exception, we
> still (try to) send NBD_CMD_DISC to let the server know we are going
> away (in part, because it is easier to do that than to further
> refactor nbd_teardown_connection, and in part because it is the
> only command where we do not have to wait for a reply).
>
> Based on a patch by Vladimir Sementsov-Ogievskiy.
>
> A malicious server can be created with the following hack,
> followed by setting NBD_SERVER_DEBUG to a non-zero value in the
> environment when running qemu-nbd:
>
> | --- a/nbd/server.c
> | +++ b/nbd/server.c
> | @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
> |      stl_be_p(buf + 4, reply->error);
> |      stq_be_p(buf + 8, reply->handle);
> |
> | +    static int debug;
> | +    static int count;
> | +    if (!count++) {
> | +        const char *str = getenv("NBD_SERVER_DEBUG");
> | +        if (str) {
> | +            debug = atoi(str);
> | +        }
> | +    }
> | +    if (debug && !(count % debug)) {
> | +        buf[0] = 0;
> | +    }
> |      return nbd_write(ioc, buf, sizeof(buf), errp);
> |  }
>
> Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> Message-Id: <20170814213426.24681-1-eblake@redhat.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>   block/nbd-client.h |  1 +
>   block/nbd-client.c | 17 +++++++++++++----
>   2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/block/nbd-client.h b/block/nbd-client.h
> index df80771357..1935ffbcaa 100644
> --- a/block/nbd-client.h
> +++ b/block/nbd-client.h
> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {
>
>       Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
>       NBDReply reply;
> +    bool quit;
>   } NBDClientSession;
>
>   NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 25dd28406b..422ecb4307 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>       int ret;
>       Error *local_err = NULL;
>
> -    for (;;) {
> +    while (!s->quit) {
>           assert(s->reply.handle == 0);
>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>           if (ret < 0) {
> @@ -107,6 +107,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>           qemu_coroutine_yield();
>       }
>
> +    if (ret < 0) {
> +        s->quit = true;

but on wrong handle ret is 0:

         if (ret <= 0) {
break;
}

         /* There's no need for a mutex on the receive side, because the
          * handler acts as a synchronization point and ensures that only
          * one coroutine is called until the reply finishes.
*/
         i = HANDLE_TO_INDEX(s, s->reply.handle);
         if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
break;
         }


looks like we should set ret in this if-block

> +    }
>       nbd_recv_coroutines_enter_all(s);
>       s->read_reply_co = NULL;
>   }
> @@ -135,6 +138,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       assert(i < MAX_NBD_REQUESTS);
>       request->handle = INDEX_TO_HANDLE(s, i);
>
> +    if (s->quit) {
> +        qemu_co_mutex_unlock(&s->send_mutex);
> +        return -EIO;
> +    }
>       if (!s->ioc) {
>           qemu_co_mutex_unlock(&s->send_mutex);
>           return -EPIPE;
> @@ -143,7 +150,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       if (qiov) {
>           qio_channel_set_cork(s->ioc, true);
>           rc = nbd_send_request(s->ioc, request);
> -        if (rc >= 0) {
> +        if (rc >= 0 && !s->quit) {
>               ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
>                             NULL);
>               if (ret != request->len) {
> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
>       } else {
>           rc = nbd_send_request(s->ioc, request);
>       }
> +    if (rc < 0) {
> +        s->quit = true;
> +    }
>       qemu_co_mutex_unlock(&s->send_mutex);
>       return rc;
>   }
> @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
>       /* Wait until we're woken up by nbd_read_reply_entry.  */
>       qemu_coroutine_yield();
>       *reply = s->reply;
> -    if (reply->handle != request->handle ||
> -        !s->ioc) {
> +    if (reply->handle != request->handle || !s->ioc || s->quit) {
>           reply->error = EIO;
>       } else {
>           if (qiov && reply->error == 0) {
Vladimir Sementsov-Ogievskiy Aug. 21, 2017, 10:13 a.m. UTC | #5
21.08.2017 13:11, Vladimir Sementsov-Ogievskiy wrote:
> 15.08.2017 18:09, Eric Blake wrote:

>> When we switched NBD to use coroutines for qemu 2.9 (in particular,

>> commit a12a712a), we introduced a regression: if a server sends us

>> garbage (such as a corrupted magic number), we quit the read loop

>> but do not stop sending further queued commands, resulting in the

>> client hanging when it never reads the response to those additional

>> commands.  In qemu 2.8, we properly detected that the server is no

>> longer reliable, and cancelled all existing pending commands with

>> EIO, then tore down the socket so that all further command attempts

>> get EPIPE.

>>

>> Restore the proper behavior of quitting (almost) all communication

>> with a broken server: Once we know we are out of sync or otherwise

>> can't trust the server, we must assume that any further incoming

>> data is unreliable and therefore end all pending commands with EIO,

>> and quit trying to send any further commands.  As an exception, we

>> still (try to) send NBD_CMD_DISC to let the server know we are going

>> away (in part, because it is easier to do that than to further

>> refactor nbd_teardown_connection, and in part because it is the

>> only command where we do not have to wait for a reply).

>>

>> Based on a patch by Vladimir Sementsov-Ogievskiy.

>>

>> A malicious server can be created with the following hack,

>> followed by setting NBD_SERVER_DEBUG to a non-zero value in the

>> environment when running qemu-nbd:

>>

>> | --- a/nbd/server.c

>> | +++ b/nbd/server.c

>> | @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, 

>> NBDReply *reply, Error **errp)

>> |      stl_be_p(buf + 4, reply->error);

>> |      stq_be_p(buf + 8, reply->handle);

>> |

>> | +    static int debug;

>> | +    static int count;

>> | +    if (!count++) {

>> | +        const char *str = getenv("NBD_SERVER_DEBUG");

>> | +        if (str) {

>> | +            debug = atoi(str);

>> | +        }

>> | +    }

>> | +    if (debug && !(count % debug)) {

>> | +        buf[0] = 0;

>> | +    }

>> |      return nbd_write(ioc, buf, sizeof(buf), errp);

>> |  }

>>

>> Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

>> Signed-off-by: Eric Blake <eblake@redhat.com>

>> Message-Id: <20170814213426.24681-1-eblake@redhat.com>

>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

>> ---

>>   block/nbd-client.h |  1 +

>>   block/nbd-client.c | 17 +++++++++++++----

>>   2 files changed, 14 insertions(+), 4 deletions(-)

>>

>> diff --git a/block/nbd-client.h b/block/nbd-client.h

>> index df80771357..1935ffbcaa 100644

>> --- a/block/nbd-client.h

>> +++ b/block/nbd-client.h

>> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {

>>

>>       Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

>>       NBDReply reply;

>> +    bool quit;

>>   } NBDClientSession;

>>

>>   NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

>> diff --git a/block/nbd-client.c b/block/nbd-client.c

>> index 25dd28406b..422ecb4307 100644

>> --- a/block/nbd-client.c

>> +++ b/block/nbd-client.c

>> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void 

>> *opaque)

>>       int ret;

>>       Error *local_err = NULL;

>>

>> -    for (;;) {

>> +    while (!s->quit) {

>>           assert(s->reply.handle == 0);

>>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);

>>           if (ret < 0) {

>> @@ -107,6 +107,9 @@ static coroutine_fn void 

>> nbd_read_reply_entry(void *opaque)

>>           qemu_coroutine_yield();

>>       }

>>

>> +    if (ret < 0) {

>> +        s->quit = true;

>

> but on wrong handle ret is 0:


s/0/positive/

>

>         if (ret <= 0) {

> break;

> }

>

>         /* There's no need for a mutex on the receive side, because the

>          * handler acts as a synchronization point and ensures that only

>          * one coroutine is called until the reply finishes.

> */

>         i = HANDLE_TO_INDEX(s, s->reply.handle);

>         if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {

> break;

>         }

>

>

> looks like we should set ret in this if-block

>

>> +    }

>>       nbd_recv_coroutines_enter_all(s);

>>       s->read_reply_co = NULL;

>>   }

>> @@ -135,6 +138,10 @@ static int nbd_co_send_request(BlockDriverState 

>> *bs,

>>       assert(i < MAX_NBD_REQUESTS);

>>       request->handle = INDEX_TO_HANDLE(s, i);

>>

>> +    if (s->quit) {

>> +        qemu_co_mutex_unlock(&s->send_mutex);

>> +        return -EIO;

>> +    }

>>       if (!s->ioc) {

>>           qemu_co_mutex_unlock(&s->send_mutex);

>>           return -EPIPE;

>> @@ -143,7 +150,7 @@ static int nbd_co_send_request(BlockDriverState *bs,

>>       if (qiov) {

>>           qio_channel_set_cork(s->ioc, true);

>>           rc = nbd_send_request(s->ioc, request);

>> -        if (rc >= 0) {

>> +        if (rc >= 0 && !s->quit) {

>>               ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, 

>> request->len, false,

>>                             NULL);

>>               if (ret != request->len) {

>> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs,

>>       } else {

>>           rc = nbd_send_request(s->ioc, request);

>>       }

>> +    if (rc < 0) {

>> +        s->quit = true;

>> +    }

>>       qemu_co_mutex_unlock(&s->send_mutex);

>>       return rc;

>>   }

>> @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession 

>> *s,

>>       /* Wait until we're woken up by nbd_read_reply_entry.  */

>>       qemu_coroutine_yield();

>>       *reply = s->reply;

>> -    if (reply->handle != request->handle ||

>> -        !s->ioc) {

>> +    if (reply->handle != request->handle || !s->ioc || s->quit) {

>>           reply->error = EIO;

>>       } else {

>>           if (qiov && reply->error == 0) {

>

>


-- 
Best regards,
Vladimir
Vladimir Sementsov-Ogievskiy Aug. 23, 2017, 3:09 p.m. UTC | #6
ping

21.08.2017 13:13, Vladimir Sementsov-Ogievskiy wrote:
> 21.08.2017 13:11, Vladimir Sementsov-Ogievskiy wrote:

>> 15.08.2017 18:09, Eric Blake wrote:

>>> When we switched NBD to use coroutines for qemu 2.9 (in particular,

>>> commit a12a712a), we introduced a regression: if a server sends us

>>> garbage (such as a corrupted magic number), we quit the read loop

>>> but do not stop sending further queued commands, resulting in the

>>> client hanging when it never reads the response to those additional

>>> commands.  In qemu 2.8, we properly detected that the server is no

>>> longer reliable, and cancelled all existing pending commands with

>>> EIO, then tore down the socket so that all further command attempts

>>> get EPIPE.

>>>

>>> Restore the proper behavior of quitting (almost) all communication

>>> with a broken server: Once we know we are out of sync or otherwise

>>> can't trust the server, we must assume that any further incoming

>>> data is unreliable and therefore end all pending commands with EIO,

>>> and quit trying to send any further commands.  As an exception, we

>>> still (try to) send NBD_CMD_DISC to let the server know we are going

>>> away (in part, because it is easier to do that than to further

>>> refactor nbd_teardown_connection, and in part because it is the

>>> only command where we do not have to wait for a reply).

>>>

>>> Based on a patch by Vladimir Sementsov-Ogievskiy.

>>>

>>> A malicious server can be created with the following hack,

>>> followed by setting NBD_SERVER_DEBUG to a non-zero value in the

>>> environment when running qemu-nbd:

>>>

>>> | --- a/nbd/server.c

>>> | +++ b/nbd/server.c

>>> | @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, 

>>> NBDReply *reply, Error **errp)

>>> |      stl_be_p(buf + 4, reply->error);

>>> |      stq_be_p(buf + 8, reply->handle);

>>> |

>>> | +    static int debug;

>>> | +    static int count;

>>> | +    if (!count++) {

>>> | +        const char *str = getenv("NBD_SERVER_DEBUG");

>>> | +        if (str) {

>>> | +            debug = atoi(str);

>>> | +        }

>>> | +    }

>>> | +    if (debug && !(count % debug)) {

>>> | +        buf[0] = 0;

>>> | +    }

>>> |      return nbd_write(ioc, buf, sizeof(buf), errp);

>>> |  }

>>>

>>> Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

>>> Signed-off-by: Eric Blake <eblake@redhat.com>

>>> Message-Id: <20170814213426.24681-1-eblake@redhat.com>

>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

>>> ---

>>>   block/nbd-client.h |  1 +

>>>   block/nbd-client.c | 17 +++++++++++++----

>>>   2 files changed, 14 insertions(+), 4 deletions(-)

>>>

>>> diff --git a/block/nbd-client.h b/block/nbd-client.h

>>> index df80771357..1935ffbcaa 100644

>>> --- a/block/nbd-client.h

>>> +++ b/block/nbd-client.h

>>> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {

>>>

>>>       Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

>>>       NBDReply reply;

>>> +    bool quit;

>>>   } NBDClientSession;

>>>

>>>   NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

>>> diff --git a/block/nbd-client.c b/block/nbd-client.c

>>> index 25dd28406b..422ecb4307 100644

>>> --- a/block/nbd-client.c

>>> +++ b/block/nbd-client.c

>>> @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void 

>>> *opaque)

>>>       int ret;

>>>       Error *local_err = NULL;

>>>

>>> -    for (;;) {

>>> +    while (!s->quit) {

>>>           assert(s->reply.handle == 0);

>>>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);

>>>           if (ret < 0) {

>>> @@ -107,6 +107,9 @@ static coroutine_fn void 

>>> nbd_read_reply_entry(void *opaque)

>>>           qemu_coroutine_yield();

>>>       }

>>>

>>> +    if (ret < 0) {

>>> +        s->quit = true;

>>

>> but on wrong handle ret is 0:

>

> s/0/positive/

>

>>

>>         if (ret <= 0) {

>> break;

>> }

>>

>>         /* There's no need for a mutex on the receive side, because the

>>          * handler acts as a synchronization point and ensures that only

>>          * one coroutine is called until the reply finishes.

>> */

>>         i = HANDLE_TO_INDEX(s, s->reply.handle);

>>         if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {

>> break;

>>         }

>>

>>

>> looks like we should set ret in this if-block

>>

>>> +    }

>>>       nbd_recv_coroutines_enter_all(s);

>>>       s->read_reply_co = NULL;

>>>   }

>>> @@ -135,6 +138,10 @@ static int nbd_co_send_request(BlockDriverState 

>>> *bs,

>>>       assert(i < MAX_NBD_REQUESTS);

>>>       request->handle = INDEX_TO_HANDLE(s, i);

>>>

>>> +    if (s->quit) {

>>> +        qemu_co_mutex_unlock(&s->send_mutex);

>>> +        return -EIO;

>>> +    }

>>>       if (!s->ioc) {

>>>           qemu_co_mutex_unlock(&s->send_mutex);

>>>           return -EPIPE;

>>> @@ -143,7 +150,7 @@ static int nbd_co_send_request(BlockDriverState 

>>> *bs,

>>>       if (qiov) {

>>>           qio_channel_set_cork(s->ioc, true);

>>>           rc = nbd_send_request(s->ioc, request);

>>> -        if (rc >= 0) {

>>> +        if (rc >= 0 && !s->quit) {

>>>               ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, 

>>> request->len, false,

>>>                             NULL);

>>>               if (ret != request->len) {

>>> @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState 

>>> *bs,

>>>       } else {

>>>           rc = nbd_send_request(s->ioc, request);

>>>       }

>>> +    if (rc < 0) {

>>> +        s->quit = true;

>>> +    }

>>>       qemu_co_mutex_unlock(&s->send_mutex);

>>>       return rc;

>>>   }

>>> @@ -168,8 +178,7 @@ static void 

>>> nbd_co_receive_reply(NBDClientSession *s,

>>>       /* Wait until we're woken up by nbd_read_reply_entry. */

>>>       qemu_coroutine_yield();

>>>       *reply = s->reply;

>>> -    if (reply->handle != request->handle ||

>>> -        !s->ioc) {

>>> +    if (reply->handle != request->handle || !s->ioc || s->quit) {

>>>           reply->error = EIO;

>>>       } else {

>>>           if (qiov && reply->error == 0) {

>>

>>

>


-- 
Best regards,
Vladimir
Eric Blake Aug. 23, 2017, 3:17 p.m. UTC | #7
On 08/23/2017 10:09 AM, Vladimir Sementsov-Ogievskiy wrote:
> ping
> 

We're still trying to round up last-minute patches for a 2.10-rc4 pull
request.


>>>> @@ -107,6 +107,9 @@ static coroutine_fn void
>>>> nbd_read_reply_entry(void *opaque)
>>>>           qemu_coroutine_yield();
>>>>       }
>>>>
>>>> +    if (ret < 0) {
>>>> +        s->quit = true;
>>>
>>> but on wrong handle ret is 0:
>>
>> s/0/positive/
>>
>>>

Is your issue fixed by Stefan's patch?

https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg04027.html

In fact, he listed a reproduction formula in his earlier attempt at it,
that demonstrates that we still had a hang possible with just what made
it into -rc3
https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg03853.html
Vladimir Sementsov-Ogievskiy Aug. 23, 2017, 3:21 p.m. UTC | #8
23.08.2017 18:17, Eric Blake wrote:
> On 08/23/2017 10:09 AM, Vladimir Sementsov-Ogievskiy wrote:
>> ping
>>
> We're still trying to round up last-minute patches for a 2.10-rc4 pull
> request.


>
>
>>>>> @@ -107,6 +107,9 @@ static coroutine_fn void
>>>>> nbd_read_reply_entry(void *opaque)
>>>>>            qemu_coroutine_yield();
>>>>>        }
>>>>>
>>>>> +    if (ret < 0) {
>>>>> +        s->quit = true;
>>>> but on wrong handle ret is 0:
>>> s/0/positive/
>>>
> Is your issue fixed by Stefan's patch?
>
> https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg04027.html


Yes, it is fixed; I'm now composing a reply to it.

>
> In fact, he listed a reproduction formula in his earlier attempt at it,
> that demonstrates that we still had a hang possible with just what made
> it into -rc3
> https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg03853.html
>
diff mbox

Patch

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..1935ffbcaa 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@  typedef struct NBDClientSession {

     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
     NBDReply reply;
+    bool quit;
 } NBDClientSession;

 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..422ecb4307 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -73,7 +73,7 @@  static coroutine_fn void nbd_read_reply_entry(void *opaque)
     int ret;
     Error *local_err = NULL;

-    for (;;) {
+    while (!s->quit) {
         assert(s->reply.handle == 0);
         ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
         if (ret < 0) {
@@ -107,6 +107,9 @@  static coroutine_fn void nbd_read_reply_entry(void *opaque)
         qemu_coroutine_yield();
     }

+    if (ret < 0) {
+        s->quit = true;
+    }
     nbd_recv_coroutines_enter_all(s);
     s->read_reply_co = NULL;
 }
@@ -135,6 +138,10 @@  static int nbd_co_send_request(BlockDriverState *bs,
     assert(i < MAX_NBD_REQUESTS);
     request->handle = INDEX_TO_HANDLE(s, i);

+    if (s->quit) {
+        qemu_co_mutex_unlock(&s->send_mutex);
+        return -EIO;
+    }
     if (!s->ioc) {
         qemu_co_mutex_unlock(&s->send_mutex);
         return -EPIPE;
@@ -143,7 +150,7 @@  static int nbd_co_send_request(BlockDriverState *bs,
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0) {
+        if (rc >= 0 && !s->quit) {
             ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
                           NULL);
             if (ret != request->len) {
@@ -154,6 +161,9 @@  static int nbd_co_send_request(BlockDriverState *bs,
     } else {
         rc = nbd_send_request(s->ioc, request);
     }
+    if (rc < 0) {
+        s->quit = true;
+    }
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
 }
@@ -168,8 +178,7 @@  static void nbd_co_receive_reply(NBDClientSession *s,
     /* Wait until we're woken up by nbd_read_reply_entry.  */
     qemu_coroutine_yield();
     *reply = s->reply;
-    if (reply->handle != request->handle ||
-        !s->ioc) {
+    if (reply->handle != request->handle || !s->ioc || s->quit) {
         reply->error = EIO;
     } else {
         if (qiov && reply->error == 0) {