[v3] virtio-rng: return available data with O_NONBLOCK

Message ID 20200811142821.12323-1-mwilck@suse.com
State New
Series [v3] virtio-rng: return available data with O_NONBLOCK

Commit Message

Martin Wilck Aug. 11, 2020, 2:28 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and
non-blocking read() to retrieve random data, it ends up in a tight
loop with poll() always returning POLLIN and read() returning EAGAIN.
This repeats forever until some process makes a blocking read() call.
The reason is that virtio_read() always returns 0 in non-blocking mode,
even if data is available. Worse, it fetches random data from the
hypervisor after every non-blocking call, without ever using this data.

The following two test programs illustrate the behavior and can be
used for testing and experiments. The problem is only seen if all
tasks use non-blocking access; otherwise the blocking reads will
"recharge" the random pool and cause other, non-blocking reads to
succeed at least sometimes. The buffer size and timing macros below
are example values and can be adjusted for experimentation.

/* Test program 1: a mix of readers; see CONDITION below. */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>

/* Example values, adjust as needed */
#define MINBUFSIZ 8
#define MAXBUFSIZ 64
#define SECONDS 10

/* Whether to use non-blocking mode in a task; the problem occurs if
 * CONDITION is 1 for every task. Use the commented-out alternative to
 * mix blocking and non-blocking readers. */
#define CONDITION 1
//#define CONDITION (getpid() % 2 != 0)

static volatile sig_atomic_t stop;
static void handler(int sig __attribute__((unused))) { stop = 1; }

static void loop(int fd, int sec)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN, };
	unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0;
	int size, rc, rd;

	srandom(getpid());
	if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) == -1)
		perror("fcntl");
	size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1);

	for(;;) {
		char buf[size];

		if (stop)
			break;
		rc = poll(&pfd, 1, sec);
		if (rc > 0) {
			rd = read(fd, buf, sizeof(buf));
			if (rd == -1 && errno == EAGAIN)
				eagains++;
			else if (rd == -1)
				errors++;
			else {
				succ++;
				bytes += rd;
				write(1, buf, rd);
			}
		} else if (rc == -1) {
			if (errno != EINTR)
				perror("poll");
			break;
		} else
			fprintf(stderr, "poll: timeout\n");
	}
	fprintf(stderr,
		"pid %d %sblocking, bufsize %d, %d seconds, %lu bytes read, %lu success, %lu eagain, %lu errors\n",
		getpid(), CONDITION ? "non-" : "", size, sec, bytes, succ, eagains, errors);
}

int main(void)
{
	int fd;

	fork(); fork();	/* fork twice: four reader processes in total */
	fd = open("/dev/hwrng", O_RDONLY);
	if (fd == -1) {
		perror("open");
		return 1;
	}
	signal(SIGALRM, handler);
	alarm(SECONDS);
	loop(fd, SECONDS);
	close(fd);
	wait(NULL);
	return 0;
}

/* Test program 2: non-blocking readers only. LOOPS and SIZE are
 * example values. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

#define LOOPS 100
#define SIZE 64

void loop(int fd)
{
        struct pollfd pfd0 = { .fd = fd, .events = POLLIN, };
        int rc;
        unsigned int n;

        for (n = LOOPS; n > 0; n--) {
                struct pollfd pfd = pfd0;
                char buf[SIZE];

                rc = poll(&pfd, 1, 1);
                if (rc > 0) {
                        int rd = read(fd, buf, sizeof(buf));

                        if (rd == -1)
                                perror("read");
                        else
                                printf("read %d bytes\n", rd);
                } else if (rc == -1)
                        perror("poll");
                else
                        fprintf(stderr, "timeout\n");

        }
}

int main(void)
{
        int fd;

        fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK);
        if (fd == -1) {
                perror("open");
                return 1;
        }
        loop(fd);
        close(fd);
        return 0;
}
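
The programs can be built and run like this inside a VM with a
virtio-rng device (file names are illustrative):

  gcc -Wall -o test1 test1.c    # first program (mixed readers)
  gcc -Wall -o test2 test2.c    # second program (non-blocking only)
  ./test1 >/dev/null            # per-pid statistics go to stderr
  ./test2                       # without the fix, every read() fails
                                # with EAGAIN despite POLLIN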

This can be observed in the real world e.g. with nested qemu/KVM virtual
machines, if both the "outer" and "inner" VMs have a virtio-rng device.
If the "inner" VM requests random data, qemu running in the "outer" VM
uses this device in a non-blocking manner like the test programs above.

Fix it by returning available data if a previous hypervisor call has
completed. I tested this patch with the programs above, and with
rng-tools.

v2 -> v3: Simplified the implementation as suggested by Laurent Vivier

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 drivers/char/hw_random/virtio-rng.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
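
The change itself (quoted in full in the replies below):

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index a90001e02bf7..8eaeceecb41e 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -65,7 +65,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 		register_buffer(vi, buf, size);
 	}
 
-	if (!wait)
+	if (!wait && !completion_done(&vi->have_data))
 		return 0;
 
 	ret = wait_for_completion_killable(&vi->have_data);
@@ -74,7 +74,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 
 	vi->busy = false;
 
-	return vi->data_avail;
+	return min_t(size_t, size, vi->data_avail);
 }
 
 static void virtio_cleanup(struct hwrng *rng)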

Comments

Laurent Vivier Aug. 11, 2020, 2:42 p.m. UTC | #1
On 11/08/2020 16:28, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> [... full patch quoted; see the commit message and diff above ...]

Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Michael S. Tsirkin Aug. 26, 2020, 12:26 p.m. UTC | #2
On Tue, Aug 11, 2020 at 04:42:32PM +0200, Laurent Vivier wrote:
> On 11/08/2020 16:28, mwilck@suse.com wrote:
> > [... full patch quoted ...]
> Reviewed-by: Laurent Vivier <lvivier@redhat.com>

Laurent, didn't we agree the real fix is private buffers in the driver,
and copying out from there?
Martin Wilck Aug. 28, 2020, 9:34 p.m. UTC | #3
On Wed, 2020-08-26 at 08:26 -0400, Michael S. Tsirkin wrote:
> On Tue, Aug 11, 2020 at 04:42:32PM +0200, Laurent Vivier wrote:
> > On 11/08/2020 16:28, mwilck@suse.com wrote:
> > > [... full patch quoted ...]
> > Reviewed-by: Laurent Vivier <lvivier@redhat.com>
> 
> Laurent, didn't we agree the real fix is private buffers in the driver,
> and copying out from there?
> 

Can we perhaps proceed with this for now? AFAICS the private buffer
implementation would be a larger effort, while we have the issues with
nested VMs getting no entropy today.

Thanks
Martin
Laurent Vivier Aug. 31, 2020, 12:37 p.m. UTC | #4
On 28/08/2020 23:34, Martin Wilck wrote:
> On Wed, 2020-08-26 at 08:26 -0400, Michael S. Tsirkin wrote:
>> On Tue, Aug 11, 2020 at 04:42:32PM +0200, Laurent Vivier wrote:
>>> [... full patch quoted ...]
>>> Reviewed-by: Laurent Vivier <lvivier@redhat.com>
>>
>> Laurent, didn't we agree the real fix is private buffers in the driver,
>> and copying out from there?
>>
> 
> Can we perhaps proceed with this for now? AFAICS the private buffer
> implementation would be a larger effort, while we have the issues with
> nested VMs getting no entropy today.
> 

I agree. I think it's important to have a simple and quick fix for the
problem reported by Martin.

We need the private buffers, but it's not clear how long it will take to
get them included in the kernel, nor how many new bugs will be introduced
doing that: the code is hard to understand, and the core is shared with
several other hardware backends that can be impacted by the changes needed.

Thanks,
Laurent
Michael S. Tsirkin Sept. 8, 2020, 2:14 p.m. UTC | #5
On Mon, Aug 31, 2020 at 02:37:26PM +0200, Laurent Vivier wrote:
> On 28/08/2020 23:34, Martin Wilck wrote:
> > [... earlier discussion and full patch quoted ...]
> >> Laurent, didn't we agree the real fix is private buffers in the driver,
> >> and copying out from there?
> >>
> > 
> > Can we perhaps proceed with this for now? AFAICS the private buffer
> > implementation would be a larger effort, while we have the issues with
> > nested VMs getting no entropy today.
> > 
> 
> I agree. I think it's important to have a simple and quick fix for the
> problem reported by Martin.
> 
> We need the private buffers, but it's not clear how long it will take to
> get them included in the kernel, nor how many new bugs will be introduced
> doing that: the code is hard to understand, and the core is shared with
> several other hardware backends that can be impacted by the changes needed.
> 
> Thanks,
> Laurent

However, I am not sure that, with the patch applied, we never return
the same buffer to userspace twice, e.g. if one reader is non-blocking
and another is blocking. Doing that would be a bug.
Martin Wilck Sept. 8, 2020, 3:33 p.m. UTC | #6
On Tue, 2020-09-08 at 10:14 -0400, Michael S. Tsirkin wrote:
> On Mon, Aug 31, 2020 at 02:37:26PM +0200, Laurent Vivier wrote:
> > [... earlier discussion and full patch quoted ...]
> 
> However, I am not sure that, with the patch applied, we never return
> the same buffer to userspace twice, e.g. if one reader is non-blocking
> and another is blocking. Doing that would be a bug.
> 

As Laurent mentioned in 
https://lists.gnu.org/archive/html/qemu-devel/2020-08/msg02039.html,
there are only 2 different buffers that may be passed to virtio_read(),
rng_buffer and rng_fillbuf.
The latter is only used in blocking mode.

AFAICS there's just one problematic situation: 

 1. A user space process reads random data without blocking: it runs
    register_buffer(), gets no data, and releases reading_mutex.
 2. The hwrng kthread grabs the mutex and makes a sync call; vi->busy is
    still set, so no new completion is initialized.
 3. hwrng calls wait_for_completion_killable() and sees the completion
    that had been initialized by the user space process previously.
 4. hwrng "thinks" it got some positive randomness, but the random data
    have actually been written into rng_buffer, not rng_fillbuf.

This is indeed bad, but it can happen with the current code as well.
Actually, it's more likely to happen with the current code, because
asynchronous callers might hang forever trying to get entropy, making
this scenario more likely (if there's a process, like nested qemu, that
keeps making non-blocking read() calls). So this wouldn't be a
regression caused by my patch, AFAICT.

How can we avoid this problem entirely? A) With private buffers, of
course. B) Another, somewhat hackish, approach would be to remember the
active "buffer" pointer in virtio_rng and restart the IO when another
buffer is passed down. C) Finally, we could modify virtio_read() such
that blocking calls always re-initialize the buffer; they'd then have
to wait for a potentially already running IO from a previous,
non-blocking access to finish first.

But I believe this is something which could (and should) be done
independently. Alternatively, I could add B) or C). A) I'd rather leave
to Laurent.
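
For illustration, option C might look roughly like the sketch below
(untested, based on virtio_read() as shown in the diff above;
reinit_completion() in the !busy branch is an assumption about the
surrounding driver code):

static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
{
	int ret;
	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;

	/* Option C: a blocking caller never consumes a completion that
	 * may belong to a request queued for a different buffer. Wait
	 * for any pending request, discard its result, and resubmit
	 * with our own buffer below. Costs one wasted request. */
	if (wait && vi->busy) {
		ret = wait_for_completion_killable(&vi->have_data);
		if (ret < 0)
			return ret;
		vi->busy = false;
	}

	if (!vi->busy) {
		vi->busy = true;
		reinit_completion(&vi->have_data);
		register_buffer(vi, buf, size);
	}

	if (!wait && !completion_done(&vi->have_data))
		return 0;

	ret = wait_for_completion_killable(&vi->have_data);
	if (ret < 0)
		return ret;

	vi->busy = false;

	return min_t(size_t, size, vi->data_avail);
}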

Regards,
Martin
Michael S. Tsirkin Oct. 21, 2020, 2:43 p.m. UTC | #7
On Tue, Sep 08, 2020 at 05:33:40PM +0200, Martin Wilck wrote:
> [... earlier discussion and full patch quoted ...]
> How can we avoid this problem entirely? A) With private buffers, of
> course. B) Another, somewhat hackish, approach would be to remember the
> active "buffer" pointer in virtio_rng and restart the IO when another
> buffer is passed down. C) Finally, we could modify virtio_read() such
> that blocking calls always re-initialize the buffer; they'd then have
> to wait for a potentially already running IO from a previous,
> non-blocking access to finish first.
> 
> But I believe this is something which could (and should) be done
> independently. Alternatively, I could add B) or C). A) I'd rather leave
> to Laurent.
> 
> Regards,
> Martin
> 

C sounds good to me. Laurent?
Michael S. Tsirkin Nov. 25, 2020, 9:39 a.m. UTC | #8
On Tue, Sep 08, 2020 at 05:33:40PM +0200, Martin Wilck wrote:
> [...]
> 
> How can we avoid this problem entirely? A) With private buffers, of
> course. B) Another, somewhat hackish, approach would be to remember the
> active "buffer" pointer in virtio_rng and restart the IO when another
> buffer is passed down. C) Finally, we could modify virtio_read() such
> that blocking calls always re-initialize the buffer; they'd then have
> to wait for a potentially already-running IO from a previous,
> non-blocking access to finish first.
> 
> But I believe this is something which could (and should) be done
> independently. Alternatively, I could add B) or C). A) I'd rather leave
> to Laurent.
> 
> Regards,
> Martin

Of the simple solutions, C seems cleanest.
Laurent, any interest in working on A meanwhile?
Laurent Vivier Nov. 26, 2020, 10:49 a.m. UTC | #9
On 25/11/2020 10:39, Michael S. Tsirkin wrote:
> On Tue, Sep 08, 2020 at 05:33:40PM +0200, Martin Wilck wrote:
>> [...]
> 
> Of the simple solutions, C seems cleanest.
> Laurent, any interest in working on A meanwhile?
> 

Sorry, I didn't see your email.

I have no time to work on this at the moment, but virtio-rng fixes are at
the top of my TODO list...

Thanks,
Laurent
Claudio Fontana Dec. 14, 2022, 1:41 p.m. UTC | #10
On 11/26/20 11:49, Laurent Vivier wrote:
> On 25/11/2020 10:39, Michael S. Tsirkin wrote:
>> [...]
>>
>> Of the simple solutions, C seems cleanest.
>> Laurent, any interest in working on A meanwhile?
>>
> 
> Sorry, I didn't see your email.
> 
> I have no time to work on this for the moment. But virtio-rng fixes are on top of my TODO
> list...
> 
> Thanks,
> Laurent
> 
> 

Hi Laurent, Martin,

Is this resolved now?

I wonder if this is covered by Laurent's kernel commit:

commit 5c8e933050044d6dd2a000f9a5756ae73cbe7c44
Author: Laurent Vivier <lvivier@redhat.com>
Date:   Thu Oct 28 12:11:10 2021 +0200

    hwrng: virtio - don't waste entropy
    
    if we don't use all the entropy available in the buffer, keep it
    and use it later.
    
    Signed-off-by: Laurent Vivier <lvivier@redhat.com>
    Link: https://lore.kernel.org/r/20211028101111.128049-4-lvivier@redhat.com
    Signed-off-by: Michael S. Tsirkin <mst@redhat.com>


?
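
The leftover-entropy handling that commit describes could look roughly like
the sketch below; the field and helper names (vi->data, vi->data_idx,
request_entropy()) are assumptions about the commit, not an excerpt from it:

/* Sketch: serve reads from the tail of the previously filled internal
 * buffer, and only ask the host for more once it is drained. */
static unsigned int copy_data(struct virtrng_info *vi, void *buf,
                              unsigned int size)
{
        size = min_t(unsigned int, size, vi->data_avail);
        memcpy(buf, vi->data + vi->data_idx, size);
        vi->data_idx += size;
        vi->data_avail -= size;
        if (!vi->data_avail)
                request_entropy(vi);    /* queue a fresh host request */
        return size;
}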

Thanks,

Claudio
Martin Wilck Dec. 14, 2022, 3:36 p.m. UTC | #11
On Wed, 2022-12-14 at 14:41 +0100, Claudio Fontana wrote:
> On 11/26/20 11:49, Laurent Vivier wrote:
> 
> Hi Laurent, Martin,
> 
> Is this resolved now?
> 
> I wonder if this is covered by Laurent's kernel commit
> 5c8e933050044d6dd2a000f9a5756ae73cbe7c44 ("hwrng: virtio - don't waste
> entropy")?
> 

Hi Claudio,

Sorry, I haven't had the time to look into this in more depth.
I don't know what the current situation is.

Martin
diff mbox series

Patch

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index a90001e02bf7..8eaeceecb41e 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -65,7 +65,7 @@  static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 		register_buffer(vi, buf, size);
 	}
 
-	if (!wait)
+	if (!wait && !completion_done(&vi->have_data))
 		return 0;
 
 	ret = wait_for_completion_killable(&vi->have_data);
@@ -74,7 +74,7 @@  static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 
 	vi->busy = false;
 
-	return vi->data_avail;
+	return min_t(size_t, size, vi->data_avail);
 }
 
 static void virtio_cleanup(struct hwrng *rng)