diff mbox

virtio leaks cpu mappings, was: qemu crash with virtio on Xen domUs (backtrace included)

Message ID alpine.DEB.2.02.1411241816040.2675@kaball.uk.xensource.com
State New
Headers show

Commit Message

Stefano Stabellini Nov. 24, 2014, 6:44 p.m. UTC
On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> > CC'ing Paolo.
> > 
> > 
> > Wen,
> > thanks for the logs.
> > 
> > I investigated a little bit and it seems to me that the bug occurs when
> > QEMU tries to unmap only a portion of a memory region previously mapped.
> > That doesn't work with xen-mapcache.
> > 
> > See these logs for example:
> > 
> > DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
> > DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> 
> Sorry the logs don't quite match, it was supposed to be:
> 
> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6

It looks like the problem is caused by iov_discard_front, called by
virtio_net_handle_ctrl. By changing iov_base after the sg has already
been mapped (cpu_physical_memory_map), it causes a leak in the mapping
because the corresponding cpu_physical_memory_unmap will only unmap a
portion of the original sg.  On Xen the problem is worse because
xen-mapcache aborts.

Comments

Konrad Rzeszutek Wilk Nov. 24, 2014, 6:52 p.m. UTC | #1
On Mon, Nov 24, 2014 at 06:44:45PM +0000, Stefano Stabellini wrote:
> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> > On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> > > CC'ing Paolo.
> > > 
> > > 
> > > Wen,
> > > thanks for the logs.
> > > 
> > > I investigated a little bit and it seems to me that the bug occurs when
> > > QEMU tries to unmap only a portion of a memory region previously mapped.
> > > That doesn't work with xen-mapcache.
> > > 
> > > See these logs for example:
> > > 
> > > DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
> > > DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> > 
> > Sorry the logs don't quite match, it was supposed to be:
> > 
> > DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
> > DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> 
> It looks like the problem is caused by iov_discard_front, called by
> virtio_net_handle_ctrl. By changing iov_base after the sg has already
> been mapped (cpu_physical_memory_map), it causes a leak in the mapping
> because the corresponding cpu_physical_memory_unmap will only unmap a
> portion of the original sg.  On Xen the problem is worse because
> xen-mapcache aborts.

Didn't um Andy post patches for ths:

http://lists.xen.org/archives/html/xen-devel/2014-09/msg02864.html

> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 2ac6ce5..b2b5c2d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>      struct iovec *iov;
>      unsigned int iov_cnt;
>  
> -    while (virtqueue_pop(vq, &elem)) {
> +    while (virtqueue_pop_nomap(vq, &elem)) {
>          if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
>              iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
>              error_report("virtio-net ctrl missing headers");
> @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>  
>          iov = elem.out_sg;
>          iov_cnt = elem.out_num;
> -        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
>          iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
> +
> +        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
> +        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
> +
> +        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
>          if (s != sizeof(ctrl)) {
>              status = VIRTIO_NET_ERR;
>          } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 3e4b70c..6a4bd3a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -446,7 +446,7 @@ void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
>      }
>  }
>  
> -int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem)
>  {
>      unsigned int i, head, max;
>      hwaddr desc_pa = vq->vring.desc;
> @@ -505,9 +505,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>          }
>      } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
>  
> -    /* Now map what we have collected */
> -    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> -    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
>  
>      elem->index = head;
>  
> @@ -517,6 +514,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>      return elem->in_num + elem->out_num;
>  }
>  
> +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> +{
> +    int rc = virtqueue_pop_nomap(vq, elem);
> +    if (rc > 0) {
> +        virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> +        virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
> +    }
> +    return rc;
> +}
> +
>  /* virtio device */
>  static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
>  {
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 3e54e90..40a3977 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -174,6 +174,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>  void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
>      size_t num_sg, int is_write);
>  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
> +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem);
>  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>                            unsigned int out_bytes);
>  void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
Stefano Stabellini Nov. 24, 2014, 7:01 p.m. UTC | #2
On Mon, 24 Nov 2014, Konrad Rzeszutek Wilk wrote:
> On Mon, Nov 24, 2014 at 06:44:45PM +0000, Stefano Stabellini wrote:
> > On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> > > On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> > > > CC'ing Paolo.
> > > > 
> > > > 
> > > > Wen,
> > > > thanks for the logs.
> > > > 
> > > > I investigated a little bit and it seems to me that the bug occurs when
> > > > QEMU tries to unmap only a portion of a memory region previously mapped.
> > > > That doesn't work with xen-mapcache.
> > > > 
> > > > See these logs for example:
> > > > 
> > > > DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
> > > > DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> > > 
> > > Sorry the logs don't quite match, it was supposed to be:
> > > 
> > > DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
> > > DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> > 
> > It looks like the problem is caused by iov_discard_front, called by
> > virtio_net_handle_ctrl. By changing iov_base after the sg has already
> > been mapped (cpu_physical_memory_map), it causes a leak in the mapping
> > because the corresponding cpu_physical_memory_unmap will only unmap a
> > portion of the original sg.  On Xen the problem is worse because
> > xen-mapcache aborts.
> 
> Didn't um Andy post patches for ths:
> 
> http://lists.xen.org/archives/html/xen-devel/2014-09/msg02864.html

It looks like these are fixing a linux side issue (the frontend) while
this patch is for a bug in QEMU (the backend).


> > 
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 2ac6ce5..b2b5c2d 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> >      struct iovec *iov;
> >      unsigned int iov_cnt;
> >  
> > -    while (virtqueue_pop(vq, &elem)) {
> > +    while (virtqueue_pop_nomap(vq, &elem)) {
> >          if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
> >              iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
> >              error_report("virtio-net ctrl missing headers");
> > @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> >  
> >          iov = elem.out_sg;
> >          iov_cnt = elem.out_num;
> > -        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
> >          iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
> > +
> > +        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
> > +        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
> > +
> > +        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
> >          if (s != sizeof(ctrl)) {
> >              status = VIRTIO_NET_ERR;
> >          } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 3e4b70c..6a4bd3a 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -446,7 +446,7 @@ void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
> >      }
> >  }
> >  
> > -int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> > +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem)
> >  {
> >      unsigned int i, head, max;
> >      hwaddr desc_pa = vq->vring.desc;
> > @@ -505,9 +505,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> >          }
> >      } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
> >  
> > -    /* Now map what we have collected */
> > -    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> > -    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
> >  
> >      elem->index = head;
> >  
> > @@ -517,6 +514,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> >      return elem->in_num + elem->out_num;
> >  }
> >  
> > +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> > +{
> > +    int rc = virtqueue_pop_nomap(vq, elem);
> > +    if (rc > 0) {
> > +        virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> > +        virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
> > +    }
> > +    return rc;
> > +}
> > +
> >  /* virtio device */
> >  static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
> >  {
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 3e54e90..40a3977 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -174,6 +174,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> >  void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
> >      size_t num_sg, int is_write);
> >  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
> > +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem);
> >  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
> >                            unsigned int out_bytes);
> >  void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> > 
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > http://lists.xen.org/xen-devel
>
Wen Congyang Nov. 25, 2014, 1:32 a.m. UTC | #3
On 11/25/2014 02:44 AM, Stefano Stabellini wrote:
> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
>> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
>>> CC'ing Paolo.
>>>
>>>
>>> Wen,
>>> thanks for the logs.
>>>
>>> I investigated a little bit and it seems to me that the bug occurs when
>>> QEMU tries to unmap only a portion of a memory region previously mapped.
>>> That doesn't work with xen-mapcache.
>>>
>>> See these logs for example:
>>>
>>> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
>>> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
>>
>> Sorry the logs don't quite match, it was supposed to be:
>>
>> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
>> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> 
> It looks like the problem is caused by iov_discard_front, called by
> virtio_net_handle_ctrl. By changing iov_base after the sg has already
> been mapped (cpu_physical_memory_map), it causes a leak in the mapping
> because the corresponding cpu_physical_memory_unmap will only unmap a
> portion of the original sg.  On Xen the problem is worse because
> xen-mapcache aborts.

This patch works for me.

Thanks
Wen Congyang

> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 2ac6ce5..b2b5c2d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>      struct iovec *iov;
>      unsigned int iov_cnt;
>  
> -    while (virtqueue_pop(vq, &elem)) {
> +    while (virtqueue_pop_nomap(vq, &elem)) {
>          if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
>              iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
>              error_report("virtio-net ctrl missing headers");
> @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>  
>          iov = elem.out_sg;
>          iov_cnt = elem.out_num;
> -        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
>          iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
> +
> +        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
> +        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
> +
> +        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
>          if (s != sizeof(ctrl)) {
>              status = VIRTIO_NET_ERR;
>          } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 3e4b70c..6a4bd3a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -446,7 +446,7 @@ void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
>      }
>  }
>  
> -int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem)
>  {
>      unsigned int i, head, max;
>      hwaddr desc_pa = vq->vring.desc;
> @@ -505,9 +505,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>          }
>      } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
>  
> -    /* Now map what we have collected */
> -    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> -    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
>  
>      elem->index = head;
>  
> @@ -517,6 +514,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>      return elem->in_num + elem->out_num;
>  }
>  
> +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> +{
> +    int rc = virtqueue_pop_nomap(vq, elem);
> +    if (rc > 0) {
> +        virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
> +        virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
> +    }
> +    return rc;
> +}
> +
>  /* virtio device */
>  static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
>  {
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 3e54e90..40a3977 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -174,6 +174,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>  void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
>      size_t num_sg, int is_write);
>  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
> +int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem);
>  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>                            unsigned int out_bytes);
>  void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> .
>
Jason Wang Nov. 25, 2014, 6:16 a.m. UTC | #4
On 11/25/2014 02:44 AM, Stefano Stabellini wrote:
> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
>> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
>>> CC'ing Paolo.
>>>
>>>
>>> Wen,
>>> thanks for the logs.
>>>
>>> I investigated a little bit and it seems to me that the bug occurs when
>>> QEMU tries to unmap only a portion of a memory region previously mapped.
>>> That doesn't work with xen-mapcache.
>>>
>>> See these logs for example:
>>>
>>> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
>>> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
>> Sorry the logs don't quite match, it was supposed to be:
>>
>> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
>> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> It looks like the problem is caused by iov_discard_front, called by
> virtio_net_handle_ctrl. By changing iov_base after the sg has already
> been mapped (cpu_physical_memory_map), it causes a leak in the mapping
> because the corresponding cpu_physical_memory_unmap will only unmap a
> portion of the original sg.  On Xen the problem is worse because
> xen-mapcache aborts.
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 2ac6ce5..b2b5c2d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>      struct iovec *iov;
>      unsigned int iov_cnt;
>  
> -    while (virtqueue_pop(vq, &elem)) {
> +    while (virtqueue_pop_nomap(vq, &elem)) {
>          if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
>              iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
>              error_report("virtio-net ctrl missing headers");
> @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>  
>          iov = elem.out_sg;
>          iov_cnt = elem.out_num;
> -        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
>          iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
> +
> +        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
> +        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
> +
> +        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));

Does this really work? The code in fact skips the location that contains
virtio_net_ctrl_hdr. And virtio_net_handle_mac() still calls
iov_discard_front().

How about copy iov to a temp variable and use it in this function?
Stefano Stabellini Nov. 25, 2014, 1:53 p.m. UTC | #5
On Tue, 25 Nov 2014, Jason Wang wrote:
> On 11/25/2014 02:44 AM, Stefano Stabellini wrote:
> > On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> >> On Mon, 24 Nov 2014, Stefano Stabellini wrote:
> >>> CC'ing Paolo.
> >>>
> >>>
> >>> Wen,
> >>> thanks for the logs.
> >>>
> >>> I investigated a little bit and it seems to me that the bug occurs when
> >>> QEMU tries to unmap only a portion of a memory region previously mapped.
> >>> That doesn't work with xen-mapcache.
> >>>
> >>> See these logs for example:
> >>>
> >>> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
> >>> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> >> Sorry the logs don't quite match, it was supposed to be:
> >>
> >> DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
> >> DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
> > It looks like the problem is caused by iov_discard_front, called by
> > virtio_net_handle_ctrl. By changing iov_base after the sg has already
> > been mapped (cpu_physical_memory_map), it causes a leak in the mapping
> > because the corresponding cpu_physical_memory_unmap will only unmap a
> > portion of the original sg.  On Xen the problem is worse because
> > xen-mapcache aborts.
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 2ac6ce5..b2b5c2d 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> >      struct iovec *iov;
> >      unsigned int iov_cnt;
> >  
> > -    while (virtqueue_pop(vq, &elem)) {
> > +    while (virtqueue_pop_nomap(vq, &elem)) {
> >          if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
> >              iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
> >              error_report("virtio-net ctrl missing headers");
> > @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> >  
> >          iov = elem.out_sg;
> >          iov_cnt = elem.out_num;
> > -        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
> >          iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
> > +
> > +        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
> > +        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
> > +
> > +        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
> 
> Does this really work?

It seems to work here, as in it doesn't crash QEMU and I am able to boot
a guest with network. I didn't try any MAC related commands.


> The code in fact skips the location that contains
> virtio_net_ctrl_hdr. And virtio_net_handle_mac() still calls
> iov_discard_front().
>
> How about copy iov to a temp variable and use it in this function?

That would only work if I moved the cpu_physical_memory_unmap call
outside of virtqueue_fill, so that we can pass different iov to them.
We need to unmap the same iov that was previously mapped by
virtqueue_pop.
diff mbox

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 2ac6ce5..b2b5c2d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -775,7 +775,7 @@  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     struct iovec *iov;
     unsigned int iov_cnt;
 
-    while (virtqueue_pop(vq, &elem)) {
+    while (virtqueue_pop_nomap(vq, &elem)) {
         if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
             iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
             error_report("virtio-net ctrl missing headers");
@@ -784,8 +784,12 @@  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 
         iov = elem.out_sg;
         iov_cnt = elem.out_num;
-        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
+
+        virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
+        virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
+
+        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
         if (s != sizeof(ctrl)) {
             status = VIRTIO_NET_ERR;
         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 3e4b70c..6a4bd3a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -446,7 +446,7 @@  void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
     }
 }
 
-int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem)
 {
     unsigned int i, head, max;
     hwaddr desc_pa = vq->vring.desc;
@@ -505,9 +505,6 @@  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
         }
     } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
 
-    /* Now map what we have collected */
-    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
-    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
 
     elem->index = head;
 
@@ -517,6 +514,16 @@  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     return elem->in_num + elem->out_num;
 }
 
+int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+{
+    int rc = virtqueue_pop_nomap(vq, elem);
+    if (rc > 0) {
+        virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
+        virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+    }
+    return rc;
+}
+
 /* virtio device */
 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
 {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 3e54e90..40a3977 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -174,6 +174,7 @@  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
     size_t num_sg, int is_write);
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
+int virtqueue_pop_nomap(VirtQueue *vq, VirtQueueElement *elem);
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes);
 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,