diff mbox

[V4] virtio-net: send gratuitous packet when needed

Message ID 20120313090841.11110.82654.stgit@amd-6168-8-1.englab.nay.redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jason Wang March 13, 2012, 9:08 a.m. UTC
As the hypervisor does not have knowledge of the guest network configuration,
it's better to ask the guest to send a gratuitous packet when needed.

Guest test VIRTIO_NET_S_ANNOUNCE bit during config change interrupt and when it
is set, a workqueue is scheduled to send gratuitous packet through
NETDEV_NOTIFY_PEERS. This feature is negotiated through bit
VIRTIO_NET_F_GUEST_ANNOUNCE.

Changes from v3:
- cancel the workqueue during freeze

Changes from v2:
- fix the race between unregister_dev() and workqueue

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c   |   31 ++++++++++++++++++++++++++++++-
 include/linux/virtio_net.h |    2 ++
 2 files changed, 32 insertions(+), 1 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Michael S. Tsirkin March 13, 2012, 2:33 p.m. UTC | #1
On Tue, Mar 13, 2012 at 05:08:41PM +0800, Jason Wang wrote:
> As hypervior does not have the knowledge of guest network configuration, it's
> better to ask guest to send gratuitous packet when needed.

packet -> packets
> 
> Guest test VIRTIO_NET_S_ANNOUNCE bit during config change interrupt and when it

test -> tests

> is set, a workqueue is scheduled to send gratuitous packet through
> NETDEV_NOTIFY_PEERS. This feature is negotiated through bit
> VIRTIO_NET_F_GUEST_ANNOUNCE.
> 
> Changes from v3:
> - cancel the workqueue during freeze
> 
> Changes from v2:
> - fix the race between unregister_dev() and workqueue
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/virtio_net.c   |   31 ++++++++++++++++++++++++++++++-
>  include/linux/virtio_net.h |    2 ++
>  2 files changed, 32 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4880aa8..45f7ac6 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -72,6 +72,9 @@ struct virtnet_info {
>  	/* Work struct for refilling if we run low on memory. */
>  	struct delayed_work refill;
>  
> +	/* Work struct for sending gratituous packet. */

packets

> +	struct work_struct announce;
> +
>  	/* Chain pages by the private ptr. */
>  	struct page *pages;
>  
> @@ -512,6 +515,13 @@ static void refill_work(struct work_struct *work)
>  		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
>  }
>  
> +static void announce_work(struct work_struct *work)
> +{
> +	struct virtnet_info *vi = container_of(work, struct virtnet_info,
> +					       announce);
> +	netif_notify_peers(vi->dev);
> +}
> +
>  static int virtnet_poll(struct napi_struct *napi, int budget)
>  {
>  	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
> @@ -787,6 +797,8 @@ static int virtnet_close(struct net_device *dev)
>  
>  	/* Make sure refill_work doesn't re-enable napi! */
>  	cancel_delayed_work_sync(&vi->refill);
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
> +		cancel_work_sync(&vi->announce);

Don't make this cancel conditional on has_feature -
this is off the data path, and the code will be cleaner
if we do it unconditionally.

>  	napi_disable(&vi->napi);
>  
>  	return 0;
> @@ -962,11 +974,23 @@ static void virtnet_update_status(struct virtnet_info *vi)
>  		return;
>  
>  	/* Ignore unknown (future) status bits */
> -	v &= VIRTIO_NET_S_LINK_UP;
> +	v &= VIRTIO_NET_S_LINK_UP | VIRTIO_NET_S_ANNOUNCE;
>  
>  	if (vi->status == v)
>  		return;
>  
> +	if (v & VIRTIO_NET_S_ANNOUNCE) {
> +		v &= ~VIRTIO_NET_S_ANNOUNCE;
> +		vi->vdev->config->set(vi->vdev,
> +				      offsetof(struct virtio_net_config,
> +					       status),
> +				      &v, sizeof(v));
> +
> +		if ((v & VIRTIO_NET_S_LINK_UP) &&
> +		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
> +			schedule_work(&vi->announce);
> +	}
> +

It's probably easier to just do this unconditionally.
The only reason a feature bit might make sense is
that this way host knows guest will announce self.

Alternatively, if you want the ability to reuse the
status bit for something else, set must be conditional as well.

>  	vi->status = v;
>  
>  	if (vi->status & VIRTIO_NET_S_LINK_UP) {
> @@ -1076,6 +1100,8 @@ static int virtnet_probe(struct virtio_device *vdev)
>  		goto free;
>  
>  	INIT_DELAYED_WORK(&vi->refill, refill_work);
> +	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
> +		INIT_WORK(&vi->announce, announce_work);

Do this unconditionally too.

>  	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
>  	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
>  
> @@ -1187,6 +1213,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
>  	virtqueue_disable_cb(vi->svq);
>  	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
>  		virtqueue_disable_cb(vi->cvq);
> +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
> +		cancel_work_sync(&vi->announce);
>  
>  	netif_device_detach(vi->dev);
>  	cancel_delayed_work_sync(&vi->refill);
> @@ -1233,6 +1261,7 @@ static unsigned int features[] = {
>  	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
>  	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
>  	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
> +	VIRTIO_NET_F_GUEST_ANNOUNCE,
>  };
>  
>  static struct virtio_driver virtio_net_driver = {
> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> index 970d5a2..44a38d6 100644
> --- a/include/linux/virtio_net.h
> +++ b/include/linux/virtio_net.h
> @@ -49,8 +49,10 @@
>  #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
>  #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
>  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
> +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
>  
>  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> +#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */

I would put this in bit 8 (0x100), this way low status byte
is RO, high byte is RW.

>  
>  struct virtio_net_config {
>  	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell March 19, 2012, 2:16 a.m. UTC | #2
On Tue, 13 Mar 2012 16:33:31 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> > index 970d5a2..44a38d6 100644
> > --- a/include/linux/virtio_net.h
> > +++ b/include/linux/virtio_net.h
> > @@ -49,8 +49,10 @@
> >  #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
> >  #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
> >  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
> > +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
> >  
> >  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> > +#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
> 
> I would put this in bit 8 (0x100), this way low status byte
> is RO, high byte is RW.

The whole idea of acking by clearing the bit is unreliable, moving to a
separate byte just controls the damage.

How about you use bits 8-15 as a counter?  It's still theoretically
unreliable if 256 notifications pass before the guest notices, but it's
probably better and clearer than this.

I leave the final call to MST though.

Thanks,
Rusty.
Michael S. Tsirkin March 19, 2012, 8:44 a.m. UTC | #3
On Mon, Mar 19, 2012 at 12:46:29PM +1030, Rusty Russell wrote:
> On Tue, 13 Mar 2012 16:33:31 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> > > index 970d5a2..44a38d6 100644
> > > --- a/include/linux/virtio_net.h
> > > +++ b/include/linux/virtio_net.h
> > > @@ -49,8 +49,10 @@
> > >  #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
> > >  #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
> > >  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
> > > +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
> > >  
> > >  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> > > +#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
> > 
> > I would put this in bit 8 (0x100), this way low status byte
> > is RO, high byte is RW.
> 
> The whole idea of acking by clearing the bit is unreliable, moving to a
> separate byte just controls the damage.
> 
> How about you use bits 8-15 as a counter?  It's still theoretically
> unreliable if 256 notifications pass before the guest notices, but it's
> probably better and clearer than this.
> 
> I leave the final call to MST though.
> 
> Thanks,
> Rusty.

I guess the point was that we want a single packet
so we don't care if multiple notifications are coalesced
into a single one.

> -- 
>   How could I marry someone with more hair than me?  http://baldalex.org
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Wang March 19, 2012, 9:42 a.m. UTC | #4
On 03/19/2012 04:44 PM, Michael S. Tsirkin wrote:
> On Mon, Mar 19, 2012 at 12:46:29PM +1030, Rusty Russell wrote:
>> On Tue, 13 Mar 2012 16:33:31 +0200, "Michael S. Tsirkin"<mst@redhat.com>  wrote:
>>>> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
>>>> index 970d5a2..44a38d6 100644
>>>> --- a/include/linux/virtio_net.h
>>>> +++ b/include/linux/virtio_net.h
>>>> @@ -49,8 +49,10 @@
>>>>   #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
>>>>   #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
>>>>   #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
>>>> +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
>>>>
>>>>   #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
>>>> +#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
>>> I would put this in bit 8 (0x100), this way low status byte
>>> is RO, high byte is RW.
>> The whole idea of acking by clearing the bit is unreliable, moving to a
>> separate byte just controls the damage.
>>
>> How about you use bits 8-15 as a counter?  It's still theoretically
>> unreliable if 256 notifications pass before the guest notices, but it's
>> probably better and clearer than this.
>>
>> I leave the final call to MST though.
>>
>> Thanks,
>> Rusty.
> I guess the point was that we want a single packet
> so we don't care if multiple notifications are coalesced
> into a single one.
>

To reduce the possibility of the gratuitous packet being dropped or lost by
the network, qemu usually sends the gratuitous packets several times
(currently 5 times, with an increasing gap between them such as 50ms, 150ms,
250ms ...). As there's no method in the guest that can guarantee the
gratuitous packets were received by the switch, there is no need to care
about coalesced notifications in the guest. We may leave that work to qemu,
or just not care about it.

>> -- 
>>    How could I marry someone with more hair than me?  http://baldalex.org
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4880aa8..45f7ac6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -72,6 +72,9 @@  struct virtnet_info {
 	/* Work struct for refilling if we run low on memory. */
 	struct delayed_work refill;
 
+	/* Work struct for sending gratituous packet. */
+	struct work_struct announce;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -512,6 +515,13 @@  static void refill_work(struct work_struct *work)
 		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
 }
 
+static void announce_work(struct work_struct *work)
+{
+	struct virtnet_info *vi = container_of(work, struct virtnet_info,
+					       announce);
+	netif_notify_peers(vi->dev);
+}
+
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
 	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
@@ -787,6 +797,8 @@  static int virtnet_close(struct net_device *dev)
 
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		cancel_work_sync(&vi->announce);
 	napi_disable(&vi->napi);
 
 	return 0;
@@ -962,11 +974,23 @@  static void virtnet_update_status(struct virtnet_info *vi)
 		return;
 
 	/* Ignore unknown (future) status bits */
-	v &= VIRTIO_NET_S_LINK_UP;
+	v &= VIRTIO_NET_S_LINK_UP | VIRTIO_NET_S_ANNOUNCE;
 
 	if (vi->status == v)
 		return;
 
+	if (v & VIRTIO_NET_S_ANNOUNCE) {
+		v &= ~VIRTIO_NET_S_ANNOUNCE;
+		vi->vdev->config->set(vi->vdev,
+				      offsetof(struct virtio_net_config,
+					       status),
+				      &v, sizeof(v));
+
+		if ((v & VIRTIO_NET_S_LINK_UP) &&
+		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+			schedule_work(&vi->announce);
+	}
+
 	vi->status = v;
 
 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
@@ -1076,6 +1100,8 @@  static int virtnet_probe(struct virtio_device *vdev)
 		goto free;
 
 	INIT_DELAYED_WORK(&vi->refill, refill_work);
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		INIT_WORK(&vi->announce, announce_work);
 	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
 	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
 
@@ -1187,6 +1213,8 @@  static int virtnet_freeze(struct virtio_device *vdev)
 	virtqueue_disable_cb(vi->svq);
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
 		virtqueue_disable_cb(vi->cvq);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		cancel_work_sync(&vi->announce);
 
 	netif_device_detach(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
@@ -1233,6 +1261,7 @@  static unsigned int features[] = {
 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
+	VIRTIO_NET_F_GUEST_ANNOUNCE,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 970d5a2..44a38d6 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -49,8 +49,10 @@ 
 #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
 
 struct virtio_net_config {
 	/* The config defining mac address (if VIRTIO_NET_F_MAC) */