diff mbox

[RFC,v5,net-next,4/6] virtio-net: add basic interrupt coalescing support

Message ID 1423471165-34243-5-git-send-email-jasowang@redhat.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Jason Wang Feb. 9, 2015, 8:39 a.m. UTC
This patch enables the interrupt coalescing setting through ethtool.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c        | 67 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_net.h | 12 ++++++++
 2 files changed, 79 insertions(+)

Comments

Rusty Russell Feb. 10, 2015, 1:32 a.m. UTC | #1
Jason Wang <jasowang@redhat.com> writes:
> This patch enables the interrupt coalescing setting through ethtool.

The problem is that there's nothing network specific about interrupt
coalescing.  I can see other devices wanting exactly the same thing,
which means we'd deprecate this in the next virtio standard.

I think the right answer is to extend like we did with
vring_used_event(), eg:

1) Add a new feature VIRTIO_F_RING_COALESCE.
2) Add another 32-bit field after vring_used_event(), eg:
        #define vring_used_delay(vr) (*(u32 *)&(vr)->avail->ring[(vr)->num + 2])

This loses the ability to coalesce by number of frames, but we can still
do number of sg entries, as we do now with used_event, and we could
change virtqueue_enable_cb_delayed() to take a precise number if we
wanted.

My feeling is that this should be a v1.0-only feature though
(eg. feature bit 33).

Cheers,
Rusty.

> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/virtio_net.c        | 67 +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/virtio_net.h | 12 ++++++++
>  2 files changed, 79 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index cc5f5de..2b958fb 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -145,6 +145,11 @@ struct virtnet_info {
>  
>  	/* Budget for polling tx completion */
>  	u32 tx_work_limit;
> +
> +	__u32 rx_coalesce_usecs;
> +	__u32 rx_max_coalesced_frames;
> +	__u32 tx_coalesce_usecs;
> +	__u32 tx_max_coalesced_frames;
>  };
>  
>  struct padded_vnet_hdr {
> @@ -1404,12 +1409,73 @@ static void virtnet_get_channels(struct net_device *dev,
>  	channels->other_count = 0;
>  }
>  
> +static int virtnet_set_coalesce(struct net_device *dev,
> +				struct ethtool_coalesce *ec)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +	struct scatterlist sg;
> +	struct virtio_net_ctrl_coalesce c;
> +
> +	if (!vi->has_cvq ||
> +	    !virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_COALESCE))
> +		return -EOPNOTSUPP;
> +	if (vi->rx_coalesce_usecs != ec->rx_coalesce_usecs ||
> +	    vi->rx_max_coalesced_frames != ec->rx_max_coalesced_frames) {
> +		c.coalesce_usecs = ec->rx_coalesce_usecs;
> +		c.max_coalesced_frames = ec->rx_max_coalesced_frames;
> +		sg_init_one(&sg, &c, sizeof(c));
> +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> +					  VIRTIO_NET_CTRL_COALESCE_RX_SET,
> +					  &sg)) {
> +			dev_warn(&dev->dev, "Fail to set rx coalescing\n");
> +			return -EINVAL;
> +		}
> +		vi->rx_coalesce_usecs = ec->rx_coalesce_usecs;
> +		vi->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
> +	}
> +
> +	if (vi->tx_coalesce_usecs != ec->tx_coalesce_usecs ||
> +	    vi->tx_max_coalesced_frames != ec->tx_max_coalesced_frames) {
> +		c.coalesce_usecs = ec->tx_coalesce_usecs;
> +		c.max_coalesced_frames = ec->tx_max_coalesced_frames;
> +		sg_init_one(&sg, &c, sizeof(c));
> +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> +					  VIRTIO_NET_CTRL_COALESCE_TX_SET,
> +					  &sg)) {
> +			dev_warn(&dev->dev, "Fail to set tx coalescing\n");
> +			return -EINVAL;
> +		}
> +		vi->tx_coalesce_usecs = ec->tx_coalesce_usecs;
> +		vi->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
> +	}
> +
> +	vi->tx_work_limit = ec->tx_max_coalesced_frames_irq;
> +
> +	return 0;
> +}
> +
> +static int virtnet_get_coalesce(struct net_device *dev,
> +				struct ethtool_coalesce *ec)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +
> +	ec->rx_coalesce_usecs = vi->rx_coalesce_usecs;
> +	ec->rx_max_coalesced_frames = vi->rx_max_coalesced_frames;
> +	ec->tx_coalesce_usecs = vi->tx_coalesce_usecs;
> +	ec->tx_max_coalesced_frames = vi->tx_max_coalesced_frames;
> +	ec->tx_max_coalesced_frames_irq = vi->tx_work_limit;
> +
> +	return 0;
> +}
> +
>  static const struct ethtool_ops virtnet_ethtool_ops = {
>  	.get_drvinfo = virtnet_get_drvinfo,
>  	.get_link = ethtool_op_get_link,
>  	.get_ringparam = virtnet_get_ringparam,
>  	.set_channels = virtnet_set_channels,
>  	.get_channels = virtnet_get_channels,
> +	.set_coalesce = virtnet_set_coalesce,
> +	.get_coalesce = virtnet_get_coalesce,
>  };
>  
>  #define MIN_MTU 68
> @@ -2048,6 +2114,7 @@ static unsigned int features[] = {
>  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
>  	VIRTIO_NET_F_CTRL_MAC_ADDR,
>  	VIRTIO_F_ANY_LAYOUT,
> +	VIRTIO_NET_F_CTRL_COALESCE,
>  };
>  
>  static struct virtio_driver virtio_net_driver = {
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index b5f1677..332009d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -34,6 +34,7 @@
>  /* The feature bitmap for virtio net */
>  #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
>  #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
> +#define VIRTIO_NET_F_CTRL_COALESCE 3	/* Set coalescing */
>  #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
>  #define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
>  #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
> @@ -202,4 +203,15 @@ struct virtio_net_ctrl_mq {
>   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
>   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
>  
> +struct virtio_net_ctrl_coalesce {
> +	__u32 coalesce_usecs;
> +	__u32 max_coalesced_frames;
> +};
> +
> +#define VIRTIO_NET_CTRL_COALESCE 6
> + #define VIRTIO_NET_CTRL_COALESCE_TX_SET 0
> + #define VIRTIO_NET_CTRL_COALESCE_TX_GET 1
> + #define VIRTIO_NET_CTRL_COALESCE_RX_SET 2
> + #define VIRTIO_NET_CTRL_COALESCE_RX_GET 3
> +
>  #endif /* _LINUX_VIRTIO_NET_H */
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Wang Feb. 10, 2015, 6:51 a.m. UTC | #2
On Tue, Feb 10, 2015 at 9:32 AM, Rusty Russell <rusty@rustcorp.com.au> 
wrote:
> Jason Wang <jasowang@redhat.com> writes:
>>  This patch enables the interrupt coalescing setting through ethtool.
> 
> The problem is that there's nothing network specific about interrupt
> coalescing.  I can see other devices wanting exactly the same thing,
> which means we'd deprecate this in the next virtio standard.
> 
> I think the right answer is to extend like we did with
> vring_used_event(), eg:
> 
> 1) Add a new feature VIRTIO_F_RING_COALESCE.
> 2) Add another a 32-bit field after vring_used_event(), eg:
>         #define vring_used_delay(vr) (*(u32 
> *)((vr)->avail->ring[(vr)->num + 2]))

Yes. This looks better, and we don't even need a device-specific
configuration method.

> 
> This loses the ability to coalesce by number of frames, but we can 
> still
> do number of sg entries, as we do now with used_event, and we could
> change virtqueue_enable_cb_delayed() to take a precise number if we
> wanted.

Can we give this a device-specific meaning? For virtio-net, we want
to expose the coalescing settings through ethtool (tx-frames). Frame
coalescing is usually combined with a timer, so we probably need another
field after vring_used_delay() holding the timer interval, so that the
interrupt is triggered if no new used buffers arrive within that interval.

> 
> 
> My feeling is that this should be a v1.0-only feature though
> (eg. feature bit 33).

Yes it should.

> 
> Cheers,
> Rusty.
> 
>>  Cc: Rusty Russell <rusty@rustcorp.com.au>
>>  Cc: Michael S. Tsirkin <mst@redhat.com>
>>  Signed-off-by: Jason Wang <jasowang@redhat.com>
>>  ---
>>   drivers/net/virtio_net.c        | 67 
>> +++++++++++++++++++++++++++++++++++++++++
>>   include/uapi/linux/virtio_net.h | 12 ++++++++
>>   2 files changed, 79 insertions(+)
>> 
>>  diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>  index cc5f5de..2b958fb 100644
>>  --- a/drivers/net/virtio_net.c
>>  +++ b/drivers/net/virtio_net.c
>>  @@ -145,6 +145,11 @@ struct virtnet_info {
>>   
>>   	/* Budget for polling tx completion */
>>   	u32 tx_work_limit;
>>  +
>>  +	__u32 rx_coalesce_usecs;
>>  +	__u32 rx_max_coalesced_frames;
>>  +	__u32 tx_coalesce_usecs;
>>  +	__u32 tx_max_coalesced_frames;
>>   };
>>   
>>   struct padded_vnet_hdr {
>>  @@ -1404,12 +1409,73 @@ static void virtnet_get_channels(struct 
>> net_device *dev,
>>   	channels->other_count = 0;
>>   }
>>   
>>  +static int virtnet_set_coalesce(struct net_device *dev,
>>  +				struct ethtool_coalesce *ec)
>>  +{
>>  +	struct virtnet_info *vi = netdev_priv(dev);
>>  +	struct scatterlist sg;
>>  +	struct virtio_net_ctrl_coalesce c;
>>  +
>>  +	if (!vi->has_cvq ||
>>  +	    !virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_COALESCE))
>>  +		return -EOPNOTSUPP;
>>  +	if (vi->rx_coalesce_usecs != ec->rx_coalesce_usecs ||
>>  +	    vi->rx_max_coalesced_frames != ec->rx_max_coalesced_frames) {
>>  +		c.coalesce_usecs = ec->rx_coalesce_usecs;
>>  +		c.max_coalesced_frames = ec->rx_max_coalesced_frames;
>>  +		sg_init_one(&sg, &c, sizeof(c));
>>  +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
>>  +					  VIRTIO_NET_CTRL_COALESCE_RX_SET,
>>  +					  &sg)) {
>>  +			dev_warn(&dev->dev, "Fail to set rx coalescing\n");
>>  +			return -EINVAL;
>>  +		}
>>  +		vi->rx_coalesce_usecs = ec->rx_coalesce_usecs;
>>  +		vi->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
>>  +	}
>>  +
>>  +	if (vi->tx_coalesce_usecs != ec->tx_coalesce_usecs ||
>>  +	    vi->tx_max_coalesced_frames != ec->tx_max_coalesced_frames) {
>>  +		c.coalesce_usecs = ec->tx_coalesce_usecs;
>>  +		c.max_coalesced_frames = ec->tx_max_coalesced_frames;
>>  +		sg_init_one(&sg, &c, sizeof(c));
>>  +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
>>  +					  VIRTIO_NET_CTRL_COALESCE_TX_SET,
>>  +					  &sg)) {
>>  +			dev_warn(&dev->dev, "Fail to set tx coalescing\n");
>>  +			return -EINVAL;
>>  +		}
>>  +		vi->tx_coalesce_usecs = ec->tx_coalesce_usecs;
>>  +		vi->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
>>  +	}
>>  +
>>  +	vi->tx_work_limit = ec->tx_max_coalesced_frames_irq;
>>  +
>>  +	return 0;
>>  +}
>>  +
>>  +static int virtnet_get_coalesce(struct net_device *dev,
>>  +				struct ethtool_coalesce *ec)
>>  +{
>>  +	struct virtnet_info *vi = netdev_priv(dev);
>>  +
>>  +	ec->rx_coalesce_usecs = vi->rx_coalesce_usecs;
>>  +	ec->rx_max_coalesced_frames = vi->rx_max_coalesced_frames;
>>  +	ec->tx_coalesce_usecs = vi->tx_coalesce_usecs;
>>  +	ec->tx_max_coalesced_frames = vi->tx_max_coalesced_frames;
>>  +	ec->tx_max_coalesced_frames_irq = vi->tx_work_limit;
>>  +
>>  +	return 0;
>>  +}
>>  +
>>   static const struct ethtool_ops virtnet_ethtool_ops = {
>>   	.get_drvinfo = virtnet_get_drvinfo,
>>   	.get_link = ethtool_op_get_link,
>>   	.get_ringparam = virtnet_get_ringparam,
>>   	.set_channels = virtnet_set_channels,
>>   	.get_channels = virtnet_get_channels,
>>  +	.set_coalesce = virtnet_set_coalesce,
>>  +	.get_coalesce = virtnet_get_coalesce,
>>   };
>>   
>>   #define MIN_MTU 68
>>  @@ -2048,6 +2114,7 @@ static unsigned int features[] = {
>>   	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
>>   	VIRTIO_NET_F_CTRL_MAC_ADDR,
>>   	VIRTIO_F_ANY_LAYOUT,
>>  +	VIRTIO_NET_F_CTRL_COALESCE,
>>   };
>>   
>>   static struct virtio_driver virtio_net_driver = {
>>  diff --git a/include/uapi/linux/virtio_net.h 
>> b/include/uapi/linux/virtio_net.h
>>  index b5f1677..332009d 100644
>>  --- a/include/uapi/linux/virtio_net.h
>>  +++ b/include/uapi/linux/virtio_net.h
>>  @@ -34,6 +34,7 @@
>>   /* The feature bitmap for virtio net */
>>   #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
>>   #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial 
>> csum */
>>  +#define VIRTIO_NET_F_CTRL_COALESCE 3	/* Set coalescing */
>>   #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
>>   #define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
>>   #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
>>  @@ -202,4 +203,15 @@ struct virtio_net_ctrl_mq {
>>    #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
>>    #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
>>   
>>  +struct virtio_net_ctrl_coalesce {
>>  +	__u32 coalesce_usecs;
>>  +	__u32 max_coalesced_frames;
>>  +};
>>  +
>>  +#define VIRTIO_NET_CTRL_COALESCE 6
>>  + #define VIRTIO_NET_CTRL_COALESCE_TX_SET 0
>>  + #define VIRTIO_NET_CTRL_COALESCE_TX_GET 1
>>  + #define VIRTIO_NET_CTRL_COALESCE_RX_SET 2
>>  + #define VIRTIO_NET_CTRL_COALESCE_RX_GET 3
>>  +
>>   #endif /* _LINUX_VIRTIO_NET_H */
>>  -- 
>>  1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 10, 2015, 10:25 a.m. UTC | #3
On Tue, Feb 10, 2015 at 06:59:30AM +0008, Jason Wang wrote:
> 
> 
> On Tue, Feb 10, 2015 at 9:32 AM, Rusty Russell <rusty@rustcorp.com.au>
> wrote:
> >Jason Wang <jasowang@redhat.com> writes:
> >> This patch enables the interrupt coalescing setting through ethtool.
> >
> >The problem is that there's nothing network specific about interrupt
> >coalescing.  I can see other devices wanting exactly the same thing,
> >which means we'd deprecate this in the next virtio standard.
> >
> >I think the right answer is to extend like we did with
> >vring_used_event(), eg:
> >
> >1) Add a new feature VIRTIO_F_RING_COALESCE.
> >2) Add another a 32-bit field after vring_used_event(), eg:
> >        #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num
> >+ 2]))
> 
> Yes. This looks better and we don't even need device specific configuration
> method.
> 
> >
> >This loses the ability to coalesce by number of frames, but we can still
> >do number of sg entries, as we do now with used_event, and we could
> >change virtqueue_enable_cb_delayed() to take a precise number if we
> >wanted.
> 
> Can we give a device specific meaning for this? For virtio-net, we want to
> expose the coalescing settings through ethtool (tx-frames). And it was
> usually used with a timer, so probably another field after
> vring_used_delay() for this timer interval to trigger the interrupt if no
> new used buffers come after this interval.

I think what Rusty has in mind is precisely sticking the delay
in vring_used_delay.


> >
> >
> >My feeling is that this should be a v1.0-only feature though
> >(eg. feature bit 33).
> 
> Yes it should.
> 
> >
> >Cheers,
> >Rusty.
> >
> >> Cc: Rusty Russell <rusty@rustcorp.com.au>
> >> Cc: Michael S. Tsirkin <mst@redhat.com>
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >> ---
> >>  drivers/net/virtio_net.c        | 67
> >>+++++++++++++++++++++++++++++++++++++++++
> >>  include/uapi/linux/virtio_net.h | 12 ++++++++
> >>  2 files changed, 79 insertions(+)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index cc5f5de..2b958fb 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -145,6 +145,11 @@ struct virtnet_info {
> >>  	/* Budget for polling tx completion */
> >>  	u32 tx_work_limit;
> >> +
> >> +	__u32 rx_coalesce_usecs;
> >> +	__u32 rx_max_coalesced_frames;
> >> +	__u32 tx_coalesce_usecs;
> >> +	__u32 tx_max_coalesced_frames;
> >>  };
> >>  struct padded_vnet_hdr {
> >> @@ -1404,12 +1409,73 @@ static void virtnet_get_channels(struct
> >>net_device *dev,
> >>  	channels->other_count = 0;
> >>  }
> >> +static int virtnet_set_coalesce(struct net_device *dev,
> >> +				struct ethtool_coalesce *ec)
> >> +{
> >> +	struct virtnet_info *vi = netdev_priv(dev);
> >> +	struct scatterlist sg;
> >> +	struct virtio_net_ctrl_coalesce c;
> >> +
> >> +	if (!vi->has_cvq ||
> >> +	    !virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_COALESCE))
> >> +		return -EOPNOTSUPP;
> >> +	if (vi->rx_coalesce_usecs != ec->rx_coalesce_usecs ||
> >> +	    vi->rx_max_coalesced_frames != ec->rx_max_coalesced_frames) {
> >> +		c.coalesce_usecs = ec->rx_coalesce_usecs;
> >> +		c.max_coalesced_frames = ec->rx_max_coalesced_frames;
> >> +		sg_init_one(&sg, &c, sizeof(c));
> >> +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> >> +					  VIRTIO_NET_CTRL_COALESCE_RX_SET,
> >> +					  &sg)) {
> >> +			dev_warn(&dev->dev, "Fail to set rx coalescing\n");
> >> +			return -EINVAL;
> >> +		}
> >> +		vi->rx_coalesce_usecs = ec->rx_coalesce_usecs;
> >> +		vi->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
> >> +	}
> >> +
> >> +	if (vi->tx_coalesce_usecs != ec->tx_coalesce_usecs ||
> >> +	    vi->tx_max_coalesced_frames != ec->tx_max_coalesced_frames) {
> >> +		c.coalesce_usecs = ec->tx_coalesce_usecs;
> >> +		c.max_coalesced_frames = ec->tx_max_coalesced_frames;
> >> +		sg_init_one(&sg, &c, sizeof(c));
> >> +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> >> +					  VIRTIO_NET_CTRL_COALESCE_TX_SET,
> >> +					  &sg)) {
> >> +			dev_warn(&dev->dev, "Fail to set tx coalescing\n");
> >> +			return -EINVAL;
> >> +		}
> >> +		vi->tx_coalesce_usecs = ec->tx_coalesce_usecs;
> >> +		vi->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
> >> +	}
> >> +
> >> +	vi->tx_work_limit = ec->tx_max_coalesced_frames_irq;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int virtnet_get_coalesce(struct net_device *dev,
> >> +				struct ethtool_coalesce *ec)
> >> +{
> >> +	struct virtnet_info *vi = netdev_priv(dev);
> >> +
> >> +	ec->rx_coalesce_usecs = vi->rx_coalesce_usecs;
> >> +	ec->rx_max_coalesced_frames = vi->rx_max_coalesced_frames;
> >> +	ec->tx_coalesce_usecs = vi->tx_coalesce_usecs;
> >> +	ec->tx_max_coalesced_frames = vi->tx_max_coalesced_frames;
> >> +	ec->tx_max_coalesced_frames_irq = vi->tx_work_limit;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>  static const struct ethtool_ops virtnet_ethtool_ops = {
> >>  	.get_drvinfo = virtnet_get_drvinfo,
> >>  	.get_link = ethtool_op_get_link,
> >>  	.get_ringparam = virtnet_get_ringparam,
> >>  	.set_channels = virtnet_set_channels,
> >>  	.get_channels = virtnet_get_channels,
> >> +	.set_coalesce = virtnet_set_coalesce,
> >> +	.get_coalesce = virtnet_get_coalesce,
> >>  };
> >>  #define MIN_MTU 68
> >> @@ -2048,6 +2114,7 @@ static unsigned int features[] = {
> >>  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
> >>  	VIRTIO_NET_F_CTRL_MAC_ADDR,
> >>  	VIRTIO_F_ANY_LAYOUT,
> >> +	VIRTIO_NET_F_CTRL_COALESCE,
> >>  };
> >>  static struct virtio_driver virtio_net_driver = {
> >> diff --git a/include/uapi/linux/virtio_net.h
> >>b/include/uapi/linux/virtio_net.h
> >> index b5f1677..332009d 100644
> >> --- a/include/uapi/linux/virtio_net.h
> >> +++ b/include/uapi/linux/virtio_net.h
> >> @@ -34,6 +34,7 @@
> >>  /* The feature bitmap for virtio net */
> >>  #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
> >>  #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial
> >>csum */
> >> +#define VIRTIO_NET_F_CTRL_COALESCE 3	/* Set coalescing */
> >>  #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
> >>  #define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
> >>  #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
> >> @@ -202,4 +203,15 @@ struct virtio_net_ctrl_mq {
> >>   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
> >>   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
> >> +struct virtio_net_ctrl_coalesce {
> >> +	__u32 coalesce_usecs;
> >> +	__u32 max_coalesced_frames;
> >> +};
> >> +
> >> +#define VIRTIO_NET_CTRL_COALESCE 6
> >> + #define VIRTIO_NET_CTRL_COALESCE_TX_SET 0
> >> + #define VIRTIO_NET_CTRL_COALESCE_TX_GET 1
> >> + #define VIRTIO_NET_CTRL_COALESCE_RX_SET 2
> >> + #define VIRTIO_NET_CTRL_COALESCE_RX_GET 3
> >> +
> >>  #endif /* _LINUX_VIRTIO_NET_H */
> >> --  1.8.3.1
> >--
> >To unsubscribe from this list: send the line "unsubscribe netdev" in
> >the body of a message to majordomo@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 10, 2015, 10:40 a.m. UTC | #4
On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
> > This patch enables the interrupt coalescing setting through ethtool.
> 
> The problem is that there's nothing network specific about interrupt
> coalescing.  I can see other devices wanting exactly the same thing,
> which means we'd deprecate this in the next virtio standard.
> 
> I think the right answer is to extend like we did with
> vring_used_event(), eg:
> 
> 1) Add a new feature VIRTIO_F_RING_COALESCE.
> 2) Add another a 32-bit field after vring_used_event(), eg:
>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
> 
> This loses the ability to coalesce by number of frames, but we can still
> do number of sg entries, as we do now with used_event, and we could
> change virtqueue_enable_cb_delayed() to take a precise number if we
> wanted.

But do we expect the delay to be updated dynamically?
If not, why not stick it in config space?

> My feeling is that this should be a v1.0-only feature though
> (eg. feature bit 33).
> 
> Cheers,
> Rusty.

Yes, e.g. we can't extend config space for legacy virtio pci.

> > Cc: Rusty Russell <rusty@rustcorp.com.au>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >  drivers/net/virtio_net.c        | 67 +++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/virtio_net.h | 12 ++++++++
> >  2 files changed, 79 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index cc5f5de..2b958fb 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -145,6 +145,11 @@ struct virtnet_info {
> >  
> >  	/* Budget for polling tx completion */
> >  	u32 tx_work_limit;
> > +
> > +	__u32 rx_coalesce_usecs;
> > +	__u32 rx_max_coalesced_frames;
> > +	__u32 tx_coalesce_usecs;
> > +	__u32 tx_max_coalesced_frames;
> >  };
> >  
> >  struct padded_vnet_hdr {
> > @@ -1404,12 +1409,73 @@ static void virtnet_get_channels(struct net_device *dev,
> >  	channels->other_count = 0;
> >  }
> >  
> > +static int virtnet_set_coalesce(struct net_device *dev,
> > +				struct ethtool_coalesce *ec)
> > +{
> > +	struct virtnet_info *vi = netdev_priv(dev);
> > +	struct scatterlist sg;
> > +	struct virtio_net_ctrl_coalesce c;
> > +
> > +	if (!vi->has_cvq ||
> > +	    !virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_COALESCE))
> > +		return -EOPNOTSUPP;
> > +	if (vi->rx_coalesce_usecs != ec->rx_coalesce_usecs ||
> > +	    vi->rx_max_coalesced_frames != ec->rx_max_coalesced_frames) {
> > +		c.coalesce_usecs = ec->rx_coalesce_usecs;
> > +		c.max_coalesced_frames = ec->rx_max_coalesced_frames;
> > +		sg_init_one(&sg, &c, sizeof(c));
> > +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> > +					  VIRTIO_NET_CTRL_COALESCE_RX_SET,
> > +					  &sg)) {
> > +			dev_warn(&dev->dev, "Fail to set rx coalescing\n");
> > +			return -EINVAL;
> > +		}
> > +		vi->rx_coalesce_usecs = ec->rx_coalesce_usecs;
> > +		vi->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
> > +	}
> > +
> > +	if (vi->tx_coalesce_usecs != ec->tx_coalesce_usecs ||
> > +	    vi->tx_max_coalesced_frames != ec->tx_max_coalesced_frames) {
> > +		c.coalesce_usecs = ec->tx_coalesce_usecs;
> > +		c.max_coalesced_frames = ec->tx_max_coalesced_frames;
> > +		sg_init_one(&sg, &c, sizeof(c));
> > +		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
> > +					  VIRTIO_NET_CTRL_COALESCE_TX_SET,
> > +					  &sg)) {
> > +			dev_warn(&dev->dev, "Fail to set tx coalescing\n");
> > +			return -EINVAL;
> > +		}
> > +		vi->tx_coalesce_usecs = ec->tx_coalesce_usecs;
> > +		vi->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
> > +	}
> > +
> > +	vi->tx_work_limit = ec->tx_max_coalesced_frames_irq;
> > +
> > +	return 0;
> > +}
> > +
> > +static int virtnet_get_coalesce(struct net_device *dev,
> > +				struct ethtool_coalesce *ec)
> > +{
> > +	struct virtnet_info *vi = netdev_priv(dev);
> > +
> > +	ec->rx_coalesce_usecs = vi->rx_coalesce_usecs;
> > +	ec->rx_max_coalesced_frames = vi->rx_max_coalesced_frames;
> > +	ec->tx_coalesce_usecs = vi->tx_coalesce_usecs;
> > +	ec->tx_max_coalesced_frames = vi->tx_max_coalesced_frames;
> > +	ec->tx_max_coalesced_frames_irq = vi->tx_work_limit;
> > +
> > +	return 0;
> > +}
> > +
> >  static const struct ethtool_ops virtnet_ethtool_ops = {
> >  	.get_drvinfo = virtnet_get_drvinfo,
> >  	.get_link = ethtool_op_get_link,
> >  	.get_ringparam = virtnet_get_ringparam,
> >  	.set_channels = virtnet_set_channels,
> >  	.get_channels = virtnet_get_channels,
> > +	.set_coalesce = virtnet_set_coalesce,
> > +	.get_coalesce = virtnet_get_coalesce,
> >  };
> >  
> >  #define MIN_MTU 68
> > @@ -2048,6 +2114,7 @@ static unsigned int features[] = {
> >  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
> >  	VIRTIO_NET_F_CTRL_MAC_ADDR,
> >  	VIRTIO_F_ANY_LAYOUT,
> > +	VIRTIO_NET_F_CTRL_COALESCE,
> >  };
> >  
> >  static struct virtio_driver virtio_net_driver = {
> > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> > index b5f1677..332009d 100644
> > --- a/include/uapi/linux/virtio_net.h
> > +++ b/include/uapi/linux/virtio_net.h
> > @@ -34,6 +34,7 @@
> >  /* The feature bitmap for virtio net */
> >  #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
> >  #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
> > +#define VIRTIO_NET_F_CTRL_COALESCE 3	/* Set coalescing */
> >  #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
> >  #define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
> >  #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
> > @@ -202,4 +203,15 @@ struct virtio_net_ctrl_mq {
> >   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
> >   #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
> >  
> > +struct virtio_net_ctrl_coalesce {
> > +	__u32 coalesce_usecs;
> > +	__u32 max_coalesced_frames;
> > +};
> > +
> > +#define VIRTIO_NET_CTRL_COALESCE 6
> > + #define VIRTIO_NET_CTRL_COALESCE_TX_SET 0
> > + #define VIRTIO_NET_CTRL_COALESCE_TX_GET 1
> > + #define VIRTIO_NET_CTRL_COALESCE_RX_SET 2
> > + #define VIRTIO_NET_CTRL_COALESCE_RX_GET 3
> > +
> >  #endif /* _LINUX_VIRTIO_NET_H */
> > -- 
> > 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell Feb. 13, 2015, 2:52 a.m. UTC | #5
"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
>> Jason Wang <jasowang@redhat.com> writes:
>> > This patch enables the interrupt coalescing setting through ethtool.
>> 
>> The problem is that there's nothing network specific about interrupt
>> coalescing.  I can see other devices wanting exactly the same thing,
>> which means we'd deprecate this in the next virtio standard.
>> 
>> I think the right answer is to extend like we did with
>> vring_used_event(), eg:
>> 
>> 1) Add a new feature VIRTIO_F_RING_COALESCE.
>> 2) Add another a 32-bit field after vring_used_event(), eg:
>>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
>> 
>> This loses the ability to coalesce by number of frames, but we can still
>> do number of sg entries, as we do now with used_event, and we could
>> change virtqueue_enable_cb_delayed() to take a precise number if we
>> wanted.
>
> But do we expect delay to be update dynamically?
> If not, why not stick it in config space?

Hmm, we could update it dynamically (and will, in the case of ethtool).
But it won't be common, so we could append a field to
virtio_pci_common_cfg for PCI.

I think MMIO and CCW would be easy to extend too, but CC'd to check.

>> My feeling is that this should be a v1.0-only feature though
>> (eg. feature bit 33).
>
> Yes, e.g. we can't extend config space for legacy virtio pci.

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pawel Moll Feb. 13, 2015, 12:41 p.m. UTC | #6
On Fri, 2015-02-13 at 02:52 +0000, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
> >> Jason Wang <jasowang@redhat.com> writes:
> >> > This patch enables the interrupt coalescing setting through ethtool.
> >> 
> >> The problem is that there's nothing network specific about interrupt
> >> coalescing.  I can see other devices wanting exactly the same thing,
> >> which means we'd deprecate this in the next virtio standard.
> >> 
> >> I think the right answer is to extend like we did with
> >> vring_used_event(), eg:
> >> 
> >> 1) Add a new feature VIRTIO_F_RING_COALESCE.
> >> 2) Add another a 32-bit field after vring_used_event(), eg:
> >>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
> >> 
> >> This loses the ability to coalesce by number of frames, but we can still
> >> do number of sg entries, as we do now with used_event, and we could
> >> change virtqueue_enable_cb_delayed() to take a precise number if we
> >> wanted.
> >
> > But do we expect delay to be update dynamically?
> > If not, why not stick it in config space?
> 
> Hmm, we could update it dynamically (and will, in the case of ethtool).
> But it won't be common, so we could append a field to
> virtio_pci_common_cfg for PCI.
> 
> I think MMIO and CCW would be easy to extend too, but CC'd to check.

As far as I understand the virtio_pci_common_cfg principle (just had a
look, for the first time ;-), it's now an equivalent of the MMIO control
registers block. I see no major problem with adding another one.

Or were you thinking about introducing some standard for the "real"
config space? (fine with me as well - the transport will have nothing to
do :-)

Paweł

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Cornelia Huck Feb. 13, 2015, 6:19 p.m. UTC | #7
On Fri, 13 Feb 2015 13:22:09 +1030
Rusty Russell <rusty@rustcorp.com.au> wrote:

> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
> >> Jason Wang <jasowang@redhat.com> writes:
> >> > This patch enables the interrupt coalescing setting through ethtool.
> >> 
> >> The problem is that there's nothing network specific about interrupt
> >> coalescing.  I can see other devices wanting exactly the same thing,
> >> which means we'd deprecate this in the next virtio standard.
> >> 
> >> I think the right answer is to extend like we did with
> >> vring_used_event(), eg:
> >> 
> >> 1) Add a new feature VIRTIO_F_RING_COALESCE.
> >> 2) Add another a 32-bit field after vring_used_event(), eg:
> >>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
> >> 
> >> This loses the ability to coalesce by number of frames, but we can still
> >> do number of sg entries, as we do now with used_event, and we could
> >> change virtqueue_enable_cb_delayed() to take a precise number if we
> >> wanted.
> >
> > > But do we expect delay to be updated dynamically?
> > If not, why not stick it in config space?
> 
> Hmm, we could update it dynamically (and will, in the case of ethtool).
> But it won't be common, so we could append a field to
> virtio_pci_common_cfg for PCI.
> 
> I think MMIO and CCW would be easy to extend too, but CC'd to check.

If this is a simple extension of the config space, it should just work
for ccw (the Linux guest driver currently uses 0x100 as max config
space size, which I grabbed from pci at the time I wrote it).

But looking at this virtio_pci_common_cfg stuff, it seems to contain a
lot of things that are handled via ccws on virtio-ccw (like number of
queues or device status). Having an extra ccw just for changing this
delay value seems like overkill.

On the basic topic of interrupt coalescing: With adapter interrupts,
virtio-ccw already has some kind of coalescing: The summary indicator
is set just once and an interrupt is made pending, then individual
queue indicators are switched on and no further interrupt is generated
if the summary indicator has not been cleared by the guest yet. I'm not
sure how it would be different if an individual queue indicator is
switched on later. Chances are that the guest code processing the
indicators has not even progressed to that individual indicator yet, so
it wouldn't matter if it was set delayed. It is probably something that
has to be tried out.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell Feb. 16, 2015, 3:07 a.m. UTC | #8
Pawel Moll <pawel.moll@arm.com> writes:
> On Fri, 2015-02-13 at 02:52 +0000, Rusty Russell wrote:
>> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
>> >> Jason Wang <jasowang@redhat.com> writes:
>> >> > This patch enables the interrupt coalescing setting through ethtool.
>> >> 
>> >> The problem is that there's nothing network specific about interrupt
>> >> coalescing.  I can see other devices wanting exactly the same thing,
>> >> which means we'd deprecate this in the next virtio standard.
>> >> 
>> >> I think the right answer is to extend like we did with
>> >> vring_used_event(), eg:
>> >> 
>> >> 1) Add a new feature VIRTIO_F_RING_COALESCE.
>> >> 2) Add another 32-bit field after vring_used_event(), eg:
>> >>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
>> >> 
>> >> This loses the ability to coalesce by number of frames, but we can still
>> >> do number of sg entries, as we do now with used_event, and we could
>> >> change virtqueue_enable_cb_delayed() to take a precise number if we
>> >> wanted.
>> >
>> > But do we expect delay to be updated dynamically?
>> > If not, why not stick it in config space?
>> 
>> Hmm, we could update it dynamically (and will, in the case of ethtool).
>> But it won't be common, so we could append a field to
>> virtio_pci_common_cfg for PCI.
>> 
>> I think MMIO and CCW would be easy to extend too, but CC'd to check.
>
> As far as I understand the virtio_pci_common_cfg principle (just had a
> look, for the first time ;-), it's now an equivalent of the MMIO control
> registers block. I see no major problem with adding another one.

OK, thanks.

> Or were you thinking about introducing some standard for the "real"
> config space? (fine with me as well - the transport will have nothing to
> do :-)

No, that'd not be possible at this point.  I think it's a per-transport
decision.

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell Feb. 16, 2015, 3:19 a.m. UTC | #9
Cornelia Huck <cornelia.huck@de.ibm.com> writes:
> On Fri, 13 Feb 2015 13:22:09 +1030
> Rusty Russell <rusty@rustcorp.com.au> wrote:
>
>> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > On Tue, Feb 10, 2015 at 12:02:37PM +1030, Rusty Russell wrote:
>> >> Jason Wang <jasowang@redhat.com> writes:
>> >> > This patch enables the interrupt coalescing setting through ethtool.
>> >> 
>> >> The problem is that there's nothing network specific about interrupt
>> >> coalescing.  I can see other devices wanting exactly the same thing,
>> >> which means we'd deprecate this in the next virtio standard.
>> >> 
>> >> I think the right answer is to extend like we did with
>> >> vring_used_event(), eg:
>> >> 
>> >> 1) Add a new feature VIRTIO_F_RING_COALESCE.
>> >> 2) Add another 32-bit field after vring_used_event(), eg:
>> >>         #define vring_used_delay(vr) (*(u32 *)((vr)->avail->ring[(vr)->num + 2]))
>> >> 
>> >> This loses the ability to coalesce by number of frames, but we can still
>> >> do number of sg entries, as we do now with used_event, and we could
>> >> change virtqueue_enable_cb_delayed() to take a precise number if we
>> >> wanted.
>> >
>> > But do we expect delay to be updated dynamically?
>> > If not, why not stick it in config space?
>> 
>> Hmm, we could update it dynamically (and will, in the case of ethtool).
>> But it won't be common, so we could append a field to
>> virtio_pci_common_cfg for PCI.
>> 
>> I think MMIO and CCW would be easy to extend too, but CC'd to check.
>
> If this is a simple extension of the config space, it should just work
> for ccw (the Linux guest driver currently uses 0x100 as max config
> space size, which I grabbed from pci at the time I wrote it).
>
> But looking at this virtio_pci_common_cfg stuff, it seems to contain a
> lot of things that are handled via ccws on virtio-ccw (like number of
> queues or device status). Having an extra ccw just for changing this
> delay value seems like overkill.

Yes, possibly.

> On the basic topic of interrupt coalescing: With adapter interrupts,
> virtio-ccw already has some kind of coalescing: The summary indicator
> is set just once and an interrupt is made pending, then individual
> queue indicators are switched on and no further interrupt is generated
> if the summary indicator has not been cleared by the guest yet. I'm not
> sure how it would be different if an individual queue indicator is
> switched on later. Chances are that the guest code processing the
> indicators has not even yet processed to that individual indicator, so
> it wouldn't matter if it was set delayed. It is probably something that
> has to be tried out.

The network driver will do this at the virtio level too: no more rx
interrupts will be received until all packets have been processed.

But it is particularly useful for network transmit interrupts: we want
to be notified of the packet's finishing, but a little delay (hence more
batching) is better.  For rx, I can envision a case where the guest is
too fast and thus keeps getting interrupted after each packet.  A user
might decide to trade off some latency to increase batching; seems
a bit like a benchmark hack to me, though...

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index cc5f5de..2b958fb 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -145,6 +145,11 @@  struct virtnet_info {
 
 	/* Budget for polling tx completion */
 	u32 tx_work_limit;
+
+	__u32 rx_coalesce_usecs;
+	__u32 rx_max_coalesced_frames;
+	__u32 tx_coalesce_usecs;
+	__u32 tx_max_coalesced_frames;
 };
 
 struct padded_vnet_hdr {
@@ -1404,12 +1409,73 @@  static void virtnet_get_channels(struct net_device *dev,
 	channels->other_count = 0;
 }
 
+static int virtnet_set_coalesce(struct net_device *dev,
+				struct ethtool_coalesce *ec)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct scatterlist sg;
+	struct virtio_net_ctrl_coalesce c;
+
+	if (!vi->has_cvq ||
+	    !virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_COALESCE))
+		return -EOPNOTSUPP;
+	if (vi->rx_coalesce_usecs != ec->rx_coalesce_usecs ||
+	    vi->rx_max_coalesced_frames != ec->rx_max_coalesced_frames) {
+		c.coalesce_usecs = ec->rx_coalesce_usecs;
+		c.max_coalesced_frames = ec->rx_max_coalesced_frames;
+		sg_init_one(&sg, &c, sizeof(c));
+		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
+					  VIRTIO_NET_CTRL_COALESCE_RX_SET,
+					  &sg)) {
+			dev_warn(&dev->dev, "Fail to set rx coalescing\n");
+			return -EINVAL;
+		}
+		vi->rx_coalesce_usecs = ec->rx_coalesce_usecs;
+		vi->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
+	}
+
+	if (vi->tx_coalesce_usecs != ec->tx_coalesce_usecs ||
+	    vi->tx_max_coalesced_frames != ec->tx_max_coalesced_frames) {
+		c.coalesce_usecs = ec->tx_coalesce_usecs;
+		c.max_coalesced_frames = ec->tx_max_coalesced_frames;
+		sg_init_one(&sg, &c, sizeof(c));
+		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_COALESCE,
+					  VIRTIO_NET_CTRL_COALESCE_TX_SET,
+					  &sg)) {
+			dev_warn(&dev->dev, "Fail to set tx coalescing\n");
+			return -EINVAL;
+		}
+		vi->tx_coalesce_usecs = ec->tx_coalesce_usecs;
+		vi->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
+	}
+
+	vi->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+	return 0;
+}
+
+static int virtnet_get_coalesce(struct net_device *dev,
+				struct ethtool_coalesce *ec)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	ec->rx_coalesce_usecs = vi->rx_coalesce_usecs;
+	ec->rx_max_coalesced_frames = vi->rx_max_coalesced_frames;
+	ec->tx_coalesce_usecs = vi->tx_coalesce_usecs;
+	ec->tx_max_coalesced_frames = vi->tx_max_coalesced_frames;
+	ec->tx_max_coalesced_frames_irq = vi->tx_work_limit;
+
+	return 0;
+}
+
 static const struct ethtool_ops virtnet_ethtool_ops = {
 	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
 	.set_channels = virtnet_set_channels,
 	.get_channels = virtnet_get_channels,
+	.set_coalesce = virtnet_set_coalesce,
+	.get_coalesce = virtnet_get_coalesce,
 };
 
 #define MIN_MTU 68
@@ -2048,6 +2114,7 @@  static unsigned int features[] = {
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
 	VIRTIO_NET_F_CTRL_MAC_ADDR,
 	VIRTIO_F_ANY_LAYOUT,
+	VIRTIO_NET_F_CTRL_COALESCE,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index b5f1677..332009d 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -34,6 +34,7 @@ 
 /* The feature bitmap for virtio net */
 #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
 #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_CTRL_COALESCE 3	/* Set coalescing */
 #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
 #define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
 #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
@@ -202,4 +203,15 @@  struct virtio_net_ctrl_mq {
  #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
  #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
 
+struct virtio_net_ctrl_coalesce {
+	__u32 coalesce_usecs;
+	__u32 max_coalesced_frames;
+};
+
+#define VIRTIO_NET_CTRL_COALESCE 6
+ #define VIRTIO_NET_CTRL_COALESCE_TX_SET 0
+ #define VIRTIO_NET_CTRL_COALESCE_TX_GET 1
+ #define VIRTIO_NET_CTRL_COALESCE_RX_SET 2
+ #define VIRTIO_NET_CTRL_COALESCE_RX_GET 3
+
 #endif /* _LINUX_VIRTIO_NET_H */