
[RFC,1/2] net: Add new network device function to allow for MMIO batching

Message ID 20120712002603.27846.23752.stgit@gitlad.jf.intel.com
State RFC, archived
Delegated to: David Miller

Commit Message

Duyck, Alexander H July 12, 2012, 12:26 a.m. UTC
This change adds capabilities to the driver for batching the MMIO write
involved with transmits.  Most of the logic is based off of the code for
the qdisc scheduling.

What I did is break the transmit path into two parts.  We already had the
ndo_start_xmit function which has been there all along.  The part I added
was ndo_complete_xmit which is meant to handle notifying the hardware that
frames are ready for delivery.

To control all of this I added a net sysfs value for the Tx queues called
dispatch_limit.  When it is 0, the hardware is notified immediately for every
frame.  When it is 1 or more, the netdev_complete_xmit call will queue up to
that number of packets, and once the limit is exceeded it will notify the
hardware and reset the pending frame dispatch count.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---

 include/linux/netdevice.h |   57 ++++++++++++++++++++++++++++++++++++++
 net/core/dev.c            |   67 +++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      |   36 ++++++++++++++++++++++++
 3 files changed, 160 insertions(+), 0 deletions(-)
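
As a rough sketch of how a driver might plug into this split transmit path
(illustration only, not from the patch; the foo_* names and ring fields are
invented, and only netdev_complete_xmit(), ndo_complete_xmit and
dispatch_limit come from the code below):

/* Hypothetical driver, sketch only */
static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, skb->queue_mapping);

	/* ...map buffers and fill Tx descriptors exactly as the driver does today... */

	/* Instead of writing the tail register here, let the stack decide
	 * whether to flush immediately or batch the MMIO write.
	 */
	netdev_complete_xmit(txq);

	return NETDEV_TX_OK;
}

static void foo_complete_xmit(struct net_device *dev, unsigned int queue)
{
	struct foo_tx_ring *ring = foo_get_tx_ring(dev, queue);	/* hypothetical helper */

	/* the single MMIO write telling hardware that new descriptors are ready */
	writel(ring->next_to_use, ring->tail);
}

/* wired up in the driver's net_device_ops:
 *	.ndo_start_xmit		= foo_start_xmit,
 *	.ndo_complete_xmit	= foo_complete_xmit,
 */

With such a driver, the batching depth would then be tuned per Tx queue
through the dispatch_limit attribute added in net-sysfs.c below, alongside
the existing per-queue attributes such as trans_timeout and xps_cpus.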



Comments

Eric Dumazet July 12, 2012, 7:14 a.m. UTC | #1
On Wed, 2012-07-11 at 17:26 -0700, Alexander Duyck wrote:
> This change adds capabilities to the driver for batching the MMIO write
> involved with transmits.  Most of the logic is based off of the code for
> the qdisc scheduling.
> 
> What I did is break the transmit path into two parts.  We already had the
> ndo_start_xmit function which has been there all along.  The part I added
> was ndo_complete_xmit which is meant to handle notifying the hardware that
> frames are ready for delivery.
> 
> To control all of this I added a net sysfs value for the Tx queues called
> dispatch_limit.  When it is 0, the hardware is notified immediately for every
> frame.  When it is 1 or more, the netdev_complete_xmit call will queue up to
> that number of packets, and once the limit is exceeded it will notify the
> hardware and reset the pending frame dispatch count.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> ---

The idea is good, but do we really need such a complex scheme?

Most of the transmits are done from __qdisc_run()

We could add logic in __qdisc_run()/qdisc_restart()

qdisc_run_end() would then have to call ndo_complete_xmit() to make
sure the MMIO is done.
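
A rough sketch of what that alternative might look like (assuming the
3.5-era shape of qdisc_run_end() and reusing the dispatch_pending field and
ndo_complete_xmit hook from this RFC; not a tested implementation):

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	struct netdev_queue *txq = qdisc->dev_queue;
	struct net_device *dev = txq->dev;

	/* existing body: mark the qdisc as no longer running */
	qdisc->__state &= ~__QDISC___STATE_RUNNING;

	/* proposed addition: flush any MMIO write batched while this
	 * qdisc run was handing packets to the driver
	 */
	if (txq->dispatch_pending) {
		txq->dispatch_pending = 0;
		dev->netdev_ops->ndo_complete_xmit(dev, txq - &dev->_tx[0]);
	}
}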



Duyck, Alexander H July 12, 2012, 3:39 p.m. UTC | #2
On 07/12/2012 12:14 AM, Eric Dumazet wrote:
> On Wed, 2012-07-11 at 17:26 -0700, Alexander Duyck wrote:
>> This change adds capabilities to the driver for batching the MMIO write
>> involved with transmits.  Most of the logic is based off of the code for
>> the qdisc scheduling.
>>
>> What I did is break the transmit path into two parts.  We already had the
>> ndo_start_xmit function which has been there all along.  The part I added
>> was ndo_complete_xmit which is meant to handle notifying the hardware that
>> frames are ready for delivery.
>>
>> To control all of this I added a net sysfs value for the Tx queues called
>> dispatch_limit.  When it is 0, the hardware is notified immediately for every
>> frame.  When it is 1 or more, the netdev_complete_xmit call will queue up to
>> that number of packets, and once the limit is exceeded it will notify the
>> hardware and reset the pending frame dispatch count.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
>> ---
> The idea is good, but do we really need such a complex scheme?
>
> Most of the transmits are done from __qdisc_run()
>
> We could add logic in __qdisc_run()/qdisc_restart()
>
> qdisc_run_end() would then have to call ndo_complete_xmit() to make
> sure the MMIO is done.

The problem is that in both of the cases where I have seen the issue, the
qdisc is actually empty.

In the case of pktgen it does not use the qdisc layer at all.  It just
directly calls ndo_start_xmit.

In the standard networking case we never fill the qdisc because the MMIO
write stalls the entire CPU so the application never gets a chance to
get ahead of the hardware.  From what I can tell the only case in which
the qdisc_run solution would work is if the ndo_start_xmit was called on
a different CPU from the application that is doing the transmitting.

Thanks,

Alex
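
For reference, a heavily simplified sketch of the pktgen path mentioned
above (not the literal pktgen code; the function name and parameters are
invented for illustration):

static void pktgen_style_xmit(struct sk_buff *skb, struct net_device *odev,
			      u16 queue_map)
{
	const struct net_device_ops *ops = odev->netdev_ops;
	struct netdev_queue *txq = netdev_get_tx_queue(odev, queue_map);

	__netif_tx_lock_bh(txq);
	if (!netif_xmit_frozen_or_stopped(txq))
		ops->ndo_start_xmit(skb, odev);	/* straight into the driver */
	__netif_tx_unlock_bh(txq);
	/* the real code also handles the return value, skb refcounting, etc. */
}

Because no qdisc is ever involved, a flush hooked into
__qdisc_run()/qdisc_run_end() would never fire for this traffic.
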
Eric Dumazet July 13, 2012, 7:19 a.m. UTC | #3
On Wed, 2012-07-11 at 17:26 -0700, Alexander Duyck wrote:

> +static inline void netdev_complete_xmit(struct netdev_queue *txq)
> +{
> +	struct net_device *dev = txq->dev;
> +	const struct net_device_ops *ops = dev->netdev_ops;
> +
> +	if (txq->dispatch_pending < txq->dispatch_limit) {
> +		if (netif_tx_queue_delayed(txq)) {
> +			txq->dispatch_pending++;
> +			return;
> +		}
> +
> +		/* start of delayed write sequence */
> +		netif_tx_delay_queue(txq);

	I don't understand this part.  Isn't a return missing here?

> +	}
> +
> +	txq->dispatch_pending = 0;
> +
> +	ops->ndo_complete_xmit(dev, txq - &dev->_tx[0]);
> +}
> +


Eric Dumazet July 13, 2012, 7:38 a.m. UTC | #4
On Thu, 2012-07-12 at 08:39 -0700, Alexander Duyck wrote:

> The problem is that in both of the cases where I have seen the issue, the
> qdisc is actually empty.
> 

You mean a router workload, with links of the same bandwidth.
(BQL doesn't trigger)

Frankly, what percentage of Linux-powered machines act as high-perf
routers?

> In the case of pktgen it does not use the qdisc layer at all.  It just
> directly calls ndo_start_xmit.

pktgen is in the kernel; adding a complete() call in it is certainly OK,
if we can avoid kernel bloat.

I mean, pktgen represents less than 0.000001 % of real workloads.

> 
> In the standard networking case we never fill the qdisc because the MMIO
> write stalls the entire CPU so the application never gets a chance to
> get ahead of the hardware.  From what I can tell the only case in which
> the qdisc_run solution would work is if the ndo_start_xmit was called on
> a different CPU from the application that is doing the transmitting.

Hey, I can tell that qdisc is not empty on many workloads.
But BQL and TSO mean we only send one or two packets per qdisc run.

I understand this MMIO batching helps router workloads, or workloads
using many small packets.

But on other workloads, this adds a significant latency source
(NET_TX_SOFTIRQ)

It would be good to instrument the extra delay on a single UDP send.

(entering do_softirq() path is not a few instructions...)
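
One crude way to put a first number on that from user space is to time
sendto() itself with dispatch_limit at 0 and then non-zero and compare.
A sketch follows; it only captures the cost visible inside the syscall
(e.g. softirq work run at local_bh_enable() on the transmit path), not any
added wire latency, so it bounds rather than isolates the softirq cost:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <time.h>

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(9) };
	char payload[64] = { 0 };
	struct timespec t0, t1;
	long long total_ns = 0;
	int i, loops = 100000;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	/* 192.0.2.1 is a TEST-NET address; point this at a routable peer */
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);

	for (i = 0; i < loops; i++) {
		clock_gettime(CLOCK_MONOTONIC, &t0);
		sendto(fd, payload, sizeof(payload), 0,
		       (struct sockaddr *)&dst, sizeof(dst));
		clock_gettime(CLOCK_MONOTONIC, &t1);
		total_ns += (t1.tv_sec - t0.tv_sec) * 1000000000LL +
			    (t1.tv_nsec - t0.tv_nsec);
	}

	printf("avg sendto() latency: %lld ns\n", total_ns / loops);
	return 0;
}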



Duyck, Alexander H July 13, 2012, 3:37 p.m. UTC | #5
On 07/13/2012 12:38 AM, Eric Dumazet wrote:
> On Thu, 2012-07-12 at 08:39 -0700, Alexander Duyck wrote:
>
>> The problem is that in both of the cases where I have seen the issue, the
>> qdisc is actually empty.
>>
> You mean a router workload, with links of the same bandwidth.
> (BQL doesn't trigger)
>
> Frankly, what percentage of Linux-powered machines act as high-perf
> routers?
Actually, I was seeing this issue with the sending application on the
same CPU as the Tx cleanup.  The problem was that the CPU would stall and
consume cycles instead of putting work into placing more packets on the
queue.

>> In the case of pktgen it does not use the qdisc layer at all.  It just
>> directly calls ndo_start_xmit.
> pktgen is in the kernel; adding a complete() call in it is certainly OK,
> if we can avoid kernel bloat.
>
> I mean, pktgen represents less than 0.000001 % of real workloads.
I realize that, but it does provide a valid means of stress testing an
interface and demonstrating that the MMIO writes cause significant CPU
stalls and extra bus utilization.

>> In the standard networking case we never fill the qdisc because the MMIO
>> write stalls the entire CPU so the application never gets a chance to
>> get ahead of the hardware.  From what I can tell the only case in which
>> the qdisc_run solution would work is if the ndo_start_xmit was called on
>> a different CPU from the application that is doing the transmitting.
> Hey, I can tell that qdisc is not empty on many workloads.
> But BQL and TSO mean we only send one or two packets per qdisc run.
>
> I understand this MMIO batching helps router workloads, or workloads
> using many small packets.
>
> But on other workloads, this adds a significant latency source
> (NET_TX_SOFTIRQ)
>
> It would be good to instrument the extra delay on a single UDP send.
>
> (entering do_softirq() path is not a few instructions...)
These kinds of issues are one of the reasons why this feature is disabled
by default.  You have to explicitly enable it by setting the
dispatch_limit to something other than 0.

I suppose I could just make it a part of the Tx cleanup itself, since I
am only doing a trylock instead of waiting and taking the full lock.  I am
open to suggestions for alternatives other than NET_TX_SOFTIRQ.

Thanks,

Alex
Duyck, Alexander H July 13, 2012, 3:49 p.m. UTC | #6
On 07/13/2012 12:19 AM, Eric Dumazet wrote:
> On Wed, 2012-07-11 at 17:26 -0700, Alexander Duyck wrote:
>
>> +static inline void netdev_complete_xmit(struct netdev_queue *txq)
>> +{
>> +	struct net_device *dev = txq->dev;
>> +	const struct net_device_ops *ops = dev->netdev_ops;
>> +
>> +	if (txq->dispatch_pending < txq->dispatch_limit) {
>> +		if (netif_tx_queue_delayed(txq)) {
>> +			txq->dispatch_pending++;
>> +			return;
>> +		}
>> +
>> +		/* start of delayed write sequence */
>> +		netif_tx_delay_queue(txq);
> 	I don't understand this part.  Isn't a return missing here?
>
>> +	}
>> +
>> +	txq->dispatch_pending = 0;
>> +
>> +	ops->ndo_complete_xmit(dev, txq - &dev->_tx[0]);
>> +}
>> +
>
There is intentionally no return there.  The idea is that the first
packet always gets through.  It is what will later trigger the interrupt
that forces the final flush, if one is needed.  That is one of the ways I
am helping to reduce the latency of things such as TSO, which will only be
using one or two frames per interrupt anyway.

Thanks,

Alex
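
Spelled out for a queue with dispatch_limit set to N, the code quoted above
works out to roughly the following (a reading of the quoted code, not new
behaviour):

/*
 * packet 1:       dispatch_pending (0) < N and the queue is not DELAYED
 *                   -> mark the queue DELAYED, fall through and call
 *                      ndo_complete_xmit(): the first frame gets its MMIO
 *                      write immediately and will eventually raise the Tx
 *                      completion interrupt.
 * packets 2..N+1: queue is DELAYED and dispatch_pending < N
 *                   -> dispatch_pending++ and return, no MMIO write.
 * packet N+2:     dispatch_pending has reached N
 *                   -> skip the if (), zero dispatch_pending and call
 *                      ndo_complete_xmit() to flush the whole batch.
 *
 * If the limit is never reached, the final flush relies on
 * netif_tx_dispatch_queue() being called later (nothing in this core patch
 * calls it; the driver's Tx completion path is the natural place), which
 * clears DELAYED and lets net_tx_action() issue the last ndo_complete_xmit().
 */
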
stephen hemminger July 13, 2012, 3:50 p.m. UTC | #7
On Fri, 13 Jul 2012 09:38:49 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Frankly, what percentage of Linux-powered machines act as high-perf
> routers?

More than you think: every Linux machine acting as a hypervisor (Xen and KVM)
is also doing this.
Eric Dumazet July 13, 2012, 4:18 p.m. UTC | #8
On Fri, 2012-07-13 at 08:49 -0700, Alexander Duyck wrote:
> On 07/13/2012 12:19 AM, Eric Dumazet wrote:
> > On Wed, 2012-07-11 at 17:26 -0700, Alexander Duyck wrote:
> >
> >> +static inline void netdev_complete_xmit(struct netdev_queue *txq)
> >> +{
> >> +	struct net_device *dev = txq->dev;
> >> +	const struct net_device_ops *ops = dev->netdev_ops;
> >> +
> >> +	if (txq->dispatch_pending < txq->dispatch_limit) {
> >> +		if (netif_tx_queue_delayed(txq)) {
> >> +			txq->dispatch_pending++;
> >> +			return;
> >> +		}
> >> +
> >> +		/* start of delayed write sequence */
> >> +		netif_tx_delay_queue(txq);
> > 	I don't understand this part.  Isn't a return missing here?
> >
> >> +	}
> >> +
> >> +	txq->dispatch_pending = 0;
> >> +
> >> +	ops->ndo_complete_xmit(dev, txq - &dev->_tx[0]);
> >> +}
> >> +
> >
> There is intentionally no return there.  The idea is that the first
> packet always gets through.  It is what is going to later force the
> interrupt that will force the final flush if it is needed.  That is one
> of the ways I am helping to reduce the latency of things such as TSO
> which will only be using one or two frames per interrupt anyway.


So for a single packet, we only trigger the TX softirq to do nothing at all,
or worse, the ndo_complete_xmit() is done twice?

It looks like you need to add comments, because if I don't understand
this code, who will?



Eric Dumazet July 13, 2012, 4:23 p.m. UTC | #9
On Fri, 2012-07-13 at 08:50 -0700, Stephen Hemminger wrote:
> On Fri, 13 Jul 2012 09:38:49 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > Frankly, what percentage of Linux-powered machines act as high-perf
> > routers?
> 
> More than you think, every Linux machine acting as hypervisor (Xen and KVM)
> is also doing this.


High-perf router meant: the ability to route 10 Mpps without the help of TSO.

If a hypervisor is unable to use TSO, I expect very bad performance
anyway.




Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5a1a657..8d50fc4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -522,6 +522,8 @@  enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
 	__QUEUE_STATE_STACK_XOFF,
 	__QUEUE_STATE_FROZEN,
+	__QUEUE_STATE_DELAYED,
+	__QUEUE_STATE_DISPATCH,
 #define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF)		| \
 			      (1 << __QUEUE_STATE_STACK_XOFF))
 #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF		| \
@@ -550,6 +552,7 @@  struct netdev_queue {
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 	int			numa_node;
 #endif
+	unsigned int		dispatch_limit;
 /*
  * write mostly part
  */
@@ -561,6 +564,11 @@  struct netdev_queue {
 	unsigned long		trans_start;
 
 	/*
+	 * pointer to next Tx queue in dispatch_queue
+	 */
+	struct netdev_queue	*next_dispatch;
+
+	/*
 	 * Number of TX timeouts for this queue
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
@@ -568,6 +576,8 @@  struct netdev_queue {
 
 	unsigned long		state;
 
+	unsigned int		dispatch_pending;
+
 #ifdef CONFIG_BQL
 	struct dql		dql;
 #endif
@@ -924,6 +934,8 @@  struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
+	void			(*ndo_complete_xmit) (struct net_device *dev,
+						      unsigned int queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
@@ -1760,6 +1772,9 @@  struct softnet_data {
 	unsigned int		dropped;
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
+
+	struct netdev_queue	*dispatch_queue;
+	struct netdev_queue	**dispatch_queue_tailp;
 };
 
 static inline void input_queue_head_incr(struct softnet_data *sd)
@@ -1779,6 +1794,44 @@  static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
+static inline void netif_tx_delay_queue(struct netdev_queue *txq)
+{
+	set_bit(__QUEUE_STATE_DELAYED, &txq->state);
+}
+
+extern void __netif_tx_dispatch_queue(struct netdev_queue *txq);
+
+static inline void netif_tx_dispatch_queue(struct netdev_queue *txq)
+{
+	if (test_and_clear_bit(__QUEUE_STATE_DELAYED, &txq->state))
+		__netif_tx_dispatch_queue(txq);
+}
+
+static inline bool netif_tx_queue_delayed(const struct netdev_queue *txq)
+{
+	return test_bit(__QUEUE_STATE_DELAYED, &txq->state);
+}
+
+static inline void netdev_complete_xmit(struct netdev_queue *txq)
+{
+	struct net_device *dev = txq->dev;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (txq->dispatch_pending < txq->dispatch_limit) {
+		if (netif_tx_queue_delayed(txq)) {
+			txq->dispatch_pending++;
+			return;
+		}
+
+		/* start of delayed write sequence */
+		netif_tx_delay_queue(txq);
+	}
+
+	txq->dispatch_pending = 0;
+
+	ops->ndo_complete_xmit(dev, txq - &dev->_tx[0]);
+}
+
 extern void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
@@ -1973,6 +2026,7 @@  static inline void netdev_completed_queue(struct net_device *dev,
 
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
+	clear_bit(__QUEUE_STATE_DELAYED, &q->state);
 #ifdef CONFIG_BQL
 	clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
 	dql_reset(&q->dql);
@@ -2482,6 +2536,9 @@  static inline void netif_tx_unlock_bh(struct net_device *dev)
 	}						\
 }
 
+#define HARD_TX_TRYLOCK(dev, txq)			\
+	((dev->features & NETIF_F_LLTX) || __netif_tx_trylock(txq))
+
 static inline void netif_tx_disable(struct net_device *dev)
 {
 	unsigned int i;
diff --git a/net/core/dev.c b/net/core/dev.c
index 93af533..a72669a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2032,6 +2032,27 @@  int netif_get_num_default_rss_queues(void)
 }
 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
 
+static inline void __netif_tx_redispatch_queue(struct netdev_queue *txq)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sd = &__get_cpu_var(softnet_data);
+	txq->next_dispatch = NULL;
+	sd->dispatch_queue = txq;
+	sd->dispatch_queue_tailp = &txq->next_dispatch;
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
+}
+
+void __netif_tx_dispatch_queue(struct netdev_queue *txq)
+{
+	if (!test_and_set_bit(__QUEUE_STATE_DISPATCH, &txq->state))
+		__netif_tx_redispatch_queue(txq);
+}
+EXPORT_SYMBOL(__netif_tx_dispatch_queue);
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
 	struct softnet_data *sd;
@@ -3268,6 +3289,41 @@  static void net_tx_action(struct softirq_action *h)
 			}
 		}
 	}
+
+	if (sd->dispatch_queue) {
+		struct netdev_queue *head;
+
+		local_irq_disable();
+		head = sd->dispatch_queue;
+		sd->dispatch_queue = NULL;
+		sd->dispatch_queue_tailp = &sd->dispatch_queue;
+		local_irq_enable();
+
+		while (head) {
+			struct netdev_queue *txq = head;
+			struct net_device *dev = txq->dev;
+			const struct net_device_ops *ops = dev->netdev_ops;
+
+			head = head->next_dispatch;
+
+			if (!HARD_TX_TRYLOCK(dev, txq)) {
+				__netif_tx_redispatch_queue(txq);
+				continue;
+			}
+
+			smp_mb__before_clear_bit();
+			clear_bit(__QUEUE_STATE_DISPATCH, &txq->state);
+
+			if (txq->dispatch_pending &&
+			    !netif_tx_queue_delayed(txq)) {
+				int index = txq - &dev->_tx[0];
+
+				ops->ndo_complete_xmit(dev, index);
+			}
+
+			HARD_TX_UNLOCK(dev, txq);
+		}
+	}
 }
 
 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
@@ -6485,6 +6541,15 @@  static int dev_cpu_callback(struct notifier_block *nfb,
 		oldsd->output_queue = NULL;
 		oldsd->output_queue_tailp = &oldsd->output_queue;
 	}
+
+	/* Append delayed xmit queue from offline CPU */
+	if (oldsd->dispatch_queue) {
+		*sd->dispatch_queue_tailp = oldsd->dispatch_queue;
+		sd->dispatch_queue_tailp = oldsd->dispatch_queue_tailp;
+		oldsd->dispatch_queue = NULL;
+		oldsd->dispatch_queue_tailp = &oldsd->dispatch_queue;
+	}
+
 	/* Append NAPI poll list from offline CPU. */
 	if (!list_empty(&oldsd->poll_list)) {
 		list_splice_init(&oldsd->poll_list, &sd->poll_list);
@@ -6772,6 +6837,8 @@  static int __init net_dev_init(void)
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue = NULL;
 		sd->output_queue_tailp = &sd->output_queue;
+		sd->dispatch_queue = NULL;
+		sd->dispatch_queue_tailp = &sd->dispatch_queue;
 #ifdef CONFIG_RPS
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 42bb496..4f7eb58 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -997,11 +997,47 @@  static struct netdev_queue_attribute xps_cpus_attribute =
     __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
 #endif /* CONFIG_XPS */
 
+static ssize_t show_dispatch_limit(struct netdev_queue *queue,
+				   struct netdev_queue_attribute *attribute,
+				   char *buf)
+{
+	unsigned int dispatch_limit;
+
+	spin_lock_irq(&queue->_xmit_lock);
+	dispatch_limit = queue->dispatch_limit;
+	spin_unlock_irq(&queue->_xmit_lock);
+
+	return sprintf(buf, "%u\n", dispatch_limit);
+}
+
+static ssize_t store_dispatch_limit(struct netdev_queue *queue,
+				    struct netdev_queue_attribute *attribute,
+				    const char *buf, size_t len)
+{
+	unsigned int dispatch_limit;
+	int err;
+
+	err = kstrtouint(buf, 10, &dispatch_limit);
+	if (err < 0)
+		return err;
+
+	spin_lock_irq(&queue->_xmit_lock);
+	queue->dispatch_limit = dispatch_limit;
+	spin_unlock_irq(&queue->_xmit_lock);
+
+	return len;
+}
+
+static struct netdev_queue_attribute dispatch_limit_attribute =
+	__ATTR(dispatch_limit, S_IRUGO | S_IWUSR,
+	       show_dispatch_limit, store_dispatch_limit);
+
 static struct attribute *netdev_queue_default_attrs[] = {
 	&queue_trans_timeout.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
 #endif
+	&dispatch_limit_attribute.attr,
 	NULL
 };
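
One piece is deliberately left to the driver: nothing in this core patch
calls netif_tx_dispatch_queue(), so a driver adopting the scheme would need
to invoke it itself, most naturally from its Tx completion handling.  A
hedged sketch under that assumption (the foo_* names and ring fields are
invented for illustration):

static void foo_clean_tx_irq(struct foo_tx_ring *tx_ring)	/* hypothetical */
{
	struct netdev_queue *txq = netdev_get_tx_queue(tx_ring->netdev,
						       tx_ring->queue_index);

	/* ...reclaim completed descriptors, update BQL counters, etc... */

	/* Clear __QUEUE_STATE_DELAYED and schedule net_tx_action() so any
	 * descriptors still awaiting their MMIO write get the final flush
	 * discussed in the thread above.
	 */
	netif_tx_dispatch_queue(txq);
}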