diff mbox

[2/2] ixgbe: fix select_queue management (v2)

Message ID 20090320091247.38326931@nehalam
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

stephen hemminger March 20, 2009, 4:12 p.m. UTC
Convert ixgbe to use net_device_ops properly.
Rather than changing the select_queue function pointer
just change number of available transmit queues.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
 drivers/net/ixgbe/ixgbe_dcb_nl.c |   48 +++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Waskiewicz Jr, Peter P March 21, 2009, 3:48 a.m. UTC | #1
On Fri, 20 Mar 2009, Stephen Hemminger wrote:

> Convert ixgbe to use net_device_ops properly.
> Rather than changing the select_queue function pointer
> just change number of available transmit queues.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> ---
>  drivers/net/ixgbe/ixgbe_dcb_nl.c |   48 +++++++++++++++++----------------------
>  1 file changed, 21 insertions(+), 27 deletions(-)
> 
> --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:01:19.643651162 -0700
> +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:11:09.645652169 -0700
> @@ -102,12 +102,6 @@ static u8 ixgbe_dcbnl_get_state(struct n
>  	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
>  }
>  
> -static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
> -{
> -	/* All traffic should default to class 0 */
> -	return 0;
> -}
> -
>  static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
>  {
>  	u8 err = 0;
> @@ -135,7 +129,7 @@ static u8 ixgbe_dcbnl_set_state(struct n
>  		kfree(adapter->rx_ring);
>  		adapter->tx_ring = NULL;
>  		adapter->rx_ring = NULL;
> -		netdev->select_queue = &ixgbe_dcb_select_queue;
> +		netdev->real_num_tx_queues = 1;

NAK.  The point of dcb_select_queue() isn't that DCB mode only uses 1
Tx queue.  DCB has 8 priorities, and allocates 8 Tx queues, one for each
priority.  The DCB spec says that any traffic not being filtered by some 
kind of mechanism needs to go through priority 0, or queue 0.  So 
select_queue is meant to tag all traffic to queue 0, then have the 
attached qdisc and tc filters get the majority of the traffic into the 
different priority queues.

If we did not push the unfiltered traffic into queue 0, then skb_tx_hash() 
would put traffic randomly into queues with higher priority, which is not 
what we want.

I'd prefer your original patch to fix this up, where you check if DCB is 
enabled, and return 0.

-PJ Waskiewicz
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger March 21, 2009, 4:45 a.m. UTC | #2
On Fri, 20 Mar 2009 20:48:46 -0700 (Pacific Daylight Time)
"Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com> wrote:

> On Fri, 20 Mar 2009, Stephen Hemminger wrote:
> 
> > Convert ixgbe to use net_device_ops properly.
> > Rather than changing the select_queue function pointer
> > just change number of available transmit queues.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> > 
> > ---
> >  drivers/net/ixgbe/ixgbe_dcb_nl.c |   48 +++++++++++++++++----------------------
> >  1 file changed, 21 insertions(+), 27 deletions(-)
> > 
> > --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:01:19.643651162 -0700
> > +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:11:09.645652169 -0700
> > @@ -102,12 +102,6 @@ static u8 ixgbe_dcbnl_get_state(struct n
> >  	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
> >  }
> >  
> > -static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
> > -{
> > -	/* All traffic should default to class 0 */
> > -	return 0;
> > -}
> > -
> >  static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
> >  {
> >  	u8 err = 0;
> > @@ -135,7 +129,7 @@ static u8 ixgbe_dcbnl_set_state(struct n
> >  		kfree(adapter->rx_ring);
> >  		adapter->tx_ring = NULL;
> >  		adapter->rx_ring = NULL;
> > -		netdev->select_queue = &ixgbe_dcb_select_queue;
> > +		netdev->real_num_tx_queues = 1;
> 
> NAK.  The point of dcb_select_queue() isn't because DCB mode only uses 1 
> Tx queue.  DCB has 8 priorities, and allocates 8 Tx queues, one for each 
> priority.  The DCB spec says that any traffic not being filtered by some 
> kind of mechanism needs to go through priority 0, or queue 0.  So 
> select_queue is meant to tag all traffic to queue 0, then have the 
> attached qdisc and tc filters get the majority of the traffic into the 
> different priority queues.
> 
> If we did not push the unfiltered traffic into queue 0, then skb_tx_hash() 
> would put traffic randomly into queues with higher priority, which is not 
> what we want.
> 
> I'd prefer your original patch to fix this up, where you check if DCB is 
> enabled, and return 0.
> 
> -PJ Waskiewicz

The default select queue function in the kernel is:

static struct netdev_queue *dev_pick_tx(struct net_device *dev,
					struct sk_buff *skb)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	u16 queue_index = 0;

	if (ops->ndo_select_queue)
		queue_index = ops->ndo_select_queue(dev, skb);
	else if (dev->real_num_tx_queues > 1)
		queue_index = skb_tx_hash(dev, skb);

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

So if the driver (re)sets real_num_tx_queues to 1 then queue_index will
always be 0 and all traffic will go to one queue. This is the same as
having your own select_queue function.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P March 21, 2009, 6:21 a.m. UTC | #3
On Fri, 20 Mar 2009, Stephen Hemminger wrote:

> The default select queue function in the kernel is:
> 
> static struct netdev_queue *dev_pick_tx(struct net_device *dev,
> 					struct sk_buff *skb)
> {
> 	const struct net_device_ops *ops = dev->netdev_ops;
> 	u16 queue_index = 0;
> 
> 	if (ops->ndo_select_queue)
> 		queue_index = ops->ndo_select_queue(dev, skb);
> 	else if (dev->real_num_tx_queues > 1)
> 		queue_index = skb_tx_hash(dev, skb);
> 
> 	skb_set_queue_mapping(skb, queue_index);
> 	return netdev_get_tx_queue(dev, queue_index);
> }
> 
> So if driver (re)sets real_num_tx_queues to 1 then queue_index will always
> 0 and all traffic will go to one queue. This is the same as having your
> own select_queue function.

I see your point, but it is a hack in my opinion.  The device will have 8 
real Tx queues, not 1.  I'd much rather go with the original proposal, 
since if the code in dev_pick_tx() changed, it could silently break ixgbe.

-PJ Waskiewicz
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 21, 2009, 7:33 a.m. UTC | #4
From: "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 20 Mar 2009 23:21:38 -0700 (Pacific Daylight Time)

> I see your point, but it is a hack in my opinion.  The device will have 8 
> real Tx queues, not 1.  I'd much rather go with the original proposal, 
> since if the code in dev_pick_tx() changed, it could silently break ixgbe.

It can't, if you only advertise one transmit queue the kernel
can never ever choose anything other than queue zero.  It's
impossible.

Stephen's right, you guys don't need your select queue override.

And if you recall I suspected this from the very beginning.

You guys never ever think out of the box, ever...  if it's
not straightforward, you guys won't go for it.  That makes
it very frustrating to get anything done.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P March 21, 2009, 7:43 a.m. UTC | #5
On Sat, 21 Mar 2009, David Miller wrote:

> From: "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com>
> Date: Fri, 20 Mar 2009 23:21:38 -0700 (Pacific Daylight Time)
> 
> > I see your point, but it is a hack in my opinion.  The device will have 8 
> > real Tx queues, not 1.  I'd much rather go with the original proposal, 
> > since if the code in dev_pick_tx() changed, it could silently break ixgbe.
> 
> It can't, if you only advertise one transmit queue the kernel
> can never ever choose anything other than queue zero.  It's
> impossible.
> 
> Stephen's right, you guys don't need your select queue override.
> 
> And if you recall I suspected this from the very beginning.
> 
> You guys never ever think out of the box, ever...  if it's
> not straightforward, you guys won't got for it.  That makes
> it very frustrating to get anything done.

This patch will break DCB in ixgbe.  We need all 8 queues, because the 
user will be assigning tc filters to the sch_multiq qdisc to get traffic 
into priority queues.  If we take Stephen's patch and tell the stack we 
have 1 real_num_tx_queues, then we get 1 band in sch_multiq, which makes 
it impossible to assign traffic to priorities 1 through 7:

static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
{
        struct multiq_sched_data *q = qdisc_priv(sch);
        struct tc_multiq_qopt *qopt;
        int i;

        if (!netif_is_multiqueue(qdisc_dev(sch)))
                return -EOPNOTSUPP;
        if (nla_len(opt) < sizeof(*qopt))
                return -EINVAL;

        qopt = nla_data(opt);

        qopt->bands = qdisc_dev(sch)->real_num_tx_queues;

This is not what we want, rather, we want all 8 Tx queues that we expose.  
The only reason we override the select_queue is to catch the unfiltered 
traffic and send it to queue 0.

Cheers,
-PJ Waskiewicz
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger March 21, 2009, 7:39 p.m. UTC | #6
On Sat, 21 Mar 2009 00:43:40 -0700 (Pacific Daylight Time)
"Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com> wrote:

> On Sat, 21 Mar 2009, David Miller wrote:
> 
> > From: "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com>
> > Date: Fri, 20 Mar 2009 23:21:38 -0700 (Pacific Daylight Time)
> > 
> > > I see your point, but it is a hack in my opinion.  The device will have 8 
> > > real Tx queues, not 1.  I'd much rather go with the original proposal, 
> > > since if the code in dev_pick_tx() changed, it could silently break ixgbe.
> > 
> > It can't, if you only advertise one transmit queue the kernel
> > can never ever choose anything other than queue zero.  It's
> > impossible.
> > 
> > Stephen's right, you guys don't need your select queue override.
> > 
> > And if you recall I suspected this from the very beginning.
> > 
> > You guys never ever think out of the box, ever...  if it's
> > not straightforward, you guys won't got for it.  That makes
> > it very frustrating to get anything done.
> 
> This patch will break DCB in ixgbe.  We need all 8 queues, because the 
> user will be assigning tc filters to the sch_multiq qdisc to get traffic 
> into priority queues.  If we take Stephen's patch and tell the stack we 
> have 1 real_num_tx_queues, then we get 1 band in sch_multiq, which makes 
> it impossible to assign traffic to priorities 1 through 8:
> 

How does it make sense to say you have 8 bands, but only use one?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P March 22, 2009, 1:48 a.m. UTC | #7
On Sat, 21 Mar 2009, Stephen Hemminger wrote:

> On Sat, 21 Mar 2009 00:43:40 -0700 (Pacific Daylight Time)
> "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com> wrote:
> 
> > On Sat, 21 Mar 2009, David Miller wrote:
> > 
> > > From: "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com>
> > > Date: Fri, 20 Mar 2009 23:21:38 -0700 (Pacific Daylight Time)
> > > 
> > > > I see your point, but it is a hack in my opinion.  The device will have 8 
> > > > real Tx queues, not 1.  I'd much rather go with the original proposal, 
> > > > since if the code in dev_pick_tx() changed, it could silently break ixgbe.
> > > 
> > > It can't, if you only advertise one transmit queue the kernel
> > > can never ever choose anything other than queue zero.  It's
> > > impossible.
> > > 
> > > Stephen's right, you guys don't need your select queue override.
> > > 
> > > And if you recall I suspected this from the very beginning.
> > > 
> > > You guys never ever think out of the box, ever...  if it's
> > > not straightforward, you guys won't got for it.  That makes
> > > it very frustrating to get anything done.
> > 
> > This patch will break DCB in ixgbe.  We need all 8 queues, because the 
> > user will be assigning tc filters to the sch_multiq qdisc to get traffic 
> > into priority queues.  If we take Stephen's patch and tell the stack we 
> > have 1 real_num_tx_queues, then we get 1 band in sch_multiq, which makes 
> > it impossible to assign traffic to priorities 1 through 8:
> > 
> 
> How does it make sense to say you have 8 bands, but only use one.

That's not how DCB works.  We use sch_multiq to identify the traffic we 
want to put into the 8 bands.  So in other words, the user will add tc 
filters to move the traffic around.  We override select queue to filter 
the rest of the traffic into a single queue, so we don't randomly put 
traffic into the other hardware priority queues.

Either way, thanks for cleaning this up Stephen.  It was something I 
needed to do, and haven't done yet.  So I very much appreciate it.

Cheers,
-PJ Waskiewicz
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 22, 2009, 2 a.m. UTC | #8
From: "Waskiewicz Jr, Peter P" <peter.p.waskiewicz.jr@intel.com>
Date: Sat, 21 Mar 2009 18:48:18 -0700 (Pacific Daylight Time)

> That's not how DCB works.  We use sch_multiq to identify the traffic we 
> want to put into the 8 bands.  So in other words, the user will add tc 
> filters to move the traffic around.  We override select queue to filter 
> the rest of the traffic into a single queue, so we don't randomly put 
> traffic into the other hardware priority queues.

It's escaping me why the multiq rules can't handle this?

In the end, it's a decision of where the logic lives.  Currently
the default handling logic is in the ->select_queue() override,
and I'm still not at all convinced it has to be there.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:01:19.643651162 -0700
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c	2009-03-20 09:11:09.645652169 -0700
@@ -102,12 +102,6 @@  static u8 ixgbe_dcbnl_get_state(struct n
 	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
 }
 
-static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-	/* All traffic should default to class 0 */
-	return 0;
-}
-
 static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 {
 	u8 err = 0;
@@ -135,7 +129,7 @@  static u8 ixgbe_dcbnl_set_state(struct n
 		kfree(adapter->rx_ring);
 		adapter->tx_ring = NULL;
 		adapter->rx_ring = NULL;
-		netdev->select_queue = &ixgbe_dcb_select_queue;
+		netdev->real_num_tx_queues = 1;
 
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
@@ -147,6 +141,7 @@  static u8 ixgbe_dcbnl_set_state(struct n
 		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
 			if (netif_running(netdev))
 				netdev->netdev_ops->ndo_stop(netdev);
+
 			ixgbe_reset_interrupt_capability(adapter);
 			ixgbe_napi_del_all(adapter);
 			INIT_LIST_HEAD(&netdev->napi_list);
@@ -154,7 +149,7 @@  static u8 ixgbe_dcbnl_set_state(struct n
 			kfree(adapter->rx_ring);
 			adapter->tx_ring = NULL;
 			adapter->rx_ring = NULL;
-			netdev->select_queue = NULL;
+			netdev->real_num_tx_queues = MAX_TX_QUEUES;
 
 			adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 			adapter->flags |= IXGBE_FLAG_RSS_ENABLED;