diff mbox

[net-next,V3] net: dynamic ingress_queue allocation

Message ID 1286035915.2582.2472.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Oct. 2, 2010, 4:11 p.m. UTC
Le samedi 02 octobre 2010 à 11:32 +0200, Jarek Poplawski a écrit :
> On Fri, Oct 01, 2010 at 03:56:28PM +0200, Eric Dumazet wrote:
>  
> >  static void netdev_init_queue_locks(struct net_device *dev)
> >  {
> >  	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
> > -	__netdev_init_queue_locks_one(dev, &dev->ingress_queue, NULL);
> > +	__netdev_init_queue_locks_one(dev, dev_ingress_queue(dev), NULL);
> 
> Is dev_ingress_queue(dev) not NULL anytime here?

Yes, but I felt this could be removed later. If you feel its OK right
now, I am OK too ;)

> 
> >  }
> >  
> >  unsigned long netdev_fix_features(unsigned long features, const char *name)
> > @@ -5447,16 +5448,37 @@ static void netdev_init_one_queue(struct net_device *dev,
> >  				  struct netdev_queue *queue,
> >  				  void *_unused)
> >  {
> > -	queue->dev = dev;
> > +	if (queue)
> > +		queue->dev = dev;
> >  }
> >  
> >  static void netdev_init_queues(struct net_device *dev)
> >  {
> > -	netdev_init_one_queue(dev, &dev->ingress_queue, NULL);
> > +	netdev_init_one_queue(dev, dev_ingress_queue(dev), NULL);
> 
> Is dev_ingress_queue(dev) not NULL anytime here?
> 

Yes

> >  	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
> >  	spin_lock_init(&dev->tx_global_lock);
> >  }
> >  
> > +struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
> > +{
> > +	struct netdev_queue *queue = dev_ingress_queue(dev);
> > +
> > +#ifdef CONFIG_NET_CLS_ACT
> > +	if (queue)
> > +		return queue;
> > +	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
> > +	if (!queue)
> > +		return NULL;
> > +	netdev_init_one_queue(dev, queue, NULL);
> > +	__netdev_init_queue_locks_one(dev, queue, NULL);
> > +	queue->qdisc = &noop_qdisc;
> > +	queue->qdisc_sleeping = &noop_qdisc;
> > +	smp_wmb();
> 
> Why don't we need smp_rmb() in handle_ing()?
> 

I only wanted to see if Al Viro was using ingress on its Alpha
machine ;)

I am going to use regular rcu api to ease code understanding :)


> > +	dev->ingress_queue = queue;
> > +#endif
> > +	return queue;
> > +}
> > +
> >  /**
> >   *	alloc_netdev_mq - allocate network device
> >   *	@sizeof_priv:	size of private data to allocate space for
> > @@ -5559,6 +5581,8 @@ void free_netdev(struct net_device *dev)
> >  
> >  	kfree(dev->_tx);
> >  
> > +	kfree(dev_ingress_queue(dev));
> > +
> >  	/* Flush device addresses */
> >  	dev_addr_flush(dev);
> >  
> > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > index b802078..8635110 100644
> > --- a/net/sched/sch_api.c
> > +++ b/net/sched/sch_api.c
> > @@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
> >  	if (q)
> >  		goto out;
> >  
> > -	q = qdisc_match_from_root(dev->ingress_queue.qdisc_sleeping, handle);
> > +	if (!dev_ingress_queue(dev))
> > +		goto out;
> > +	q = qdisc_match_from_root(dev_ingress_queue(dev)->qdisc_sleeping,
> > +				  handle);
> 


> I'd prefer:
>  +	if (dev_ingress_queue(dev))
>  +		q = qdisc_match_from_root(dev_ingress_queue(dev)->qdisc_sleeping,
> 

Yes

> >  out:
> >  	return q;
> >  }
> > @@ -690,6 +693,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >  		    (new && new->flags & TCQ_F_INGRESS)) {
> >  			num_q = 1;
> >  			ingress = 1;
> > +			if (!dev_ingress_queue(dev))
> > +				return -ENOENT;
> 
> Is this test really needed here?

To avoid a NULL dereference some lines later.
Do I have a guarantee its not NULL here ?

> 
> >  		}
> >  
> >  		if (dev->flags & IFF_UP)
> > @@ -701,7 +706,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >  		}
> >  
> >  		for (i = 0; i < num_q; i++) {
> > -			struct netdev_queue *dev_queue = &dev->ingress_queue;
> > +			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
> >  
> >  			if (!ingress)
> >  				dev_queue = netdev_get_tx_queue(dev, i);
> > @@ -979,7 +984,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
> >  					return -ENOENT;
> >  				q = qdisc_leaf(p, clid);
> >  			} else { /* ingress */
> > -				q = dev->ingress_queue.qdisc_sleeping;
> > +				if (dev_ingress_queue(dev))
> > +					q = dev_ingress_queue(dev)->qdisc_sleeping;
> >  			}
> >  		} else {
> >  			q = dev->qdisc;
> > @@ -1044,7 +1050,8 @@ replay:
> >  					return -ENOENT;
> >  				q = qdisc_leaf(p, clid);
> >  			} else { /*ingress */
> > -				q = dev->ingress_queue.qdisc_sleeping;
> > +				if (dev_ingress_queue_create(dev))
> > +					q = dev_ingress_queue(dev)->qdisc_sleeping;
> 
> I wonder if doing dev_ingress_queue_create() just before qdisc_create()
> (and the test here) isn't more readable.

Sorry, I dont understand. I want to create ingress_queue only if user
wants it. If we setup (egress) trafic shaping, no need to setup
ingress_queue.

> >  
> > @@ -753,7 +755,7 @@ void dev_activate(struct net_device *dev)
> >  
> >  	need_watchdog = 0;
> >  	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
> > -	transition_one_qdisc(dev, &dev->ingress_queue, NULL);
> > +	transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
> 
> I'd prefer here and similarly later:
> 
>  +	if (dev_ingress_queue(dev))
>  +		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
> 
> to show NULL dev_queue is only legal in this one case.

OK, thanks a lot for the extended review Jarek (and Jamal of course)

Here is the V3 then.

[PATCH net-next V3] net: dynamic ingress_queue allocation

ingress being not used very much, and net_device->ingress_queue being
quite a big object (128 or 256 bytes), use a dynamic allocation if
needed (tc qdisc add dev eth0 ingress ...)

dev_ingress_queue(dev) helper should be used only with RTNL taken.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
V3: add rcu notations & address Jarek comments
 include/linux/netdevice.h |    2 -
 include/linux/rtnetlink.h |    8 ++++++
 net/core/dev.c            |   34 ++++++++++++++++++++++-------
 net/sched/sch_api.c       |   42 ++++++++++++++++++++++++------------
 net/sched/sch_generic.c   |   12 ++++++----
 5 files changed, 71 insertions(+), 27 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jarek Poplawski Oct. 3, 2010, 9:42 a.m. UTC | #1
On Sat, Oct 02, 2010 at 06:11:55PM +0200, Eric Dumazet wrote:
> Le samedi 02 octobre 2010 ?? 11:32 +0200, Jarek Poplawski a écrit :
> > On Fri, Oct 01, 2010 at 03:56:28PM +0200, Eric Dumazet wrote:
...
> > > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > > index b802078..8635110 100644
> > > --- a/net/sched/sch_api.c
> > > +++ b/net/sched/sch_api.c
...
> > > @@ -690,6 +693,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> > >  		    (new && new->flags & TCQ_F_INGRESS)) {
> > >  			num_q = 1;
> > >  			ingress = 1;
> > > +			if (!dev_ingress_queue(dev))
> > > +				return -ENOENT;
> > 
> > Is this test really needed here?
> 
> To avoid a NULL dereference some lines later.
> Do I have a guarantee its not NULL here ?

Do you have any scenario for NULL here? ;-)

Of course, it's your patch and responsibility, and I'll not guarantee,
but you could at least add a TODO comment, to check it later.

> > > @@ -1044,7 +1050,8 @@ replay:
> > >  					return -ENOENT;
> > >  				q = qdisc_leaf(p, clid);
> > >  			} else { /*ingress */
> > > -				q = dev->ingress_queue.qdisc_sleeping;
> > > +				if (dev_ingress_queue_create(dev))
> > > +					q = dev_ingress_queue(dev)->qdisc_sleeping;
> > 
> > I wonder if doing dev_ingress_queue_create() just before qdisc_create()
> > (and the test here) isn't more readable.
> 
> Sorry, I dont understand. I want to create ingress_queue only if user
> wants it. If we setup (egress) trafic shaping, no need to setup
> ingress_queue.

I mean doing both creates in one place:

> @@ -1123,11 +1130,14 @@ replay:
>  create_n_graft:
...
> +	if (clid == TC_H_INGRESS) {
 +		if (dev_ingress_queue_create(dev))
> +			q = qdisc_create(dev, dev_ingress_queue(dev), p,
> +					 tcm->tcm_parent, tcm->tcm_parent,
> +					 tca, &err);
> +		else
> +			err = -ENOENT;
> +	} else {
>  		struct netdev_queue *dev_queue;
...

> Here is the V3 then.
> 
> [PATCH net-next V3] net: dynamic ingress_queue allocation
> 
> ingress being not used very much, and net_device->ingress_queue being
> quite a big object (128 or 256 bytes), use a dynamic allocation if
> needed (tc qdisc add dev eth0 ingress ...)
> 
> dev_ingress_queue(dev) helper should be used only with RTNL taken.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
> V3: add rcu notations & address Jarek comments
>  include/linux/netdevice.h |    2 -
>  include/linux/rtnetlink.h |    8 ++++++
>  net/core/dev.c            |   34 ++++++++++++++++++++++-------
>  net/sched/sch_api.c       |   42 ++++++++++++++++++++++++------------
>  net/sched/sch_generic.c   |   12 ++++++----
>  5 files changed, 71 insertions(+), 27 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index ceed347..92d81ed 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -986,7 +986,7 @@ struct net_device {
>  	rx_handler_func_t	*rx_handler;
>  	void			*rx_handler_data;
>  
> -	struct netdev_queue	ingress_queue; /* use two cache lines */
> +	struct netdev_queue __rcu *ingress_queue;
>  
>  /*
>   * Cache lines mostly used on transmit path
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index 68c436b..0bb7b48 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -6,6 +6,7 @@
>  #include <linux/if_link.h>
>  #include <linux/if_addr.h>
>  #include <linux/neighbour.h>
> +#include <linux/netdevice.h>
>  
>  /* rtnetlink families. Values up to 127 are reserved for real address
>   * families, values above 128 may be used arbitrarily.
> @@ -769,6 +770,13 @@ extern int lockdep_rtnl_is_held(void);
>  #define rtnl_dereference(p)					\
>  	rcu_dereference_check(p, lockdep_rtnl_is_held())
>  
> +static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
> +{
> +	return rtnl_dereference(dev->ingress_queue);

I'd consider rcu_dereference_rtnl(). Btw, technically qdisc_lookup()
doesn't require rtnl, and there was time it was used without it
(on xmit path).

I think you should also add a comment here why this rcu is used, and
that it changes only once in dev's liftime.

Jarek P.

PS: checkpatched or not checkpatched, that is the question... ;-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Oct. 3, 2010, 1:10 p.m. UTC | #2
On Sun, 2010-10-03 at 11:42 +0200, Jarek Poplawski wrote:

> > 
> > To avoid a NULL dereference some lines later.
> > Do I have a guarantee its not NULL here ?
> 
> Do you have any scenario for NULL here? ;-)

This is why i called this part clever earlier ;-> It is 
clever. There are several scenarios (i attempted to represent them
in the tests that Eric run):

1) ingress qdisc has been compiled in
flags & TCQ_F_INGRESS is true
a) user trying to add ingress qdisc first time
then q is null, new is not null and this would work
b) user trying to delete already added qdisc
then q is not null, new is null 

2) ingress qdisc not compiled in
Repeat #1a above, and Eric's check will bail out ..

The one thing that may have been useful is to also try
a "replace" after #1a and maybe after #2

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 4, 2010, 8:42 a.m. UTC | #3
Le dimanche 03 octobre 2010 à 11:42 +0200, Jarek Poplawski a écrit :

> I'd consider rcu_dereference_rtnl(). Btw, technically qdisc_lookup()
> doesn't require rtnl, and there was time it was used without it
> (on xmit path).


Hmm, for me, rcu_dereference_rtnl() is a bit lazy.

Either we are a reader and should use rcu_dereference(), or a writer and
RTNL should be held.

Mixing two conditions in a "super macro" is a workaround that we used to
promptly shutup some lockdep splats. Real fix would be to use strict
lockdep conditions, because this better documents the code and the
locking invariants.

BTW, rtnl_dereference() should be changed to use
rcu_dereference_protected() instead of rcu_dereference_check() :
If RTBL is held, there is no need to force a barrier.


> I think you should also add a comment here why this rcu is used, and
> that it changes only once in dev's liftime.
> 

This comment was needed in the previous version of the patch, with the
smb_wmb() barrier. Now I switched to regular rcu use, nothing prevents
us to change dev->ingress_queue in flight. Of course there is no current
interest doing so.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Oct. 4, 2010, 12:06 p.m. UTC | #4
On Mon, Oct 04, 2010 at 10:42:09AM +0200, Eric Dumazet wrote:
> Le dimanche 03 octobre 2010 ?? 11:42 +0200, Jarek Poplawski a écrit :
> 
> > I'd consider rcu_dereference_rtnl(). Btw, technically qdisc_lookup()
> > doesn't require rtnl, and there was time it was used without it
> > (on xmit path).
> 
> 
> Hmm, for me, rcu_dereference_rtnl() is a bit lazy.
> 
> Either we are a reader and should use rcu_dereference(), or a writer and
> RTNL should be held.
> 
> Mixing two conditions in a "super macro" is a workaround that we used to
> promptly shutup some lockdep splats. Real fix would be to use strict
> lockdep conditions, because this better documents the code and the
> locking invariants.
> 
> BTW, rtnl_dereference() should be changed to use
> rcu_dereference_protected() instead of rcu_dereference_check() :
> If RTBL is held, there is no need to force a barrier.

Actually, I'm one of those (convinced by Patrick btw), who consider
rcu_dereference() on the clear update side as misleading, so I'm not
rtnl_dereference() fan with or without changes.

> 
> 
> > I think you should also add a comment here why this rcu is used, and
> > that it changes only once in dev's liftime.
> > 
> 
> This comment was needed in the previous version of the patch, with the
> smb_wmb() barrier. Now I switched to regular rcu use, nothing prevents
> us to change dev->ingress_queue in flight. Of course there is no current
> interest doing so.

Right, but then at least in qdisc_lookup():

if (dev_ingress_queue(dev))
	q = qdisc_match_from_root(dev_ingress_queue(dev), handle);

you should use a variable instead of the second dereference (rtnl isn't
mandatory here).

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 4, 2010, 12:52 p.m. UTC | #5
Le lundi 04 octobre 2010 à 14:06 +0200, Jarek Poplawski a écrit :

> Right, but then at least in qdisc_lookup():
> 
> if (dev_ingress_queue(dev))
> 	q = qdisc_match_from_root(dev_ingress_queue(dev), handle);
> 
> you should use a variable instead of the second dereference (rtnl isn't
> mandatory here).

I am lost.

If rntl is not mandatory, what is the lock that protects us ?

qdisc_lookup() _is_ called under the protection of a lock.
Or as soon as we return from it, result could change under us.

Please name it, and I'll use it :

rcu_dereference_protected(dev->ingress_queue, lockdep_is_held(THISLOCK))



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Oct. 4, 2010, 2:24 p.m. UTC | #6
On Mon, Oct 04, 2010 at 02:52:00PM +0200, Eric Dumazet wrote:
> Le lundi 04 octobre 2010 ?? 14:06 +0200, Jarek Poplawski a écrit :
> 
> > Right, but then at least in qdisc_lookup():
> > 
> > if (dev_ingress_queue(dev))
> > 	q = qdisc_match_from_root(dev_ingress_queue(dev), handle);
> > 
> > you should use a variable instead of the second dereference (rtnl isn't
> > mandatory here).
> 
> I am lost.
> 
> If rntl is not mandatory, what is the lock that protects us ?
> 
> qdisc_lookup() _is_ called under the protection of a lock.
> Or as soon as we return from it, result could change under us.
> 
> Please name it, and I'll use it :
> 
> rcu_dereference_protected(dev->ingress_queue, lockdep_is_held(THISLOCK))
> 

You are right! There is no other lock. (I forgot I removed
qdisc_list_lock already ;-)

Sorry,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 5, 2010, 7:24 a.m. UTC | #7
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 02 Oct 2010 18:11:55 +0200

> [PATCH net-next V3] net: dynamic ingress_queue allocation
> 
> ingress being not used very much, and net_device->ingress_queue being
> quite a big object (128 or 256 bytes), use a dynamic allocation if
> needed (tc qdisc add dev eth0 ingress ...)
> 
> dev_ingress_queue(dev) helper should be used only with RTNL taken.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

This looks good to me, applied, thanks Eric.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 5, 2010, 7:31 a.m. UTC | #8
Le mardi 05 octobre 2010 à 00:24 -0700, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Sat, 02 Oct 2010 18:11:55 +0200
> 
> > [PATCH net-next V3] net: dynamic ingress_queue allocation
> > 
> > ingress being not used very much, and net_device->ingress_queue being
> > quite a big object (128 or 256 bytes), use a dynamic allocation if
> > needed (tc qdisc add dev eth0 ingress ...)
> > 
> > dev_ingress_queue(dev) helper should be used only with RTNL taken.
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> This looks good to me, applied, thanks Eric.

Thanks to Jarek, Jamal, and you :)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ceed347..92d81ed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -986,7 +986,7 @@  struct net_device {
 	rx_handler_func_t	*rx_handler;
 	void			*rx_handler_data;
 
-	struct netdev_queue	ingress_queue; /* use two cache lines */
+	struct netdev_queue __rcu *ingress_queue;
 
 /*
  * Cache lines mostly used on transmit path
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 68c436b..0bb7b48 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -6,6 +6,7 @@ 
 #include <linux/if_link.h>
 #include <linux/if_addr.h>
 #include <linux/neighbour.h>
+#include <linux/netdevice.h>
 
 /* rtnetlink families. Values up to 127 are reserved for real address
  * families, values above 128 may be used arbitrarily.
@@ -769,6 +770,13 @@  extern int lockdep_rtnl_is_held(void);
 #define rtnl_dereference(p)					\
 	rcu_dereference_check(p, lockdep_rtnl_is_held())
 
+static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
+{
+	return rtnl_dereference(dev->ingress_queue);
+}
+
+extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
+
 extern void rtnetlink_init(void);
 extern void __rtnl_unlock(void);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a313bab..b078ec8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2702,11 +2702,10 @@  EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
 	struct net_device *dev = skb->dev;
 	u32 ttl = G_TC_RTTL(skb->tc_verd);
-	struct netdev_queue *rxq;
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
@@ -2720,8 +2719,6 @@  static int ing_filter(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	rxq = &dev->ingress_queue;
-
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
@@ -2737,7 +2734,9 @@  static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (skb->dev->ingress_queue.qdisc == &noop_qdisc)
+	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+	if (!rxq || rxq->qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
@@ -2745,7 +2744,7 @@  static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 		*pt_prev = NULL;
 	}
 
-	switch (ing_filter(skb)) {
+	switch (ing_filter(skb, rxq)) {
 	case TC_ACT_SHOT:
 	case TC_ACT_STOLEN:
 		kfree_skb(skb);
@@ -4940,7 +4939,6 @@  static void __netdev_init_queue_locks_one(struct net_device *dev,
 static void netdev_init_queue_locks(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-	__netdev_init_queue_locks_one(dev, &dev->ingress_queue, NULL);
 }
 
 unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -5452,11 +5450,29 @@  static void netdev_init_one_queue(struct net_device *dev,
 
 static void netdev_init_queues(struct net_device *dev)
 {
-	netdev_init_one_queue(dev, &dev->ingress_queue, NULL);
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
 	spin_lock_init(&dev->tx_global_lock);
 }
 
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
+{
+	struct netdev_queue *queue = dev_ingress_queue(dev);
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	__netdev_init_queue_locks_one(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+	return queue;
+}
+
 /**
  *	alloc_netdev_mq - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -5559,6 +5575,8 @@  void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	kfree(rcu_dereference_raw(dev->ingress_queue));
+
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b802078..b22ca2d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -240,7 +240,10 @@  struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 	if (q)
 		goto out;
 
-	q = qdisc_match_from_root(dev->ingress_queue.qdisc_sleeping, handle);
+	if (dev_ingress_queue(dev))
+		q = qdisc_match_from_root(
+			dev_ingress_queue(dev)->qdisc_sleeping,
+			handle);
 out:
 	return q;
 }
@@ -690,6 +693,8 @@  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		    (new && new->flags & TCQ_F_INGRESS)) {
 			num_q = 1;
 			ingress = 1;
+			if (!dev_ingress_queue(dev))
+				return -ENOENT;
 		}
 
 		if (dev->flags & IFF_UP)
@@ -701,7 +706,7 @@  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		}
 
 		for (i = 0; i < num_q; i++) {
-			struct netdev_queue *dev_queue = &dev->ingress_queue;
+			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
 
 			if (!ingress)
 				dev_queue = netdev_get_tx_queue(dev, i);
@@ -979,7 +984,8 @@  static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
 			} else { /* ingress */
-				q = dev->ingress_queue.qdisc_sleeping;
+				if (dev_ingress_queue(dev))
+					q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -1043,8 +1049,9 @@  replay:
 				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /*ingress */
-				q = dev->ingress_queue.qdisc_sleeping;
+			} else { /* ingress */
+				if (dev_ingress_queue_create(dev))
+					q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -1123,11 +1130,14 @@  replay:
 create_n_graft:
 	if (!(n->nlmsg_flags&NLM_F_CREATE))
 		return -ENOENT;
-	if (clid == TC_H_INGRESS)
-		q = qdisc_create(dev, &dev->ingress_queue, p,
-				 tcm->tcm_parent, tcm->tcm_parent,
-				 tca, &err);
-	else {
+	if (clid == TC_H_INGRESS) {
+		if (dev_ingress_queue(dev))
+			q = qdisc_create(dev, dev_ingress_queue(dev), p,
+					 tcm->tcm_parent, tcm->tcm_parent,
+					 tca, &err);
+		else
+			err = -ENOENT;
+	} else {
 		struct netdev_queue *dev_queue;
 
 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1304,8 +1314,10 @@  static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
 			goto done;
 
-		dev_queue = &dev->ingress_queue;
-		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+		dev_queue = dev_ingress_queue(dev);
+		if (dev_queue &&
+		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
+				       &q_idx, s_q_idx) < 0)
 			goto done;
 
 cont:
@@ -1595,8 +1607,10 @@  static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
 		goto done;
 
-	dev_queue = &dev->ingress_queue;
-	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+	dev_queue = dev_ingress_queue(dev);
+	if (dev_queue &&
+	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
+				&t, s_t) < 0)
 		goto done;
 
 done:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 545278a..3d57681 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -753,7 +753,8 @@  void dev_activate(struct net_device *dev)
 
 	need_watchdog = 0;
 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
-	transition_one_qdisc(dev, &dev->ingress_queue, NULL);
+	if (dev_ingress_queue(dev))
+		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
 
 	if (need_watchdog) {
 		dev->trans_start = jiffies;
@@ -812,7 +813,8 @@  static bool some_qdisc_is_busy(struct net_device *dev)
 void dev_deactivate(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
-	dev_deactivate_queue(dev, &dev->ingress_queue, &noop_qdisc);
+	if (dev_ingress_queue(dev))
+		dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 
 	dev_watchdog_down(dev);
 
@@ -838,7 +840,8 @@  void dev_init_scheduler(struct net_device *dev)
 {
 	dev->qdisc = &noop_qdisc;
 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
-	dev_init_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
+	if (dev_ingress_queue(dev))
+		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 
 	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
@@ -861,7 +864,8 @@  static void shutdown_scheduler_queue(struct net_device *dev,
 void dev_shutdown(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
-	shutdown_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
+	if (dev_ingress_queue(dev))
+		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 	qdisc_destroy(dev->qdisc);
 	dev->qdisc = &noop_qdisc;