diff mbox

PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle

Message ID 4B01C938.8000705@gmail.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Nov. 16, 2009, 9:50 p.m. UTC
time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105

real	0m0.266s
user	0m0.000s
sys	0m0.001s

real	0m0.770s
user	0m0.000s
sys	0m0.000s

real	0m1.022s
user	0m0.000s
sys	0m0.000s


One problem of current schem in vlan dismantle phase is the
holding of device done by following chain :

vlan_dev_stop() ->
	netif_carrier_off(dev) ->
		linkwatch_fire_event(dev) ->
			dev_hold() ...

And __linkwatch_run_queue() runs up to one second later...

A generic fix to this problem is to add a linkwatch_forget_dev() method
to unlink the device from the list of watched devices.

dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory),
to be able to unlink device in O(1).

After patch :
time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105

real    0m0.024s
user    0m0.000s
sys     0m0.000s

real    0m0.032s
user    0m0.000s
sys     0m0.001s

real    0m0.033s
user    0m0.000s
sys     0m0.000s


Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/netdevice.h |    3 +-
 net/core/dev.c            |    3 ++
 net/core/link_watch.c     |   42 ++++++++++++++++++++++++------------
 3 files changed, 33 insertions(+), 15 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

stephen hemminger Nov. 16, 2009, 10:39 p.m. UTC | #1
On Mon, 16 Nov 2009 22:50:48 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> vlan_dev_stop() ->
> 	netif_carrier_off(dev) ->
> 		linkwatch_fire_event(dev) ->
> 			dev_hold() ...
> 
> And __linkwatch_run_queue() runs up to one second later...
> 
> A generic fix to this problem is to add a linkwatch_forget_dev() method
> to unlink the device from the list of watched devices.
> 
> dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory),
> to be able to unlink device in O(1).
> 
> After patch :
> time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105
> 
> real    0m0.024s
> user    0m0.000s
> sys     0m0.000s
> 
> real    0m0.032s
> user    0m0.000s
> sys     0m0.001s
> 
> real    0m0.033s
> user    0m0.000s
> sys     0m0.000s
> 
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Acked-by: Stephen Hemminger <shemminger@vyatta.com>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Nov. 17, 2009, 12:26 p.m. UTC | #2
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 16 Nov 2009 14:39:17 -0800

> On Mon, 16 Nov 2009 22:50:48 +0100
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Acked-by: Stephen Hemminger <shemminger@vyatta.com>

Is this really valid?

The whole point in emitting the netif_carrier_off() is so
that applications see the event in userspace and therefore
can clean things up.

Sure, the kernel will no longer make the device visible, and therefore
the application can't operate on it any longer.  But the application
is deserved of receiving the event anyways so that it can clean up
internal state and datastructures.

It seem to me that in this ->stop() case we'll now elide the event
emission, and I don't see how that can be right.

Really, the link watch stuff is just due for a redesign.  I don't
think a simple hack is going to cut it this time, sorry Eric :-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Nov. 17, 2009, 12:31 p.m. UTC | #3
On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote:
>
> Really, the link watch stuff is just due for a redesign.  I don't
> think a simple hack is going to cut it this time, sorry Eric :-)

I have no objections against any redesigns, but since the only
caller of linkwatch_forget_dev runs in process context with the
RTNL, it could also legally emit those events.

Cheers,
diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7043f85..4e25730 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -896,7 +896,7 @@  struct net_device {
 	/* device index hash chain */
 	struct hlist_node	index_hlist;
 
-	struct net_device	*link_watch_next;
+	struct list_head	link_watch_list;
 
 	/* register/unregister state machine */
 	enum { NETREG_UNINITIALIZED=0,
@@ -1600,6 +1600,7 @@  static inline void dev_hold(struct net_device *dev)
  */
 
 extern void linkwatch_fire_event(struct net_device *dev);
+extern void linkwatch_forget_dev(struct net_device *dev);
 
 /**
  *	netif_carrier_ok - test if carrier present
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b24d79..649de02 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5066,6 +5066,8 @@  static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
 
+	linkwatch_forget_dev(dev);
+
 	rebroadcast_time = warning_time = jiffies;
 	while (atomic_read(&dev->refcnt) != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -5280,6 +5282,7 @@  struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af..05fe273 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -35,7 +35,7 @@  static unsigned long linkwatch_nextevent;
 static void linkwatch_event(struct work_struct *dummy);
 static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
 
-static struct net_device *lweventlist;
+static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
 
 static unsigned char default_operstate(const struct net_device *dev)
@@ -89,8 +89,10 @@  static void linkwatch_add_event(struct net_device *dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&lweventlist_lock, flags);
-	dev->link_watch_next = lweventlist;
-	lweventlist = dev;
+	if (list_empty(&dev->link_watch_list)) {
+		list_add_tail(&dev->link_watch_list, &lweventlist);
+		dev_hold(dev);
+	}
 	spin_unlock_irqrestore(&lweventlist_lock, flags);
 }
 
@@ -135,7 +137,8 @@  static void linkwatch_schedule_work(int urgent)
 
 static void __linkwatch_run_queue(int urgent_only)
 {
-	struct net_device *next;
+	struct net_device *dev;
+	LIST_HEAD(wrk);
 
 	/*
 	 * Limit the number of linkwatch events to one
@@ -153,19 +156,18 @@  static void __linkwatch_run_queue(int urgent_only)
 	clear_bit(LW_URGENT, &linkwatch_flags);
 
 	spin_lock_irq(&lweventlist_lock);
-	next = lweventlist;
-	lweventlist = NULL;
-	spin_unlock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &wrk);
 
-	while (next) {
-		struct net_device *dev = next;
+	while (!list_empty(&wrk)) {
 
-		next = dev->link_watch_next;
+		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
+		list_del_init(&dev->link_watch_list);
 
 		if (urgent_only && !linkwatch_urgent_event(dev)) {
-			linkwatch_add_event(dev);
+			list_add_tail(&dev->link_watch_list, &lweventlist);
 			continue;
 		}
+		spin_unlock_irq(&lweventlist_lock);
 
 		/*
 		 * Make sure the above read is complete since it can be
@@ -189,10 +191,24 @@  static void __linkwatch_run_queue(int urgent_only)
 		}
 
 		dev_put(dev);
+		spin_lock_irq(&lweventlist_lock);
 	}
 
-	if (lweventlist)
+	if (!list_empty(&lweventlist))
 		linkwatch_schedule_work(0);
+	spin_unlock_irq(&lweventlist_lock);
+}
+
+void linkwatch_forget_dev(struct net_device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lweventlist_lock, flags);
+	if (!list_empty(&dev->link_watch_list)) {
+		list_del_init(&dev->link_watch_list);
+		dev_put(dev);
+	}
+	spin_unlock_irqrestore(&lweventlist_lock, flags);
 }
 
 
@@ -216,8 +232,6 @@  void linkwatch_fire_event(struct net_device *dev)
 	bool urgent = linkwatch_urgent_event(dev);
 
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
-		dev_hold(dev);
-
 		linkwatch_add_event(dev);
 	} else if (!urgent)
 		return;