diff mbox

dev: use ifindex hash for dev_seq_ops

Message ID 1318586017-17207-1-git-send-email-mmaruseac@ixiacom.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Mihai Maruseac Oct. 14, 2011, 9:53 a.m. UTC
Instead of walking the dev->next chain and trying to resync at each call to
dev_seq_start, look devices up by ifindex, keeping the last index in the
seq->private field.

Tests revealed the following results for ifconfig > /dev/null
	* 1000 interfaces:
		* 0.114s without patch
		* 0.089s with patch
	* 3000 interfaces:
		* 0.489s without patch
		* 0.110s with patch
	* 5000 interfaces:
		* 1.363s without patch
		* 0.250s with patch
	* 128000 interfaces (other setup):
		* ~100s without patch
		* ~30s with patch

Signed-off-by: Mihai Maruseac <mmaruseac@ixiacom.com>
---
 net/core/dev.c |   55 ++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 34 insertions(+), 21 deletions(-)

Comments

Eric Dumazet Oct. 14, 2011, 12:53 p.m. UTC | #1
Le vendredi 14 octobre 2011 à 12:53 +0300, Mihai Maruseac a écrit :
> Instead of using the dev->next chain and trying to resync at each call to
> dev_seq_start, use the ifindex, keeping the last index in seq->private field.
> 
> Tests revealed the following results for ifconfig > /dev/null
> 	* 1000 interfaces:
> 		* 0.114s without patch
> 		* 0.089s with patch
> 	* 3000 interfaces:
> 		* 0.489s without patch
> 		* 0.110s with patch
> 	* 5000 interfaces:
> 		* 1.363s without patch
> 		* 0.250s with patch
> 	* 128000 interfaces (other setup):
> 		* ~100s without patch
> 		* ~30s with patch
> 
> Signed-off-by: Mihai Maruseac <mmaruseac@ixiacom.com>
> ---
>  net/core/dev.c |   55 ++++++++++++++++++++++++++++++++++---------------------
>  1 files changed, 34 insertions(+), 21 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 70ecb86..ea24445 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4041,6 +4041,37 @@ static int dev_ifconf(struct net *net, char __user *arg)
>  }
>  
>  #ifdef CONFIG_PROC_FS
> +
> +struct dev_iter_state {
> +	struct seq_net_private p;
> +	int ifindex;
> +};
> +
> +static struct net_device *__dev_seq_next(struct seq_file *seq, loff_t *pos)
> +{
> +	struct dev_iter_state *state = seq->private;
> +	struct net *net = seq_file_net(seq);
> +	struct net_device *dev;
> +	loff_t off;
> +
> +	dev = dev_get_by_index_rcu(net, state->ifindex);
> +	if (likely(dev))
> +		goto found;
> +
> +	off = 0;
> +	for_each_netdev_rcu(net, dev)
> +		if (off++ == *pos) {
> +			state->ifindex = dev->ifindex;
> +			goto found;
> +		}
> +
> +	return NULL;
> +found:
> +	state->ifindex++;

This assumes device ifindexes are contained in a small range
[N .. N + X].

I understand this can help some benchmarks, but in the real world this won't
help that much once ifindexes are 'fragmented' (if this multi-thousand-devices
stuff is really for real).

Listen, we currently have 256 slots in the hash table.

Can we try to make 'offset' something like (slot_number << 24) +
(position in hash chain [slot_number]), instead of (position in the global
device list)?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Baluta Oct. 17, 2011, 8:03 a.m. UTC | #2
> This assumes device ifindexes are contained in a small range
> [N .. N + X]
>
> I understand this can help some benchmarks, but in real world this wont
> help that much once ifindexes are 'fragmented' (If really this multi
> thousand devices stuff is for real)
>
> Listen, we currently have 256 slots in the hash table.
>
> Can we try to make 'offset' something like  (slot_number<<24) +
> (position in hash chain [slot_number]), instead of (position in devices
> global list)


Eric, we can refine the idea of our first patch [1], where we recorded
the (bucket, offset) pair. Stephen, do you agree with this?


thanks,
Daniel.

[1] http://patchwork.ozlabs.org/patch/118331/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Oct. 17, 2011, 3:12 p.m. UTC | #3
On Mon, 17 Oct 2011 11:03:54 +0300
Daniel Baluta <dbaluta@ixiacom.com> wrote:

> > This assumes device ifindexes are contained in a small range
> > [N .. N + X]
> >
> > I understand this can help some benchmarks, but in real world this wont
> > help that much once ifindexes are 'fragmented' (If really this multi
> > thousand devices stuff is for real)
> >
> > Listen, we currently have 256 slots in the hash table.
> >
> > Can we try to make 'offset' something like  (slot_number<<24) +
> > (position in hash chain [slot_number]), instead of (position in devices
> > global list)
> 
> 
> Eric, we can refine the idea of our first patch [1], where we recorded
> the (bucket, offset) pair. Stephen, do you agree with this?
> 
> 
> thanks,
> Daniel.
> 
> [1] http://patchwork.ozlabs.org/patch/118331/

Using buckets is fine. My idea about ifindex was just to try to
preserve the order, but it doesn't matter.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index 70ecb86..ea24445 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4041,6 +4041,37 @@  static int dev_ifconf(struct net *net, char __user *arg)
 }
 
 #ifdef CONFIG_PROC_FS
+
+struct dev_iter_state {
+	struct seq_net_private p;
+	int ifindex;
+};
+
+static struct net_device *__dev_seq_next(struct seq_file *seq, loff_t *pos)
+{
+	struct dev_iter_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	loff_t off;
+
+	dev = dev_get_by_index_rcu(net, state->ifindex);
+	if (likely(dev))
+		goto found;
+
+	off = 0;
+	for_each_netdev_rcu(net, dev)
+		if (off++ == *pos) {
+			state->ifindex = dev->ifindex;
+			goto found;
+		}
+
+	return NULL;
+found:
+	state->ifindex++;
+	++*pos;
+	return dev;
+}
+
 /*
  *	This is invoked by the /proc filesystem handler to display a device
  *	in detail.
@@ -4048,33 +4079,15 @@  static int dev_ifconf(struct net *net, char __user *arg)
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
-	struct net *net = seq_file_net(seq);
-	loff_t off;
-	struct net_device *dev;
-
 	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
-
-	off = 1;
-	for_each_netdev_rcu(net, dev)
-		if (off++ == *pos)
-			return dev;
-
-	return NULL;
+	return __dev_seq_next(seq, pos);
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		dev = first_net_device_rcu(seq_file_net(seq));
-	else
-		dev = next_net_device_rcu(dev);
-
-	++*pos;
-	return dev;
+	return __dev_seq_next(seq, pos);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4173,7 +4186,7 @@  static const struct seq_operations dev_seq_ops = {
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
+			    sizeof(struct dev_iter_state));
 }
 
 static const struct file_operations dev_seq_fops = {