diff mbox

[net-next-2.6] rtnetlink: rtnl_setlink() and rtnl_getlink() changes

Message ID 4ADF7633.9050208@gmail.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Oct. 21, 2009, 8:59 p.m. UTC
Stephen, do you think we could change "ip link show dev ethX" to
let it use rtnl_getlink() instead of rtnl_dump_ifinfo() ?

Thanks !

[PATCH net-next-2.6]rtnetlink: rtnl_setlink() and rtnl_getlink() changes

rtnl_getlink() & rtnl_setlink() run with RTNL held, we can use
__dev_get_by_index() and __dev_get_by_name() variants and avoid
dev_hold()/dev_put()

Adds to rtnl_getlink() the capability to find a device by its name,
not only by its index.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/rtnetlink.c |   38 +++++++++++++++++++-------------------
 1 files changed, 19 insertions(+), 19 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

stephen hemminger Oct. 21, 2009, 9:48 p.m. UTC | #1
On Wed, 21 Oct 2009 22:59:31 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Stephen, do you think we could change "ip link show dev ethX" to
> let it use rtnl_getlink() instead of rtnl_dump_ifinfo() ?
> 
> Thanks !
> 
> [PATCH net-next-2.6]rtnetlink: rtnl_setlink() and rtnl_getlink() changes
> 
> rtnl_getlink() & rtnl_setlink() run with RTNL held, we can use
> __dev_get_by_index() and __dev_get_by_name() variants and avoid
> dev_hold()/dev_put()
> 
> Adds to rtnl_getlink() the capability to find a device by its name,
> not only by its index.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  net/core/rtnetlink.c |   38 +++++++++++++++++++-------------------
>  1 files changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index eb42873..ba13b09 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -910,9 +910,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
>  	err = -EINVAL;
>  	ifm = nlmsg_data(nlh);
>  	if (ifm->ifi_index > 0)
> -		dev = dev_get_by_index(net, ifm->ifi_index);
> +		dev = __dev_get_by_index(net, ifm->ifi_index);
>  	else if (tb[IFLA_IFNAME])
> -		dev = dev_get_by_name(net, ifname);
> +		dev = __dev_get_by_name(net, ifname);
>  	else
>  		goto errout;
>  
> @@ -922,11 +922,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
>  	}
>  
>  	if ((err = validate_linkmsg(dev, tb)) < 0)
> -		goto errout_dev;
> +		goto errout;
>  
>  	err = do_setlink(dev, ifm, tb, ifname, 0);
> -errout_dev:
> -	dev_put(dev);
>  errout:
>  	return err;
>  }
> @@ -1154,6 +1152,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
>  {
>  	struct net *net = sock_net(skb->sk);
>  	struct ifinfomsg *ifm;
> +	char ifname[IFNAMSIZ];
>  	struct nlattr *tb[IFLA_MAX+1];
>  	struct net_device *dev = NULL;
>  	struct sk_buff *nskb;
> @@ -1163,19 +1162,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
>  	if (err < 0)
>  		return err;
>  
> +	if (tb[IFLA_IFNAME])
> +		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
> +
>  	ifm = nlmsg_data(nlh);
> -	if (ifm->ifi_index > 0) {
> -		dev = dev_get_by_index(net, ifm->ifi_index);
> -		if (dev == NULL)
> -			return -ENODEV;
> -	} else
> +	if (ifm->ifi_index > 0)
> +		dev = __dev_get_by_index(net, ifm->ifi_index);
> +	else if (tb[IFLA_IFNAME])
> +		dev = __dev_get_by_name(net, ifname);
> +	else
>  		return -EINVAL;
>  
> +	if (dev == NULL)
> +		return -ENODEV;
> +
>  	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
> -	if (nskb == NULL) {
> -		err = -ENOBUFS;
> -		goto errout;
> -	}
> +	if (nskb == NULL)
> +		return -ENOBUFS;
>  
>  	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
>  			       nlh->nlmsg_seq, 0, 0);
> @@ -1183,11 +1186,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
>  		/* -EMSGSIZE implies BUG in if_nlmsg_size */
>  		WARN_ON(err == -EMSGSIZE);
>  		kfree_skb(nskb);
> -		goto errout;
> -	}
> -	err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
> -errout:
> -	dev_put(dev);
> +	} else
> +		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
>  
>  	return err;
>  }

Would work, but not sure what it gains.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 22, 2009, 2:59 a.m. UTC | #2
Stephen Hemminger a écrit :
> On Wed, 21 Oct 2009 22:59:31 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
>> Stephen, do you think we could change "ip link show dev ethX" to
>> let it use rtnl_getlink() instead of rtnl_dump_ifinfo() ?
>>
> 
> Would work, but not sure what it gains.

It takes about one second to dump the table when we have 25.000 devices
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 22, 2009, 4:48 a.m. UTC | #3
Eric Dumazet a écrit :
> Stephen Hemminger a écrit :
>> On Wed, 21 Oct 2009 22:59:31 +0200
>> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>>> Stephen, do you think we could change "ip link show dev ethX" to
>>> let it use rtnl_getlink() instead of rtnl_dump_ifinfo() ?
>>>
>> Would work, but not sure what it gains.
> 
> It takes about one second to dump the table when we have 25.000 devices

Adding new links takes also lot of time in rtnl_dump_ifinfo(), we could
optimize it using a 256 fanout (using the ifindex hash table instead
of the single list)

But IMHO rtnl_dump_ifinfo() should be used only when needed, not when
querying/adding a particular device.

------------------------------------------------------------------------------
   PerfTop:   42745 irqs/sec  kernel:88.0% [100000 cycles],  (all, 8 CPUs)
------------------------------------------------------------------------------

             samples    pcnt   kernel function
             _______   _____   _______________

           231146.00 - 52.4% : rtnl_dump_ifinfo
            18491.00 -  4.2% : __register_sysctl_paths
            17700.00 -  4.0% : mwait_idle
            12883.00 -  2.9% : rtnl_fill_ifinfo
            12661.00 -  2.9% : schedule
             6324.00 -  1.4% : find_busiest_group
             5911.00 -  1.3% : _spin_lock_irqsave
             4862.00 -  1.1% : dev_get_stats
             4726.00 -  1.1% : copy_to_user
             4547.00 -  1.0% : __nla_put
             4117.00 -  0.9% : sysfs_find_dirent
             4090.00 -  0.9% : sysenter_past_esp
             3789.00 -  0.9% : fput
             3735.00 -  0.8% : __nla_reserve
             3699.00 -  0.8% : read_tsc


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 22, 2009, 11:34 a.m. UTC | #4
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 21 Oct 2009 22:59:31 +0200

> rtnl_getlink() & rtnl_setlink() run with RTNL held, we can use
> __dev_get_by_index() and __dev_get_by_name() variants and avoid
> dev_hold()/dev_put()
> 
> Adds to rtnl_getlink() the capability to find a device by its name,
> not only by its index.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Looks good, applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eb42873..ba13b09 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -910,9 +910,9 @@  static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
-		dev = dev_get_by_index(net, ifm->ifi_index);
+		dev = __dev_get_by_index(net, ifm->ifi_index);
 	else if (tb[IFLA_IFNAME])
-		dev = dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(net, ifname);
 	else
 		goto errout;
 
@@ -922,11 +922,9 @@  static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	}
 
 	if ((err = validate_linkmsg(dev, tb)) < 0)
-		goto errout_dev;
+		goto errout;
 
 	err = do_setlink(dev, ifm, tb, ifname, 0);
-errout_dev:
-	dev_put(dev);
 errout:
 	return err;
 }
@@ -1154,6 +1152,7 @@  static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ifinfomsg *ifm;
+	char ifname[IFNAMSIZ];
 	struct nlattr *tb[IFLA_MAX+1];
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
@@ -1163,19 +1162,23 @@  static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_index > 0) {
-		dev = dev_get_by_index(net, ifm->ifi_index);
-		if (dev == NULL)
-			return -ENODEV;
-	} else
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(net, ifname);
+	else
 		return -EINVAL;
 
+	if (dev == NULL)
+		return -ENODEV;
+
 	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
-	if (nskb == NULL) {
-		err = -ENOBUFS;
-		goto errout;
-	}
+	if (nskb == NULL)
+		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
 			       nlh->nlmsg_seq, 0, 0);
@@ -1183,11 +1186,8 @@  static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(nskb);
-		goto errout;
-	}
-	err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
-errout:
-	dev_put(dev);
+	} else
+		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
 
 	return err;
 }