diff mbox

[v2,net-next] net: use jump_label to shortcut RPS if not setup

Message ID 1321535606.2751.35.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Nov. 17, 2011, 1:13 p.m. UTC
Most machines dont use RPS/RFS, and pay a fair amount of instructions in
netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover
RPS/RFS is not setup.

Add a jump_label named rps_needed.

If no device rps_map or global rps_sock_flow_table is setup,
netif_receive_skb() / netif_rx() do a single instruction instead of many
ones, including conditional jumps.

jmp +0    (if CONFIG_JUMP_LABEL=y)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
---
V2: add netif_rx() optim as well

 include/linux/netdevice.h  |    5 +++++
 net/core/dev.c             |   21 +++++++++------------
 net/core/net-sysfs.c       |    7 +++++--
 net/core/sysctl_net_core.c |    9 +++++++--
 4 files changed, 26 insertions(+), 16 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Nov. 17, 2011, 9:34 p.m. UTC | #1
Le jeudi 17 novembre 2011 à 14:13 +0100, Eric Dumazet a écrit :
> Most machines dont use RPS/RFS, and pay a fair amount of instructions in
> netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover
> RPS/RFS is not setup.
> 
> Add a jump_label named rps_needed.
> 
> If no device rps_map or global rps_sock_flow_table is setup,
> netif_receive_skb() / netif_rx() do a single instruction instead of many
> ones, including conditional jumps.
> 
> jmp +0    (if CONFIG_JUMP_LABEL=y)
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Tom Herbert <therbert@google.com>

I wonder if we could use this jump_label infrastructure to shortcut
expensive netfilter nf_hook_slow() / nf_iterate on empty
tables/chains...





--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Nov. 17, 2011, 10:04 p.m. UTC | #2
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 17 Nov 2011 22:34:49 +0100

> I wonder if we could use this jump_label infrastructure to shortcut
> expensive netfilter nf_hook_slow() / nf_iterate on empty
> tables/chains...

That would be nice, I can't see why it wouldn't be possible.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Nov. 17, 2011, 10:06 p.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 17 Nov 2011 14:13:26 +0100

> Most machines dont use RPS/RFS, and pay a fair amount of instructions in
> netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover
> RPS/RFS is not setup.
> 
> Add a jump_label named rps_needed.
> 
> If no device rps_map or global rps_sock_flow_table is setup,
> netif_receive_skb() / netif_rx() do a single instruction instead of many
> ones, including conditional jumps.
> 
> jmp +0    (if CONFIG_JUMP_LABEL=y)
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Tom Herbert <therbert@google.com>
> ---
> V2: add netif_rx() optim as well

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 17, 2011, 10:19 p.m. UTC | #4
Le jeudi 17 novembre 2011 à 17:04 -0500, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Thu, 17 Nov 2011 22:34:49 +0100
> 
> > I wonder if we could use this jump_label infrastructure to shortcut
> > expensive netfilter nf_hook_slow() / nf_iterate on empty
> > tables/chains...
> 
> That would be nice, I can't see why it wouldn't be possible.

Almost done :)

Only problem is arch_static_branch() has a "i" (key), so some calls wont
be optimized (hooknum in a variable, not a constant)

for example in nf_ct_frag6_output()

Not a big deal, most calls can be optimized away.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d5698a..0bbe030 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -214,6 +214,11 @@  enum {
 #include <linux/cache.h>
 #include <linux/skbuff.h>
 
+#ifdef CONFIG_RPS
+#include <linux/jump_label.h>
+extern struct jump_label_key rps_needed;
+#endif
+
 struct neighbour;
 struct neigh_parms;
 struct sk_buff;
diff --git a/net/core/dev.c b/net/core/dev.c
index 26c49d5..f789599 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2711,6 +2711,8 @@  EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+struct jump_label_key rps_needed __read_mostly;
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2994,7 +2996,7 @@  int netif_rx(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed))	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -3009,14 +3011,13 @@  int netif_rx(struct sk_buff *skb)
 
 		rcu_read_unlock();
 		preempt_enable();
-	}
-#else
+	} else
+#endif
 	{
 		unsigned int qtail;
 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
 		put_cpu();
 	}
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
@@ -3359,7 +3360,7 @@  int netif_receive_skb(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 
@@ -3370,16 +3371,12 @@  int netif_receive_skb(struct sk_buff *skb)
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
-		} else {
-			rcu_read_unlock();
-			ret = __netif_receive_skb(skb);
+			return ret;
 		}
-
-		return ret;
+		rcu_read_unlock();
 	}
-#else
-	return __netif_receive_skb(skb);
 #endif
+	return __netif_receive_skb(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 602b141..db6c2f8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,9 +606,12 @@  static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	rcu_assign_pointer(queue->rps_map, map);
 	spin_unlock(&rps_map_lock);
 
-	if (old_map)
+	if (map)
+		jump_label_inc(&rps_needed);
+	if (old_map) {
 		kfree_rcu(old_map, rcu);
-
+		jump_label_dec(&rps_needed);
+	}
 	free_cpumask_var(mask);
 	return len;
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f0..d05559d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@  static int rps_sock_flow_sysctl(ctl_table *table, int write,
 
 		if (sock_table != orig_sock_table) {
 			rcu_assign_pointer(rps_sock_flow_table, sock_table);
-			synchronize_rcu();
-			vfree(orig_sock_table);
+			if (sock_table)
+				jump_label_inc(&rps_needed);
+			if (orig_sock_table) {
+				jump_label_dec(&rps_needed);
+				synchronize_rcu();
+				vfree(orig_sock_table);
+			}
 		}
 	}