diff mbox

[net-next] neigh: speedup neigh_resolve_output()

Message ID 1286484247.3745.91.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Oct. 7, 2010, 8:44 p.m. UTC
Le jeudi 07 octobre 2010 à 14:53 +0200, Eric Dumazet a écrit :

> Further improvements would need to use a seqlock instead of an rwlock to
> protect neigh->ha[], to not dirty neigh too often and remove two atomic
> ops.
> 

I implemented this idea in following patch, on top on previous one.

[PATCH net-next] neigh: speedup neigh_resolve_output()

Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying neighbour in stress situation (many different flows / dsts)

Dirtying takes place because of read_lock(&n->lock) and n->used writes.

Switching to a seqlock, and writing n->used only on jiffies changes
permits less dirtying.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/neighbour.h |   16 ++++++++++++
 net/core/neighbour.c    |   47 ++++++++++++++++++++++++--------------
 net/ipv4/arp.c          |    6 +---
 net/sched/sch_teql.c    |    8 +++---
 4 files changed, 51 insertions(+), 26 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Miller Oct. 11, 2010, 7:16 p.m. UTC | #1
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 07 Oct 2010 22:44:07 +0200

> Le jeudi 07 octobre 2010 à 14:53 +0200, Eric Dumazet a écrit :
> 
>> Further improvements would need to use a seqlock instead of an rwlock to
>> protect neigh->ha[], to not dirty neigh too often and remove two atomic
>> ops.
>> 
> 
> I implemented this idea in following patch, on top on previous one.
> 
> [PATCH net-next] neigh: speedup neigh_resolve_output()

So Eric do you think this is more efficient than the idea I
proposed, which is to "cmpxchg 'hh' and RCU"?

If you think this seqlock thing is faster or more desirable
for some reason, I'll add it.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 11, 2010, 7:46 p.m. UTC | #2
Le lundi 11 octobre 2010 à 12:16 -0700, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Thu, 07 Oct 2010 22:44:07 +0200
> 
> > Le jeudi 07 octobre 2010 à 14:53 +0200, Eric Dumazet a écrit :
> > 
> >> Further improvements would need to use a seqlock instead of an rwlock to
> >> protect neigh->ha[], to not dirty neigh too often and remove two atomic
> >> ops.
> >> 
> > 
> > I implemented this idea in following patch, on top on previous one.
> > 
> > [PATCH net-next] neigh: speedup neigh_resolve_output()
> 
> So Eric do you think this is more efficient than the idea I
> proposed, which is to "cmpxchg 'hh' and RCU"?
> 
> If you think this seqlock thing is faster or more desirable
> for some reason, I'll add it.
> 
> Thanks!

Hmm, we would need a neigh->ha pointer to some struct, with rcu
protection. It adds a dereference in hot path. I believe this seqlock
(only for pathological cases, where dst are used for few packets) should
be fine.

Note: After this patch, I have a "struct neighbour" small field reorg
pending, not yet submitted.

Thanks


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 11, 2010, 8:01 p.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 11 Oct 2010 21:46:01 +0200

> Hmm, we would need a neigh->ha pointer to some struct, with rcu
> protection. It adds a dereference in hot path. I believe this seqlock
> (only for pathological cases, where dst are used for few packets) should
> be fine.

Good point.

Ok, assuming it passes build testing I'll push this seqlock
patch to net-next-2.6

Thanks!

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a4538d5..f04e7a2 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -105,6 +105,7 @@  struct neighbour {
 	atomic_t		refcnt;
 	atomic_t		probes;
 	rwlock_t		lock;
+	seqlock_t		ha_lock;
 	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
 	struct hh_cache		*hh;
 	int			(*output)(struct sk_buff *skb);
@@ -302,7 +303,10 @@  static inline void neigh_confirm(struct neighbour *neigh)
 
 static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
-	neigh->used = jiffies;
+	unsigned long now = ACCESS_ONCE(jiffies);
+	
+	if (neigh->used != now)
+		neigh->used = now;
 	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
 		return __neigh_event_send(neigh, skb);
 	return 0;
@@ -373,4 +377,14 @@  struct neighbour_cb {
 
 #define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)
 
+static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
+				     const struct net_device *dev)
+{
+	unsigned int seq;
+
+	do {
+		seq = read_seqbegin(&n->ha_lock);
+		memcpy(dst, n->ha, dev->addr_len);
+	} while (read_seqretry(&n->ha_lock, seq));
+}
 #endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 53cc548..54aef9c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -294,6 +294,7 @@  static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 
 	skb_queue_head_init(&n->arp_queue);
 	rwlock_init(&n->lock);
+	seqlock_init(&n->ha_lock);
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
@@ -1015,7 +1016,7 @@  out_unlock_bh:
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1151,7 +1152,9 @@  int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 	}
 
 	if (lladdr != neigh->ha) {
+		write_seqlock(&neigh->ha_lock);
 		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		write_sequnlock(&neigh->ha_lock);
 		neigh_update_hhs(neigh);
 		if (!(new & NUD_CONNECTED))
 			neigh->confirmed = jiffies -
@@ -1214,6 +1217,7 @@  static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
 {
 	struct hh_cache *hh;
 
+	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
 	for (hh = n->hh; hh; hh = hh->hh_next) {
 		if (hh->hh_type == protocol) {
 			atomic_inc(&hh->hh_refcnt);
@@ -1248,8 +1252,8 @@  static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
 		kfree(hh);
 		return;
 	}
-	read_unlock(&n->lock);
-	write_lock(&n->lock);
+
+	write_lock_bh(&n->lock);
 	
 	/* must check if another thread already did the insert */
 	if (neigh_hh_lookup(n, dst, protocol)) {
@@ -1263,13 +1267,13 @@  static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
 		hh->hh_output = n->ops->output;
 
 	hh->hh_next = n->hh;
+	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
 	n->hh	    = hh;
 
 	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
 		hh_cache_put(hh);
 end:
-	write_unlock(&n->lock);
-	read_lock(&n->lock);
+	write_unlock_bh(&n->lock);
 }
 
 /* This function can be used in contexts, where only old dev_queue_xmit
@@ -1308,16 +1312,18 @@  int neigh_resolve_output(struct sk_buff *skb)
 	if (!neigh_event_send(neigh, skb)) {
 		int err;
 		struct net_device *dev = neigh->dev;
+		unsigned int seq;
 
-		read_lock_bh(&neigh->lock);
 		if (dev->header_ops->cache &&
 		    !dst->hh &&
 		    !(dst->flags & DST_NOCACHE))
 			neigh_hh_init(neigh, dst, dst->ops->protocol);
 
-		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-				      neigh->ha, NULL, skb->len);
-		read_unlock_bh(&neigh->lock);
+		do {
+			seq = read_seqbegin(&neigh->ha_lock);
+			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+					      neigh->ha, NULL, skb->len);
+		} while (read_seqretry(&neigh->ha_lock, seq));
 
 		if (err >= 0)
 			rc = neigh->ops->queue_xmit(skb);
@@ -1344,13 +1350,16 @@  int neigh_connected_output(struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
+	unsigned int seq;
 
 	__skb_pull(skb, skb_network_offset(skb));
 
-	read_lock_bh(&neigh->lock);
-	err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-			      neigh->ha, NULL, skb->len);
-	read_unlock_bh(&neigh->lock);
+	do {
+		seq = read_seqbegin(&neigh->ha_lock);
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+				      neigh->ha, NULL, skb->len);
+	} while (read_seqretry(&neigh->ha_lock, seq));
+
 	if (err >= 0)
 		err = neigh->ops->queue_xmit(skb);
 	else {
@@ -2148,10 +2157,14 @@  static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 
 	read_lock_bh(&neigh->lock);
 	ndm->ndm_state	 = neigh->nud_state;
-	if ((neigh->nud_state & NUD_VALID) &&
-	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
-		read_unlock_bh(&neigh->lock);
-		goto nla_put_failure;
+	if (neigh->nud_state & NUD_VALID) {
+		char haddr[MAX_ADDR_LEN];
+
+		neigh_ha_snapshot(haddr, neigh, neigh->dev);
+		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+			read_unlock_bh(&neigh->lock);
+			goto nla_put_failure;
+		}
 	}
 
 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f353095..d8e540c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -502,10 +502,8 @@  int arp_find(unsigned char *haddr, struct sk_buff *skb)
 
 	if (n) {
 		n->used = jiffies;
-		if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
-			read_lock_bh(&n->lock);
-			memcpy(haddr, n->ha, dev->addr_len);
-			read_unlock_bh(&n->lock);
+		if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
+			neigh_ha_snapshot(haddr, n, dev);
 			neigh_release(n);
 			return 0;
 		}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc1..401af95 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -241,11 +241,11 @@  __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
 	}
 	if (neigh_event_send(n, skb_res) == 0) {
 		int err;
+		char haddr[MAX_ADDR_LEN];
 
-		read_lock(&n->lock);
-		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-				      n->ha, NULL, skb->len);
-		read_unlock(&n->lock);
+		neigh_ha_snapshot(haddr, n, dev);
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
+				      NULL, skb->len);
 
 		if (err < 0) {
 			neigh_release(n);