From patchwork Thu Mar 1 06:22:36 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Steffen Klassert X-Patchwork-Id: 143916 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3D56CB6EEC for ; Thu, 1 Mar 2012 17:22:43 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757009Ab2CAGWl (ORCPT ); Thu, 1 Mar 2012 01:22:41 -0500 Received: from a.mx.secunet.com ([195.81.216.161]:52566 "EHLO a.mx.secunet.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756530Ab2CAGWk (ORCPT ); Thu, 1 Mar 2012 01:22:40 -0500 Received: from localhost (alg1 [127.0.0.1]) by a.mx.secunet.com (Postfix) with ESMTP id D627A1A007E; Thu, 1 Mar 2012 07:22:38 +0100 (CET) X-Virus-Scanned: by secunet Received: from mail-srv1.secumail.de (unknown [10.53.40.200]) by a.mx.secunet.com (Postfix) with ESMTP id 876A41A007D; Thu, 1 Mar 2012 07:22:37 +0100 (CET) Received: from gauss.dd.secunet.de ([10.182.7.102]) by mail-srv1.secumail.de with Microsoft SMTPSVC(6.0.3790.4675); Thu, 1 Mar 2012 07:22:36 +0100 Received: by gauss.dd.secunet.de (Postfix, from userid 1000) id CCB8E5C0738; Thu, 1 Mar 2012 07:22:36 +0100 (CET) Date: Thu, 1 Mar 2012 07:22:36 +0100 From: Steffen Klassert To: David Miller Cc: timo.teras@iki.fi, netdev@vger.kernel.org Subject: [PATCH 1/2] inetpeer: Invalidate the inetpeer tree along with the routing cache Message-ID: <20120301062236.GC15404@secunet.com> References: <20120301062159.GB15404@secunet.com> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20120301062159.GB15404@secunet.com> User-Agent: Mutt/1.5.20 (2009-06-14) X-OriginalArrivalTime: 01 Mar 2012 06:22:36.0855 (UTC) FILETIME=[B21E9470:01CCF773] Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org We initialize the routing metrics with the values cached on the inetpeer in rt_init_metrics(). So if we have the metrics cached on the inetpeer, we ignore the user configured fib_metrics. To fix this issue, we replace the old tree with a fresh initialized inet_peer_base. The old tree is removed later with a delayed work queue. Signed-off-by: Steffen Klassert --- include/net/inetpeer.h | 3 ++ net/ipv4/inetpeer.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/route.c | 1 + 3 files changed, 83 insertions(+), 1 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 06b795d..ff04a33 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -41,6 +41,7 @@ struct inet_peer { u32 pmtu_orig; u32 pmtu_learned; struct inetpeer_addr_base redirect_learned; + struct list_head gc_list; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp @@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); +extern void inetpeer_invalidate_tree(int family); + /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index bf4a9c4..897953d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,11 @@ static struct kmem_cache *peer_cachep __read_mostly; +static LIST_HEAD(gc_list); +static const int gc_delay = 60 * HZ; +static struct delayed_work gc_work; +static DEFINE_SPINLOCK(gc_lock); + #define node_height(x) x->avl_height #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) @@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ +static void inetpeer_gc_worker(struct work_struct *work) +{ + struct inet_peer *p, *n; + LIST_HEAD(list); + + spin_lock_bh(&gc_lock); + list_replace_init(&gc_list, &list); + spin_unlock_bh(&gc_lock); + + if (list_empty(&list)) + return; + + list_for_each_entry_safe(p, n, &list, gc_list) { + + if(need_resched()) + cond_resched(); + + if (p->avl_left != peer_avl_empty) { + list_add_tail(&p->avl_left->gc_list, &list); + p->avl_left = peer_avl_empty; + } + + if (p->avl_right != peer_avl_empty) { + list_add_tail(&p->avl_right->gc_list, &list); + p->avl_right = peer_avl_empty; + } + + n = list_entry(p->gc_list.next, struct inet_peer, gc_list); + + if (!atomic_read(&p->refcnt)) { + list_del(&p->gc_list); + kmem_cache_free(peer_cachep, p); + } + } + + if (list_empty(&list)) + return; + + spin_lock_bh(&gc_lock); + list_splice(&list, &gc_list); + spin_unlock_bh(&gc_lock); + + schedule_delayed_work(&gc_work, gc_delay); +} /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) @@ -126,6 +176,7 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); } static int addr_compare(const struct inetpeer_addr *a, @@ -449,7 +500,7 @@ relookup: p->pmtu_orig = 0; p->redirect_genid = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); - + INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ link_to_pool(p, base); @@ -509,3 +560,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) return rc; } EXPORT_SYMBOL(inet_peer_xrlim_allow); + +void inetpeer_invalidate_tree(int family) +{ + struct inet_peer *old, *new, *prev; + struct inet_peer_base *base = family_to_base(family); + + write_seqlock_bh(&base->lock); + + old = base->root; + if (old == peer_avl_empty_rcu) + goto out; + + new = peer_avl_empty_rcu; + + prev = cmpxchg(&base->root, old, new); + if (prev == old) { + base->total = 0; + spin_lock(&gc_lock); + list_add_tail(&prev->gc_list, &gc_list); + spin_unlock(&gc_lock); + schedule_delayed_work(&gc_work, gc_delay); + } + +out: + write_sequnlock_bh(&base->lock); +} +EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bcacf54..23ce0c1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -938,6 +938,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); redirect_genid++; + inetpeer_invalidate_tree(AF_INET); } /*