Patchwork [net-next-2.6] pkt_sched: gen_estimator: kill est_lock rwlock

Submitter Eric Dumazet
Date June 7, 2010, 2:32 p.m.
Message ID <1275921171.2545.102.camel@edumazet-laptop>
Permalink /patch/54857/
State Superseded
Delegated to: David Miller

Comments

Eric Dumazet - June 7, 2010, 2:32 p.m.
We can use RCU in est_timer() and kill est_lock rwlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/gen_estimator.c |   45 ++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 27 deletions(-)
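
For context, the conversion follows the usual RCU pointer-publication pattern:
the reader fetches e->bstats once with rcu_dereference() inside an RCU
read-side critical section, and the writer unpublishes the pointer with
rcu_assign_pointer() before deferring destruction with call_rcu(). A minimal
kernel-style sketch of the two sides (simplified from the patch below, not a
literal copy of it):

/* reader side, as in est_timer() */
rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
        struct gnet_stats_basic_packed *bstats;

        spin_lock(e->stats_lock);
        bstats = rcu_dereference(e->bstats);
        if (bstats) {
                /* *bstats may be read here without est_lock, as long as
                 * the memory behind it stays valid for the duration of
                 * the RCU read-side critical section */
        }
        spin_unlock(e->stats_lock);
}
rcu_read_unlock();

/* writer side, as in gen_kill_estimator() */
rcu_assign_pointer(e->bstats, NULL);            /* unpublish the pointer */
list_del_rcu(&e->list);                         /* readers stop finding e */
call_rcu(&e->e_rcu, __gen_kill_estimator);      /* free e after a grace period */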



Changli Gao - June 7, 2010, 2:53 p.m.
On Mon, Jun 7, 2010 at 10:32 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> We can use RCU in est_timer() and kill est_lock rwlock.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  net/core/gen_estimator.c |   45 ++++++++++++++-----------------------
>  1 file changed, 18 insertions(+), 27 deletions(-)
>
> diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
> index cf8e703..406d880 100644
> --- a/net/core/gen_estimator.c
> +++ b/net/core/gen_estimator.c
> @@ -102,9 +102,6 @@ struct gen_estimator_head
>
>  static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
>
> -/* Protects against NULL dereference */
> -static DEFINE_RWLOCK(est_lock);
> -
>  /* Protects against soft lockup during large deletion */
>  static struct rb_root est_root = RB_ROOT;
>
> @@ -115,29 +112,25 @@ static void est_timer(unsigned long arg)
>
>        rcu_read_lock();
>        list_for_each_entry_rcu(e, &elist[idx].list, list) {
> -               u64 nbytes;
> -               u64 brate;
> -               u32 npackets;
> -               u32 rate;
> +               struct gnet_stats_basic_packed *bstats;
>
>                spin_lock(e->stats_lock);
> -               read_lock(&est_lock);
> -               if (e->bstats == NULL)
> -                       goto skip;
> -
> -               nbytes = e->bstats->bytes;
> -               npackets = e->bstats->packets;
> -               brate = (nbytes - e->last_bytes)<<(7 - idx);
> -               e->last_bytes = nbytes;
> -               e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
> -               e->rate_est->bps = (e->avbps+0xF)>>5;
> -
> -               rate = (npackets - e->last_packets)<<(12 - idx);
> -               e->last_packets = npackets;
> -               e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
> -               e->rate_est->pps = (e->avpps+0x1FF)>>10;
> -skip:
> -               read_unlock(&est_lock);
> +               bstats = rcu_dereference(e->bstats);
> +               if (bstats) {
> +                       u64 nbytes = ACCESS_ONCE(bstats->bytes);
> +                       u32 npackets = ACCESS_ONCE(bstats->packets);
> +                       u64 brate = (nbytes - e->last_bytes)<<(7 - idx);
> +                       u32 rate;
> +
> +                       e->last_bytes = nbytes;
> +                       e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
> +                       e->rate_est->bps = (e->avbps+0xF)>>5;
> +
> +                       rate = (npackets - e->last_packets)<<(12 - idx);
> +                       e->last_packets = npackets;
> +                       e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
> +                       e->rate_est->pps = (e->avpps+0x1FF)>>10;
> +               }
>                spin_unlock(e->stats_lock);
>        }
>
> @@ -271,9 +264,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
>        while ((e = gen_find_node(bstats, rate_est))) {
>                rb_erase(&e->node, &est_root);
>
> -               write_lock_bh(&est_lock);
> -               e->bstats = NULL;
> -               write_unlock_bh(&est_lock);
> +               rcu_assign_pointer(e->bstats, NULL);
>
>                list_del_rcu(&e->list);
>                call_rcu(&e->e_rcu, __gen_kill_estimator);
>

Hmm. I don't think it is correct.

Look at this code:

void xt_rateest_put(struct xt_rateest *est)
{
        mutex_lock(&xt_rateest_mutex);
        if (--est->refcnt == 0) {
                hlist_del(&est->list);
                gen_kill_estimator(&est->bstats, &est->rstats);
                kfree(est);
        }
        mutex_unlock(&xt_rateest_mutex);
}

est will be released after gen_kill_estimator() returns. After est is
freed, there may still be some other CPUs running in the branch:
     if (bstats) {
         ....
     }
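
The lifetime requirement this points at: whatever object holds bstats (here
struct xt_rateest) has to stay allocated until every est_timer() reader that
might have fetched the old pointer under rcu_read_lock() has finished. A
purely illustrative way to express that constraint in the caller is to wait
out a grace period before the kfree() (a sketch of the requirement, not a
proposed fix):

void xt_rateest_put(struct xt_rateest *est)
{
        mutex_lock(&xt_rateest_mutex);
        if (--est->refcnt == 0) {
                hlist_del(&est->list);
                gen_kill_estimator(&est->bstats, &est->rstats);
                /* est->bstats and est->rstats live inside *est, so est
                 * must outlive any concurrent est_timer() reader; waiting
                 * for a grace period before freeing is one way to
                 * guarantee that */
                synchronize_rcu();
                kfree(est);
        }
        mutex_unlock(&xt_rateest_mutex);
}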
Eric Dumazet - June 7, 2010, 3:30 p.m.
On Monday, June 7, 2010 at 22:53 +0800, Changli Gao wrote:

> Hmm. I don't think it is correct.
> 
> Look at this code:
> 
> void xt_rateest_put(struct xt_rateest *est)
> {
>         mutex_lock(&xt_rateest_mutex);
>         if (--est->refcnt == 0) {
>                 hlist_del(&est->list);
>                 gen_kill_estimator(&est->bstats, &est->rstats);
>                 kfree(est);
>         }
>         mutex_unlock(&xt_rateest_mutex);
> }
> 
> est will be released after gen_kill_estimator() returns. After est is
> freed, there may still be some other CPUs running in the branch:
>      if (bstats) {
>          ....
>      }
> 

Oh well, I think I knew this from a previous attempt, but then I
forgot :)

I'll provide an updated patch, so that no other attempt is tried next
year, thanks!



Eric Dumazet - June 7, 2010, 3:55 p.m.
On Monday, June 7, 2010 at 17:30 +0200, Eric Dumazet wrote:
> On Monday, June 7, 2010 at 22:53 +0800, Changli Gao wrote:
> 
> > Hmm. I don't think it is correct.
> > 
> > Look at this code:
> > 
> > void xt_rateest_put(struct xt_rateest *est)
> > {
> >         mutex_lock(&xt_rateest_mutex);
> >         if (--est->refcnt == 0) {
> >                 hlist_del(&est->list);
> >                 gen_kill_estimator(&est->bstats, &est->rstats);
> >                 kfree(est);
> >         }
> >         mutex_unlock(&xt_rateest_mutex);
> > }
> > 
> > est will be released after gen_kill_estimator() returns. After est is
> > freed, there may still be some other CPUs running in the branch:
> >      if (bstats) {
> >          ....
> >      }
> > 
> 
> Oh well, I think I knew this from a previous attempt, but then I
> forgot :)
> 
> I'll provide an updated patch, so that no other attempt is tried next
> year, thanks!
> 
> 

For your information, the bug was already there before my patch.

So this est_lock is the wrong protection, in the sense that it's so convoluted
that nobody but you and me even noticed it was buggy in the first place.

(see commit 5d944c640b4 for a first patch)
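
Concretely, in the pre-patch reader (the lines removed above), est_lock only
serializes the e->bstats NULL check against gen_kill_estimator(); it says
nothing about the lifetime of the memory that stats_lock and rate_est point
into, which in the xt_rateest case sits in the same structure as bstats. An
annotated sketch of the old path (comments added; the code is as removed by
the patch):

spin_lock(e->stats_lock);       /* stats_lock can point into the object
                                 * being torn down, and is taken before
                                 * est_lock is even consulted */
read_lock(&est_lock);
if (e->bstats == NULL)          /* the only thing est_lock protects */
        goto skip;

/* ... e->bstats and e->rate_est are read and written here ... */

skip:
read_unlock(&est_lock);
spin_unlock(e->stats_lock);     /* still touching that memory */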




Patch

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e703..406d880 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -102,9 +102,6 @@  struct gen_estimator_head
 
 static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 
-/* Protects against NULL dereference */
-static DEFINE_RWLOCK(est_lock);
-
 /* Protects against soft lockup during large deletion */
 static struct rb_root est_root = RB_ROOT;
 
@@ -115,29 +112,25 @@  static void est_timer(unsigned long arg)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &elist[idx].list, list) {
-		u64 nbytes;
-		u64 brate;
-		u32 npackets;
-		u32 rate;
+		struct gnet_stats_basic_packed *bstats;
 
 		spin_lock(e->stats_lock);
-		read_lock(&est_lock);
-		if (e->bstats == NULL)
-			goto skip;
-
-		nbytes = e->bstats->bytes;
-		npackets = e->bstats->packets;
-		brate = (nbytes - e->last_bytes)<<(7 - idx);
-		e->last_bytes = nbytes;
-		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
-		e->rate_est->bps = (e->avbps+0xF)>>5;
-
-		rate = (npackets - e->last_packets)<<(12 - idx);
-		e->last_packets = npackets;
-		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
-		e->rate_est->pps = (e->avpps+0x1FF)>>10;
-skip:
-		read_unlock(&est_lock);
+		bstats = rcu_dereference(e->bstats);
+		if (bstats) {
+			u64 nbytes = ACCESS_ONCE(bstats->bytes);
+			u32 npackets = ACCESS_ONCE(bstats->packets);
+			u64 brate = (nbytes - e->last_bytes)<<(7 - idx);
+			u32 rate;
+
+			e->last_bytes = nbytes;
+			e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
+			e->rate_est->bps = (e->avbps+0xF)>>5;
+
+			rate = (npackets - e->last_packets)<<(12 - idx);
+			e->last_packets = npackets;
+			e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
+			e->rate_est->pps = (e->avpps+0x1FF)>>10;
+		}
 		spin_unlock(e->stats_lock);
 	}
 
@@ -271,9 +264,7 @@  void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
-		write_lock_bh(&est_lock);
-		e->bstats = NULL;
-		write_unlock_bh(&est_lock);
+		rcu_assign_pointer(e->bstats, NULL);
 
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);