diff mbox

pkt_sched: gen_kill_estimator() rcu fixes

Message ID 1276085363.2442.125.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet June 9, 2010, 12:09 p.m. UTC
Le mercredi 09 juin 2010 à 10:41 +0000, Jarek Poplawski a écrit :

> Spelling suggestion: xt_rateest_free_rcu?
> 
> Since it's only 3 places I'd consider adding little comments,
> who needs this call_rcu (like in qdisc_destroy).
> 
> Anyway this patch looks OK to me.
> 

Thanks for reviewing, you are right about typo and adding some comments.

What about following ?

[PATCH v2] pkt_sched: gen_kill_estimator() rcu fixes

gen_kill_estimator() API is incomplete or not well documented, since
caller should make sure an RCU grace period is respected before
freeing stats_lock.

This was partially addressed in commit 5d944c640b4 
(gen_estimator: deadlock fix), but same problem exist for all
gen_kill_estimator() users, if lock they use is not already RCU
protected.

A code review shows xt_RATEEST.c, act_api.c, act_police.c have this
problem. Other are ok because they use qdisc lock, already RCU
protected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/act_api.h              |    2 ++
 include/net/netfilter/xt_rateest.h |    1 +
 net/core/gen_estimator.c           |    1 +
 net/netfilter/xt_RATEEST.c         |   12 +++++++++++-
 net/sched/act_api.c                |   11 ++++++++++-
 net/sched/act_police.c             |   12 +++++++++++-
 6 files changed, 36 insertions(+), 3 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jarek Poplawski June 9, 2010, 12:50 p.m. UTC | #1
On Wed, Jun 09, 2010 at 02:09:23PM +0200, Eric Dumazet wrote:
> Le mercredi 09 juin 2010 ?? 10:41 +0000, Jarek Poplawski a écrit :
> 
> > Spelling suggestion: xt_rateest_free_rcu?
> > 
> > Since it's only 3 places I'd consider adding little comments,
> > who needs this call_rcu (like in qdisc_destroy).
> > 
> > Anyway this patch looks OK to me.
> > 
> 
> Thanks for reviewing, you are right about typo and adding some comments.
> 
> What about following ?

Very nice! (But let's remember these comments aren't very precise
wrt. bstats at the moment ;-)

Thanks,
Jarek P.

> 
> [PATCH v2] pkt_sched: gen_kill_estimator() rcu fixes
> 
> gen_kill_estimator() API is incomplete or not well documented, since
> caller should make sure an RCU grace period is respected before
> freeing stats_lock.
> 
> This was partially addressed in commit 5d944c640b4 
> (gen_estimator: deadlock fix), but same problem exist for all
> gen_kill_estimator() users, if lock they use is not already RCU
> protected.
> 
> A code review shows xt_RATEEST.c, act_api.c, act_police.c have this
> problem. Other are ok because they use qdisc lock, already RCU
> protected.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  include/net/act_api.h              |    2 ++
>  include/net/netfilter/xt_rateest.h |    1 +
>  net/core/gen_estimator.c           |    1 +
>  net/netfilter/xt_RATEEST.c         |   12 +++++++++++-
>  net/sched/act_api.c                |   11 ++++++++++-
>  net/sched/act_police.c             |   12 +++++++++++-
>  6 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/include/net/act_api.h b/include/net/act_api.h
> index c05fd71..bab385f 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -20,6 +20,7 @@ struct tcf_common {
>  	struct gnet_stats_queue		tcfc_qstats;
>  	struct gnet_stats_rate_est	tcfc_rate_est;
>  	spinlock_t			tcfc_lock;
> +	struct rcu_head			tcfc_rcu;
>  };
>  #define tcf_next	common.tcfc_next
>  #define tcf_index	common.tcfc_index
> @@ -32,6 +33,7 @@ struct tcf_common {
>  #define tcf_qstats	common.tcfc_qstats
>  #define tcf_rate_est	common.tcfc_rate_est
>  #define tcf_lock	common.tcfc_lock
> +#define tcf_rcu		common.tcfc_rcu
>  
>  struct tcf_police {
>  	struct tcf_common	common;
> diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
> index ddbf37e..5e14277 100644
> --- a/include/net/netfilter/xt_rateest.h
> +++ b/include/net/netfilter/xt_rateest.h
> @@ -9,6 +9,7 @@ struct xt_rateest {
>  	struct gnet_estimator		params;
>  	struct gnet_stats_rate_est	rstats;
>  	struct gnet_stats_basic_packed	bstats;
> +	struct rcu_head			rcu;
>  };
>  
>  extern struct xt_rateest *xt_rateest_lookup(const char *name);
> diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
> index 785e527..9fbe7f7 100644
> --- a/net/core/gen_estimator.c
> +++ b/net/core/gen_estimator.c
> @@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head)
>   *
>   * Removes the rate estimator specified by &bstats and &rate_est.
>   *
> + * Note : Caller should respect an RCU grace period before freeing stats_lock
>   */
>  void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
>  			struct gnet_stats_rate_est *rate_est)
> diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
> index 69c01e1..de079ab 100644
> --- a/net/netfilter/xt_RATEEST.c
> +++ b/net/netfilter/xt_RATEEST.c
> @@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
>  }
>  EXPORT_SYMBOL_GPL(xt_rateest_lookup);
>  
> +static void xt_rateest_free_rcu(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct xt_rateest, rcu));
> +}
> +
>  void xt_rateest_put(struct xt_rateest *est)
>  {
>  	mutex_lock(&xt_rateest_mutex);
>  	if (--est->refcnt == 0) {
>  		hlist_del(&est->list);
>  		gen_kill_estimator(&est->bstats, &est->rstats);
> -		kfree(est);
> +		/*
> +		 * gen_estimator est_timer() might access est->lock or bstats,
> +		 * wait a RCU grace period before freeing 'est'
> +		 */
> +		call_rcu(&est->rcu, xt_rateest_free_rcu);
>  	}
>  	mutex_unlock(&xt_rateest_mutex);
>  }
> @@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
>  static void __exit xt_rateest_tg_fini(void)
>  {
>  	xt_unregister_target(&xt_rateest_tg_reg);
> +	rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
>  }
>  
>  
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 972378f..23b25f8 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -26,6 +26,11 @@
>  #include <net/act_api.h>
>  #include <net/netlink.h>
>  
> +static void tcf_common_free_rcu(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct tcf_common, tcfc_rcu));
> +}
> +
>  void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
>  {
>  	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
> @@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
>  			write_unlock_bh(hinfo->lock);
>  			gen_kill_estimator(&p->tcfc_bstats,
>  					   &p->tcfc_rate_est);
> -			kfree(p);
> +			/*
> +			 * gen_estimator est_timer() might access p->tcfc_lock
> +			 * or bstats, wait a RCU grace period before freeing p
> +			 */
> +			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
>  			return;
>  		}
>  	}
> diff --git a/net/sched/act_police.c b/net/sched/act_police.c
> index 654f73d..537a487 100644
> --- a/net/sched/act_police.c
> +++ b/net/sched/act_police.c
> @@ -97,6 +97,11 @@ nla_put_failure:
>  	goto done;
>  }
>  
> +static void tcf_police_free_rcu(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct tcf_police, tcf_rcu));
> +}
> +
>  static void tcf_police_destroy(struct tcf_police *p)
>  {
>  	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
> @@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
>  				qdisc_put_rtab(p->tcfp_R_tab);
>  			if (p->tcfp_P_tab)
>  				qdisc_put_rtab(p->tcfp_P_tab);
> -			kfree(p);
> +			/*
> +			 * gen_estimator est_timer() might access p->tcf_lock
> +			 * or bstats, wait a RCU grace period before freeing p
> +			 */
> +			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
>  			return;
>  		}
>  	}
> @@ -397,6 +406,7 @@ static void __exit
>  police_cleanup_module(void)
>  {
>  	tcf_unregister_action(&act_police_ops);
> +	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
>  }
>  
>  module_init(police_init_module);
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 9, 2010, 1:05 p.m. UTC | #2
Le mercredi 09 juin 2010 à 12:50 +0000, Jarek Poplawski a écrit :
> On Wed, Jun 09, 2010 at 02:09:23PM +0200, Eric Dumazet wrote:
> > Le mercredi 09 juin 2010 ?? 10:41 +0000, Jarek Poplawski a écrit :
> > 
> > > Spelling suggestion: xt_rateest_free_rcu?
> > > 
> > > Since it's only 3 places I'd consider adding little comments,
> > > who needs this call_rcu (like in qdisc_destroy).
> > > 
> > > Anyway this patch looks OK to me.
> > > 
> > 
> > Thanks for reviewing, you are right about typo and adding some comments.
> > 
> > What about following ?
> 
> Very nice! (But let's remember these comments aren't very precise
> wrt. bstats at the moment ;-)

Yes, I anticipated a bit next patch ;)

BTW, I think using qdisc spinlock to update stats or reading them is not
really needed, particularly in xt_rateest/xt_RATEEST case, where
per_cpu bstats would be good

We could have one seqcount_t used by (bytes/packets) bstats producer,
and one seqcount_t used by est_timer when computing rates.


bstats updates :


write_seqcount_begin(&bstats->abs_seq);
bstats->bytes += len;
bstats->packets++;
write_seqcount_end(&bstats->abs_seq);


est_timer() would really run lockless, only rcu protected.

(No more spinlock, only the actual rcu_read_lock())


do {
	seq = read_seqcount_begin(&bstats->abs_seq);
	abs_bytes = bstats->bytes;
	abs_packets = bstats->packets;
} while (read_seqcount_retry(&bstats->abs_seq, seq));

write_seqcount_begin(&s->rate_seq);
/* compute rate estimation and store it */
...
write_seqcount_end(&s->rate_seq);

This would mean bstats should be rcu protected too...



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 12, 2010, 1:39 a.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 09 Jun 2010 14:09:23 +0200

> [PATCH v2] pkt_sched: gen_kill_estimator() rcu fixes
> 
> gen_kill_estimator() API is incomplete or not well documented, since
> caller should make sure an RCU grace period is respected before
> freeing stats_lock.
> 
> This was partially addressed in commit 5d944c640b4 
> (gen_estimator: deadlock fix), but same problem exist for all
> gen_kill_estimator() users, if lock they use is not already RCU
> protected.
> 
> A code review shows xt_RATEEST.c, act_api.c, act_police.c have this
> problem. Other are ok because they use qdisc lock, already RCU
> protected.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c05fd71..bab385f 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -20,6 +20,7 @@  struct tcf_common {
 	struct gnet_stats_queue		tcfc_qstats;
 	struct gnet_stats_rate_est	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
+	struct rcu_head			tcfc_rcu;
 };
 #define tcf_next	common.tcfc_next
 #define tcf_index	common.tcfc_index
@@ -32,6 +33,7 @@  struct tcf_common {
 #define tcf_qstats	common.tcfc_qstats
 #define tcf_rate_est	common.tcfc_rate_est
 #define tcf_lock	common.tcfc_lock
+#define tcf_rcu		common.tcfc_rcu
 
 struct tcf_police {
 	struct tcf_common	common;
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e..5e14277 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -9,6 +9,7 @@  struct xt_rateest {
 	struct gnet_estimator		params;
 	struct gnet_stats_rate_est	rstats;
 	struct gnet_stats_basic_packed	bstats;
+	struct rcu_head			rcu;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 785e527..9fbe7f7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -263,6 +263,7 @@  static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 69c01e1..de079ab 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,13 +60,22 @@  struct xt_rateest *xt_rateest_lookup(const char *name)
 }
 EXPORT_SYMBOL_GPL(xt_rateest_lookup);
 
+static void xt_rateest_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct xt_rateest, rcu));
+}
+
 void xt_rateest_put(struct xt_rateest *est)
 {
 	mutex_lock(&xt_rateest_mutex);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
 		gen_kill_estimator(&est->bstats, &est->rstats);
-		kfree(est);
+		/*
+		 * gen_estimator est_timer() might access est->lock or bstats,
+		 * wait a RCU grace period before freeing 'est'
+		 */
+		call_rcu(&est->rcu, xt_rateest_free_rcu);
 	}
 	mutex_unlock(&xt_rateest_mutex);
 }
@@ -179,6 +188,7 @@  static int __init xt_rateest_tg_init(void)
 static void __exit xt_rateest_tg_fini(void)
 {
 	xt_unregister_target(&xt_rateest_tg_reg);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
 }
 
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 972378f..23b25f8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@ 
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static void tcf_common_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_common, tcfc_rcu));
+}
+
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@  void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			write_unlock_bh(hinfo->lock);
 			gen_kill_estimator(&p->tcfc_bstats,
 					   &p->tcfc_rate_est);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcfc_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
 			return;
 		}
 	}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73d..537a487 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@  nla_put_failure:
 	goto done;
 }
 
+static void tcf_police_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_police, tcf_rcu));
+}
+
 static void tcf_police_destroy(struct tcf_police *p)
 {
 	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@  static void tcf_police_destroy(struct tcf_police *p)
 				qdisc_put_rtab(p->tcfp_R_tab);
 			if (p->tcfp_P_tab)
 				qdisc_put_rtab(p->tcfp_P_tab);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcf_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
 			return;
 		}
 	}
@@ -397,6 +406,7 @@  static void __exit
 police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 }
 
 module_init(police_init_module);