[net-next,03/12] net: sched: flower: introduce reference counting for filters

Message ID: 20190214074712.17846-4-vladbu@mellanox.com
State: Changes Requested
Delegated to: David Miller
Series: Refactor flower classifier to remove dependency on rtnl lock

Commit Message

Vlad Buslov Feb. 14, 2019, 7:47 a.m. UTC
Extend flower filters with reference counting in order to remove the
dependency on rtnl lock in flower ops and allow modifying filters
concurrently. References to a flower filter can be taken/released
concurrently once the classifier is marked as 'unlocked' by the last
patch in this series. Use an atomic reference counter type to make
concurrent modifications safe.

Always take a reference to a flower filter while working with it:
- Modify fl_get() to take a reference to the filter.
- Implement the tp->put() callback as fl_put() to allow the cls API to
release the reference taken by fl_get().
- Modify fl_change() to assume that the caller holds a reference to fold
and to take a reference to fnew.
- Take a reference to the filter while using it in fl_walk().

Implement helper functions to get/put the filter reference counter.
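
For illustration only (not part of this patch), the intended pairing on
the cls API side: every successful tp->ops->get() must now be balanced
by a tp->ops->put() once the caller is done with the filter. The actual
call sites live in the cls API patches of this series; this is just a
sketch:

	void *fh = tp->ops->get(tp, handle);	/* fl_get(): takes a reference */

	if (fh) {
		/* ... use the filter ... */
		tp->ops->put(tp, fh);		/* fl_put(): releases it */
	}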

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_flower.c | 95 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 14 deletions(-)

Comments

Stefano Brivio Feb. 14, 2019, 8:34 p.m. UTC | #1
On Thu, 14 Feb 2019 09:47:03 +0200
Vlad Buslov <vladbu@mellanox.com> wrote:

> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
> +						unsigned long *handle)
> +{
> +	struct cls_fl_head *head = fl_head_dereference(tp);
> +	struct cls_fl_filter *f;
> +
> +	rcu_read_lock();
> +	/* don't return filters that are being deleted */
> +	while ((f = idr_get_next_ul(&head->handle_idr,
> +				    handle)) != NULL &&
> +	       !refcount_inc_not_zero(&f->refcnt))
> +		++(*handle);

This... hurts :) What about:

	while ((f = idr_get_next_ul(&head->handle_idr, handle))) {
		if (refcount_inc_not_zero(&f->refcnt))
			break;
		++(*handle);
	}

?

> +	rcu_read_unlock();
> +
> +	return f;
> +}
> +
>  static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>  			struct netlink_ext_ack *extack)
>  {
> @@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>  	if (!tc_skip_hw(f->flags))
>  		fl_hw_destroy_filter(tp, f, extack);
>  	tcf_unbind_filter(tp, &f->res);
> -	if (async)
> -		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
> -	else
> -		__fl_destroy_filter(f);
> +	__fl_put(f);
>  
>  	return last;
>  }
> @@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
>  	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
>  }
>  
> +static void fl_put(struct tcf_proto *tp, void *arg)
> +{
> +	struct cls_fl_filter *f = arg;
> +
> +	__fl_put(f);
> +}
> +
>  static void *fl_get(struct tcf_proto *tp, u32 handle)
>  {
>  	struct cls_fl_head *head = fl_head_dereference(tp);
>  
> -	return idr_find(&head->handle_idr, handle);
> +	return __fl_get(head, handle);
>  }
>  
>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
> @@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  	struct nlattr **tb;
>  	int err;
>  
> -	if (!tca[TCA_OPTIONS])
> -		return -EINVAL;
> +	if (!tca[TCA_OPTIONS]) {
> +		err = -EINVAL;
> +		goto errout_fold;
> +	}
>  
>  	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
> -	if (!mask)
> -		return -ENOBUFS;
> +	if (!mask) {
> +		err = -ENOBUFS;
> +		goto errout_fold;
> +	}
>  
>  	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
>  	if (!tb) {
> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  		err = -ENOBUFS;
>  		goto errout_tb;
>  	}
> +	refcount_set(&fnew->refcnt, 1);
>  
>  	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
>  	if (err < 0)
> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  	if (!tc_in_hw(fnew->flags))
>  		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
>  
> +	refcount_inc(&fnew->refcnt);

I guess I'm not getting the semantics but... why is it 2 now?
Vlad Buslov Feb. 15, 2019, 11:22 a.m. UTC | #2
On Thu 14 Feb 2019 at 20:34, Stefano Brivio <sbrivio@redhat.com> wrote:
> On Thu, 14 Feb 2019 09:47:03 +0200
> Vlad Buslov <vladbu@mellanox.com> wrote:
>
>> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
>> +						unsigned long *handle)
>> +{
>> +	struct cls_fl_head *head = fl_head_dereference(tp);
>> +	struct cls_fl_filter *f;
>> +
>> +	rcu_read_lock();
>> +	/* don't return filters that are being deleted */
>> +	while ((f = idr_get_next_ul(&head->handle_idr,
>> +				    handle)) != NULL &&
>> +	       !refcount_inc_not_zero(&f->refcnt))
>> +		++(*handle);
>
> This... hurts :) What about:
>
> 	while ((f = idr_get_next_ul(&head->handle_idr, handle))) {
> 		if (refcount_inc_not_zero(&f->refcnt))
> 			break;
> 		++(*handle);
> 	}
>
> ?

I prefer to avoid using the value of an assignment as a boolean, and
non-structured jumps, when possible. In this case it seems OK either
way, but how about:

	for (f = idr_get_next_ul(&head->handle_idr, handle);
	     f && !refcount_inc_not_zero(&f->refcnt);
	     f = idr_get_next_ul(&head->handle_idr, handle))
		++(*handle);

>
>> +	rcu_read_unlock();
>> +
>> +	return f;
>> +}
>> +
>>  static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>>  			struct netlink_ext_ack *extack)
>>  {
>> @@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>>  	if (!tc_skip_hw(f->flags))
>>  		fl_hw_destroy_filter(tp, f, extack);
>>  	tcf_unbind_filter(tp, &f->res);
>> -	if (async)
>> -		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
>> -	else
>> -		__fl_destroy_filter(f);
>> +	__fl_put(f);
>>
>>  	return last;
>>  }
>> @@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
>>  	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
>>  }
>>
>> +static void fl_put(struct tcf_proto *tp, void *arg)
>> +{
>> +	struct cls_fl_filter *f = arg;
>> +
>> +	__fl_put(f);
>> +}
>> +
>>  static void *fl_get(struct tcf_proto *tp, u32 handle)
>>  {
>>  	struct cls_fl_head *head = fl_head_dereference(tp);
>>
>> -	return idr_find(&head->handle_idr, handle);
>> +	return __fl_get(head, handle);
>>  }
>>
>>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
>> @@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>  	struct nlattr **tb;
>>  	int err;
>>
>> -	if (!tca[TCA_OPTIONS])
>> -		return -EINVAL;
>> +	if (!tca[TCA_OPTIONS]) {
>> +		err = -EINVAL;
>> +		goto errout_fold;
>> +	}
>>
>>  	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
>> -	if (!mask)
>> -		return -ENOBUFS;
>> +	if (!mask) {
>> +		err = -ENOBUFS;
>> +		goto errout_fold;
>> +	}
>>
>>  	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
>>  	if (!tb) {
>> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>  		err = -ENOBUFS;
>>  		goto errout_tb;
>>  	}
>> +	refcount_set(&fnew->refcnt, 1);
>>
>>  	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
>>  	if (err < 0)
>> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>  	if (!tc_in_hw(fnew->flags))
>>  		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
>>
>> +	refcount_inc(&fnew->refcnt);
>
> I guess I'm not getting the semantics but... why is it 2 now?

As soon as fnew is inserted into head->handle_idr (one reference), it
becomes accessible to concurrent users, which means it can be deleted
at any time. However, tp->change() returns a reference to the newly
created filter to cls_api by assigning it to the "arg" parameter (the
second reference). After tp->change() returns, the cls API continues to
use fnew and releases it with tfilter_put() when finished.
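
Schematically (the refcount lines are from this patch; tfilter_put() is
the cls_api counterpart added elsewhere in this series):

	refcount_set(&fnew->refcnt, 1);	/* ref #1: taken at creation */
	...
	refcount_inc(&fnew->refcnt);	/* ref #2: for the "*arg" caller */
	/* fnew inserted into head->handle_idr: ref #1 now belongs to the
	 * IDR, so concurrent users can find (and delete) the filter
	 */
	*arg = fnew;			/* ref #2 handed to cls_api */
	...
	tfilter_put(tp, fnew);		/* cls_api drops ref #2 via fl_put() */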
Stefano Brivio Feb. 15, 2019, 12:32 p.m. UTC | #3
On Fri, 15 Feb 2019 11:22:45 +0000
Vlad Buslov <vladbu@mellanox.com> wrote:

> On Thu 14 Feb 2019 at 20:34, Stefano Brivio <sbrivio@redhat.com> wrote:
> > On Thu, 14 Feb 2019 09:47:03 +0200
> > Vlad Buslov <vladbu@mellanox.com> wrote:
> >  
> >> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
> >> +						unsigned long *handle)
> >> +{
> >> +	struct cls_fl_head *head = fl_head_dereference(tp);
> >> +	struct cls_fl_filter *f;
> >> +
> >> +	rcu_read_lock();
> >> +	/* don't return filters that are being deleted */
> >> +	while ((f = idr_get_next_ul(&head->handle_idr,
> >> +				    handle)) != NULL &&
> >> +	       !refcount_inc_not_zero(&f->refcnt))
> >> +		++(*handle);  
> >
> > This... hurts :) What about:
> >
> > 	while ((f = idr_get_next_ul(&head->handle_idr, handle))) {
> > 		if (refcount_inc_not_zero(&f->refcnt))
> > 			break;
> > 		++(*handle);
> > 	}
> >
> > ?  
> 
> I prefer to avoid using the value of an assignment as a boolean, and
> non-structured jumps, when possible. In this case it seems OK either
> way, but how about:
> 
> 	for (f = idr_get_next_ul(&head->handle_idr, handle);
> 	     f && !refcount_inc_not_zero(&f->refcnt);
> 	     f = idr_get_next_ul(&head->handle_idr, handle))
> 		++(*handle);

Honestly, I preferred the original; this one repeats idr_get_next_ul()
twice.

Maybe, just:

	[...]
	struct idr *idr;

	[...]
	idr = &head->handle_idr;
	while ((f = idr_get_next_ul(idr, handle)) != NULL &&
	       !refcount_inc_not_zero(&f->refcnt))
		++(*handle);

also rather ugly, but not entirely unreadable. I tried drafting a
helper for this, but it just ends up hiding what this does.
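
For the record, this is roughly what I had in mind (untested sketch,
hypothetical name):

	static struct cls_fl_filter *fl_next_alive(struct idr *idr,
						   unsigned long *handle)
	{
		struct cls_fl_filter *f;

		while ((f = idr_get_next_ul(idr, handle)) != NULL &&
		       !refcount_inc_not_zero(&f->refcnt))
			++(*handle);

		return f;
	}

...which, as said, just moves the loop out of sight without making it
any clearer.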

> >> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
> >>  		err = -ENOBUFS;
> >>  		goto errout_tb;
> >>  	}
> >> +	refcount_set(&fnew->refcnt, 1);
> >>
> >>  	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
> >>  	if (err < 0)
> >> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
> >>  	if (!tc_in_hw(fnew->flags))
> >>  		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
> >>
> >> +	refcount_inc(&fnew->refcnt);  
> >
> > I guess I'm not getting the semantics but... why is it 2 now?  
> 
> As soon as fnew is inserted into head->handle_idr (one reference), it
> becomes accessible to concurrent users, which means it can be deleted
> at any time. However, tp->change() returns a reference to the newly
> created filter to cls_api by assigning it to the "arg" parameter (the
> second reference). After tp->change() returns, the cls API continues to
> use fnew and releases it with tfilter_put() when finished.

I see, thanks for the explanation!

Patch

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 91596a6271f8..b216ed26f344 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -14,6 +14,7 @@ 
 #include <linux/module.h>
 #include <linux/rhashtable.h>
 #include <linux/workqueue.h>
+#include <linux/refcount.h>
 
 #include <linux/if_ether.h>
 #include <linux/in6.h>
@@ -104,6 +105,11 @@  struct cls_fl_filter {
 	u32 in_hw_count;
 	struct rcu_work rwork;
 	struct net_device *hw_dev;
+	/* Flower classifier is unlocked, which means that its reference counter
+	 * can be changed concurrently without any kind of external
+	 * synchronization. Use atomic reference counter to be concurrency-safe.
+	 */
+	refcount_t refcnt;
 };
 
 static const struct rhashtable_params mask_ht_params = {
@@ -443,6 +449,47 @@  static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
 	return rcu_dereference_protected(tp->root, 1);
 }
 
+static void __fl_put(struct cls_fl_filter *f)
+{
+	if (!refcount_dec_and_test(&f->refcnt))
+		return;
+
+	if (tcf_exts_get_net(&f->exts))
+		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
+	else
+		__fl_destroy_filter(f);
+}
+
+static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
+{
+	struct cls_fl_filter *f;
+
+	rcu_read_lock();
+	f = idr_find(&head->handle_idr, handle);
+	if (f && !refcount_inc_not_zero(&f->refcnt))
+		f = NULL;
+	rcu_read_unlock();
+
+	return f;
+}
+
+static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
+						unsigned long *handle)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+	struct cls_fl_filter *f;
+
+	rcu_read_lock();
+	/* don't return filters that are being deleted */
+	while ((f = idr_get_next_ul(&head->handle_idr,
+				    handle)) != NULL &&
+	       !refcount_inc_not_zero(&f->refcnt))
+		++(*handle);
+	rcu_read_unlock();
+
+	return f;
+}
+
 static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 			struct netlink_ext_ack *extack)
 {
@@ -456,10 +503,7 @@  static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f, extack);
 	tcf_unbind_filter(tp, &f->res);
-	if (async)
-		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
-	else
-		__fl_destroy_filter(f);
+	__fl_put(f);
 
 	return last;
 }
@@ -494,11 +538,18 @@  static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
 	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
 }
 
+static void fl_put(struct tcf_proto *tp, void *arg)
+{
+	struct cls_fl_filter *f = arg;
+
+	__fl_put(f);
+}
+
 static void *fl_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_fl_head *head = fl_head_dereference(tp);
 
-	return idr_find(&head->handle_idr, handle);
+	return __fl_get(head, handle);
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -1321,12 +1372,16 @@  static int fl_change(struct net *net, struct sk_buff *in_skb,
 	struct nlattr **tb;
 	int err;
 
-	if (!tca[TCA_OPTIONS])
-		return -EINVAL;
+	if (!tca[TCA_OPTIONS]) {
+		err = -EINVAL;
+		goto errout_fold;
+	}
 
 	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
-	if (!mask)
-		return -ENOBUFS;
+	if (!mask) {
+		err = -ENOBUFS;
+		goto errout_fold;
+	}
 
 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
 	if (!tb) {
@@ -1349,6 +1404,7 @@  static int fl_change(struct net *net, struct sk_buff *in_skb,
 		err = -ENOBUFS;
 		goto errout_tb;
 	}
+	refcount_set(&fnew->refcnt, 1);
 
 	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
 	if (err < 0)
@@ -1381,6 +1437,7 @@  static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(fnew->flags))
 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	refcount_inc(&fnew->refcnt);
 	if (fold) {
 		fnew->handle = handle;
 
@@ -1399,7 +1456,11 @@  static int fl_change(struct net *net, struct sk_buff *in_skb,
 			fl_hw_destroy_filter(tp, fold, NULL);
 		tcf_unbind_filter(tp, &fold->res);
 		tcf_exts_get_net(&fold->exts);
-		tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
+		/* Caller holds reference to fold, so refcnt is always > 0
+		 * after this.
+		 */
+		refcount_dec(&fold->refcnt);
+		__fl_put(fold);
 	} else {
 		if (__fl_lookup(fnew->mask, &fnew->mkey)) {
 			err = -EEXIST;
@@ -1448,6 +1509,9 @@  static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(tb);
 errout_mask_alloc:
 	kfree(mask);
+errout_fold:
+	if (fold)
+		__fl_put(fold);
 	return err;
 }
 
@@ -1461,24 +1525,26 @@  static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 			       f->mask->filter_ht_params);
 	__fl_delete(tp, f, extack);
 	*last = list_empty(&head->masks);
+	__fl_put(f);
+
 	return 0;
 }
 
 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 		    bool rtnl_held)
 {
-	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct cls_fl_filter *f;
 
 	arg->count = arg->skip;
 
-	while ((f = idr_get_next_ul(&head->handle_idr,
-				    &arg->cookie)) != NULL) {
+	while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
 		if (arg->fn(tp, f, arg) < 0) {
+			__fl_put(f);
 			arg->stop = 1;
 			break;
 		}
-		arg->cookie = f->handle + 1;
+		__fl_put(f);
+		arg->cookie++;
 		arg->count++;
 	}
 }
@@ -2148,6 +2214,7 @@  static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.init		= fl_init,
 	.destroy	= fl_destroy,
 	.get		= fl_get,
+	.put		= fl_put,
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,