| Message ID | 20190214074712.17846-4-vladbu@mellanox.com |
|---|---|
| State | Changes Requested |
| Delegated to | David Miller |
| Series | Refactor flower classifier to remove dependency on rtnl lock |
On Thu, 14 Feb 2019 09:47:03 +0200
Vlad Buslov <vladbu@mellanox.com> wrote:

> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
> +                                                unsigned long *handle)
> +{
> +        struct cls_fl_head *head = fl_head_dereference(tp);
> +        struct cls_fl_filter *f;
> +
> +        rcu_read_lock();
> +        /* don't return filters that are being deleted */
> +        while ((f = idr_get_next_ul(&head->handle_idr,
> +                                    handle)) != NULL &&
> +               !refcount_inc_not_zero(&f->refcnt))
> +                ++(*handle);

This... hurts :) What about:

        while ((f = idr_get_next_ul(&head->handle_idr, &handle))) {
                if (refcount_inc_not_zero(&f->refcnt))
                        break;
                ++(*handle);
        }

?

> +        rcu_read_unlock();
> +
> +        return f;
> +}
> +
>  static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>                          struct netlink_ext_ack *extack)
>  {
> @@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>          if (!tc_skip_hw(f->flags))
>                  fl_hw_destroy_filter(tp, f, extack);
>          tcf_unbind_filter(tp, &f->res);
> -        if (async)
> -                tcf_queue_work(&f->rwork, fl_destroy_filter_work);
> -        else
> -                __fl_destroy_filter(f);
> +        __fl_put(f);
>
>          return last;
>  }
> @@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
>                  tcf_queue_work(&head->rwork, fl_destroy_sleepable);
>  }
>
> +static void fl_put(struct tcf_proto *tp, void *arg)
> +{
> +        struct cls_fl_filter *f = arg;
> +
> +        __fl_put(f);
> +}
> +
>  static void *fl_get(struct tcf_proto *tp, u32 handle)
>  {
>          struct cls_fl_head *head = fl_head_dereference(tp);
>
> -        return idr_find(&head->handle_idr, handle);
> +        return __fl_get(head, handle);
>  }
>
>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
> @@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>          struct nlattr **tb;
>          int err;
>
> -        if (!tca[TCA_OPTIONS])
> -                return -EINVAL;
> +        if (!tca[TCA_OPTIONS]) {
> +                err = -EINVAL;
> +                goto errout_fold;
> +        }
>
>          mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
> -        if (!mask)
> -                return -ENOBUFS;
> +        if (!mask) {
> +                err = -ENOBUFS;
> +                goto errout_fold;
> +        }
>
>          tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
>          if (!tb) {
> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>                  err = -ENOBUFS;
>                  goto errout_tb;
>          }
> +        refcount_set(&fnew->refcnt, 1);
>
>          err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
>          if (err < 0)
> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>          if (!tc_in_hw(fnew->flags))
>                  fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
>
> +        refcount_inc(&fnew->refcnt);

I guess I'm not getting the semantics but... why is it 2 now?
On Thu 14 Feb 2019 at 20:34, Stefano Brivio <sbrivio@redhat.com> wrote:
> On Thu, 14 Feb 2019 09:47:03 +0200
> Vlad Buslov <vladbu@mellanox.com> wrote:
>
>> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
>> +                                                unsigned long *handle)
>> +{
>> +        struct cls_fl_head *head = fl_head_dereference(tp);
>> +        struct cls_fl_filter *f;
>> +
>> +        rcu_read_lock();
>> +        /* don't return filters that are being deleted */
>> +        while ((f = idr_get_next_ul(&head->handle_idr,
>> +                                    handle)) != NULL &&
>> +               !refcount_inc_not_zero(&f->refcnt))
>> +                ++(*handle);
>
> This... hurts :) What about:
>
>         while ((f = idr_get_next_ul(&head->handle_idr, &handle))) {
>                 if (refcount_inc_not_zero(&f->refcnt))
>                         break;
>                 ++(*handle);
>         }
>
> ?

I prefer to avoid using value of assignment as boolean and
non-structured jumps, when possible. In this case it seems OK either
way, but how about:

for (f = idr_get_next_ul(&head->handle_idr, handle);
     f && !refcount_inc_not_zero(&f->refcnt);
     f = idr_get_next_ul(&head->handle_idr, handle))
        ++(*handle);

>
>> +        rcu_read_unlock();
>> +
>> +        return f;
>> +}
>> +
>>  static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>>                          struct netlink_ext_ack *extack)
>>  {
>> @@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
>>          if (!tc_skip_hw(f->flags))
>>                  fl_hw_destroy_filter(tp, f, extack);
>>          tcf_unbind_filter(tp, &f->res);
>> -        if (async)
>> -                tcf_queue_work(&f->rwork, fl_destroy_filter_work);
>> -        else
>> -                __fl_destroy_filter(f);
>> +        __fl_put(f);
>>
>>          return last;
>>  }
>> @@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
>>                  tcf_queue_work(&head->rwork, fl_destroy_sleepable);
>>  }
>>
>> +static void fl_put(struct tcf_proto *tp, void *arg)
>> +{
>> +        struct cls_fl_filter *f = arg;
>> +
>> +        __fl_put(f);
>> +}
>> +
>>  static void *fl_get(struct tcf_proto *tp, u32 handle)
>>  {
>>          struct cls_fl_head *head = fl_head_dereference(tp);
>>
>> -        return idr_find(&head->handle_idr, handle);
>> +        return __fl_get(head, handle);
>>  }
>>
>>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
>> @@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>          struct nlattr **tb;
>>          int err;
>>
>> -        if (!tca[TCA_OPTIONS])
>> -                return -EINVAL;
>> +        if (!tca[TCA_OPTIONS]) {
>> +                err = -EINVAL;
>> +                goto errout_fold;
>> +        }
>>
>>          mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
>> -        if (!mask)
>> -                return -ENOBUFS;
>> +        if (!mask) {
>> +                err = -ENOBUFS;
>> +                goto errout_fold;
>> +        }
>>
>>          tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
>>          if (!tb) {
>> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>                  err = -ENOBUFS;
>>                  goto errout_tb;
>>          }
>> +        refcount_set(&fnew->refcnt, 1);
>>
>>          err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
>>          if (err < 0)
>> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>          if (!tc_in_hw(fnew->flags))
>>                  fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
>>
>> +        refcount_inc(&fnew->refcnt);
>
> I guess I'm not getting the semantics but... why is it 2 now?

As soon as fnew is inserted into head->handle_idr (one reference), it
becomes accessible to concurrent users, which means that it can be
deleted at any time. However, tp->change() returns a reference to newly
created filter to cls_api by assigning "arg" parameter to it (second
reference). After tp->change() returns, cls API continues to use fnew
and releases it with tfilter_put() when finished.
On Fri, 15 Feb 2019 11:22:45 +0000
Vlad Buslov <vladbu@mellanox.com> wrote:

> On Thu 14 Feb 2019 at 20:34, Stefano Brivio <sbrivio@redhat.com> wrote:
> > On Thu, 14 Feb 2019 09:47:03 +0200
> > Vlad Buslov <vladbu@mellanox.com> wrote:
> >
> >> +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
> >> +                                                unsigned long *handle)
> >> +{
> >> +        struct cls_fl_head *head = fl_head_dereference(tp);
> >> +        struct cls_fl_filter *f;
> >> +
> >> +        rcu_read_lock();
> >> +        /* don't return filters that are being deleted */
> >> +        while ((f = idr_get_next_ul(&head->handle_idr,
> >> +                                    handle)) != NULL &&
> >> +               !refcount_inc_not_zero(&f->refcnt))
> >> +                ++(*handle);
> >
> > This... hurts :) What about:
> >
> >         while ((f = idr_get_next_ul(&head->handle_idr, &handle))) {
> >                 if (refcount_inc_not_zero(&f->refcnt))
> >                         break;
> >                 ++(*handle);
> >         }
> >
> > ?
>
> I prefer to avoid using value of assignment as boolean and
> non-structured jumps, when possible. In this case it seems OK either
> way, but how about:
>
> for (f = idr_get_next_ul(&head->handle_idr, handle);
>      f && !refcount_inc_not_zero(&f->refcnt);
>      f = idr_get_next_ul(&head->handle_idr, handle))
>         ++(*handle);

Honestly, I preferred the original, this is repeating idr_get_next_ul()
twice. Maybe, just:

        [...]
        struct idr *idr;
        [...]

        idr = &head->handle_idr;
        while ((f = idr_get_next_ul(idr, handle)) != NULL &&
               !refcount_inc_not_zero(&f->refcnt))
                ++(*handle);

also rather ugly, but not entirely unreadable. I tried drafting a
helper for this, but it just ends up hiding what this does.

> >> @@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
> >>                  err = -ENOBUFS;
> >>                  goto errout_tb;
> >>          }
> >> +        refcount_set(&fnew->refcnt, 1);
> >>
> >>          err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
> >>          if (err < 0)
> >> @@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
> >>          if (!tc_in_hw(fnew->flags))
> >>                  fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
> >>
> >> +        refcount_inc(&fnew->refcnt);
> >
> > I guess I'm not getting the semantics but... why is it 2 now?
>
> As soon as fnew is inserted into head->handle_idr (one reference), it
> becomes accessible to concurrent users, which means that it can be
> deleted at any time. However, tp->change() returns a reference to newly
> created filter to cls_api by assigning "arg" parameter to it (second
> reference). After tp->change() returns, cls API continues to use fnew
> and releases it with tfilter_put() when finished.

I see, thanks for the explanation!
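The exchange above boils down to a simple ownership rule: by the time fl_change() returns, the new filter is referenced twice, once by the head->handle_idr entry that makes it visible to concurrent users, and once by the pointer handed back to cls_api through "arg", which cls_api later drops via tfilter_put(). The toy program below is a minimal sketch of that counting scheme only; it stands in for the kernel's refcount_t with a C11 atomic so it builds in userspace, and names such as struct filter and filter_put() are invented for the example rather than taken from cls_flower.

/*
 * Toy model of the two-reference lifecycle discussed in the thread.
 * refcount_t is replaced by a C11 atomic; all names here are invented.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct filter {
        atomic_uint refcnt;
};

/* Drop one reference and free the object on the last put. */
static void filter_put(struct filter *f)
{
        if (atomic_fetch_sub(&f->refcnt, 1) == 1) {
                printf("last reference dropped, freeing filter\n");
                free(f);
        }
}

int main(void)
{
        struct filter *f = malloc(sizeof(*f));

        atomic_init(&f->refcnt, 1);      /* 1: reference owned by the lookup table entry */

        /* ... here the filter would be inserted into the table and become
         * visible to concurrent users, who may delete it at any time ...
         */

        atomic_fetch_add(&f->refcnt, 1); /* 2: reference returned to the caller via "arg" */

        filter_put(f);  /* caller is done: back to 1, still reachable via the table */
        filter_put(f);  /* table entry removed by a delete: 0, object is freed */

        return 0;
}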
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 91596a6271f8..b216ed26f344 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/rhashtable.h>
 #include <linux/workqueue.h>
+#include <linux/refcount.h>
 
 #include <linux/if_ether.h>
 #include <linux/in6.h>
@@ -104,6 +105,11 @@ struct cls_fl_filter {
 	u32 in_hw_count;
 	struct rcu_work rwork;
 	struct net_device *hw_dev;
+	/* Flower classifier is unlocked, which means that its reference counter
+	 * can be changed concurrently without any kind of external
+	 * synchronization. Use atomic reference counter to be concurrency-safe.
+	 */
+	refcount_t refcnt;
 };
 
 static const struct rhashtable_params mask_ht_params = {
@@ -443,6 +449,47 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
 	return rcu_dereference_protected(tp->root, 1);
 }
 
+static void __fl_put(struct cls_fl_filter *f)
+{
+	if (!refcount_dec_and_test(&f->refcnt))
+		return;
+
+	if (tcf_exts_get_net(&f->exts))
+		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
+	else
+		__fl_destroy_filter(f);
+}
+
+static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
+{
+	struct cls_fl_filter *f;
+
+	rcu_read_lock();
+	f = idr_find(&head->handle_idr, handle);
+	if (f && !refcount_inc_not_zero(&f->refcnt))
+		f = NULL;
+	rcu_read_unlock();
+
+	return f;
+}
+
+static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
+						unsigned long *handle)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+	struct cls_fl_filter *f;
+
+	rcu_read_lock();
+	/* don't return filters that are being deleted */
+	while ((f = idr_get_next_ul(&head->handle_idr,
+				    handle)) != NULL &&
+	       !refcount_inc_not_zero(&f->refcnt))
+		++(*handle);
+	rcu_read_unlock();
+
+	return f;
+}
+
 static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 			struct netlink_ext_ack *extack)
 {
@@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f, extack);
 	tcf_unbind_filter(tp, &f->res);
-	if (async)
-		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
-	else
-		__fl_destroy_filter(f);
+	__fl_put(f);
 
 	return last;
 }
@@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
 		tcf_queue_work(&head->rwork, fl_destroy_sleepable);
 }
 
+static void fl_put(struct tcf_proto *tp, void *arg)
+{
+	struct cls_fl_filter *f = arg;
+
+	__fl_put(f);
+}
+
 static void *fl_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_fl_head *head = fl_head_dereference(tp);
 
-	return idr_find(&head->handle_idr, handle);
+	return __fl_get(head, handle);
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	struct nlattr **tb;
 	int err;
 
-	if (!tca[TCA_OPTIONS])
-		return -EINVAL;
+	if (!tca[TCA_OPTIONS]) {
+		err = -EINVAL;
+		goto errout_fold;
+	}
 
 	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
-	if (!mask)
-		return -ENOBUFS;
+	if (!mask) {
+		err = -ENOBUFS;
+		goto errout_fold;
+	}
 
 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
 	if (!tb) {
@@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		err = -ENOBUFS;
 		goto errout_tb;
 	}
+	refcount_set(&fnew->refcnt, 1);
 
 	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
 	if (err < 0)
@@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(fnew->flags))
 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	refcount_inc(&fnew->refcnt);
 	if (fold) {
 		fnew->handle = handle;
@@ -1399,7 +1456,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		fl_hw_destroy_filter(tp, fold, NULL);
 		tcf_unbind_filter(tp, &fold->res);
 		tcf_exts_get_net(&fold->exts);
-		tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
+		/* Caller holds reference to fold, so refcnt is always > 0
+		 * after this.
+		 */
+		refcount_dec(&fold->refcnt);
+		__fl_put(fold);
 	} else {
 		if (__fl_lookup(fnew->mask, &fnew->mkey)) {
 			err = -EEXIST;
@@ -1448,6 +1509,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(tb);
 errout_mask_alloc:
 	kfree(mask);
+errout_fold:
+	if (fold)
+		__fl_put(fold);
 	return err;
 }
 
@@ -1461,24 +1525,26 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 			       f->mask->filter_ht_params);
 	__fl_delete(tp, f, extack);
 	*last = list_empty(&head->masks);
+	__fl_put(f);
+
 	return 0;
 }
 
 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 		    bool rtnl_held)
 {
-	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct cls_fl_filter *f;
 
 	arg->count = arg->skip;
 
-	while ((f = idr_get_next_ul(&head->handle_idr,
-				    &arg->cookie)) != NULL) {
+	while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
 		if (arg->fn(tp, f, arg) < 0) {
+			__fl_put(f);
 			arg->stop = 1;
 			break;
 		}
-		arg->cookie = f->handle + 1;
+		__fl_put(f);
+		arg->cookie++;
 		arg->count++;
 	}
 }
@@ -2148,6 +2214,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.init		= fl_init,
 	.destroy	= fl_destroy,
 	.get		= fl_get,
+	.put		= fl_put,
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,
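A related caller-side rule shows up in the fl_walk() hunk above: every filter that fl_get_next_filter() hands back already has a reference taken, so each exit path of the loop, including the early stop when arg->fn() returns an error, must drop it with __fl_put(). The short userspace sketch below illustrates only that pairing; the table, tbl_get_next() and visit() names are invented, and a plain integer stands in for the atomic reference counter.

/*
 * Sketch of the walker-side get/put pairing. All names are invented;
 * this is not cls_flower or cls_api code.
 */
#include <stdbool.h>
#include <stdio.h>

struct filter { int handle; int refcnt; };

static struct filter table[] = { { 1, 1 }, { 7, 1 }, { 9, 1 } };

/* Return the first filter with handle >= *cookie, with one extra reference. */
static struct filter *tbl_get_next(unsigned long *cookie)
{
        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                if (table[i].handle >= (int)*cookie) {
                        *cookie = table[i].handle;
                        table[i].refcnt++;      /* reference handed to the caller */
                        return &table[i];
                }
        }
        return NULL;
}

static void tbl_put(struct filter *f)
{
        f->refcnt--;
}

/* Stand-in for arg->fn(); pretend the callback asks to stop at handle 7. */
static bool visit(struct filter *f)
{
        printf("visiting handle %d\n", f->handle);
        return f->handle != 7;
}

int main(void)
{
        unsigned long cookie = 0;
        struct filter *f;

        while ((f = tbl_get_next(&cookie)) != NULL) {
                if (!visit(f)) {
                        tbl_put(f);     /* early-stop path still drops the reference */
                        break;
                }
                tbl_put(f);             /* normal path drops it too */
                cookie++;               /* resume after the handle just visited */
        }
        return 0;
}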