
[bpf-next,v3,3/4] bpf: cgroup: properly use bpf_prog_array api

Message ID 20190528182946.3633-3-sdf@google.com
State Superseded
Series [bpf-next,v3,1/4] bpf: remove __rcu annotations from bpf_prog_array

Commit Message

Stanislav Fomichev May 28, 2019, 6:29 p.m. UTC
Now that we don't have __rcu markers on the bpf_prog_array helpers,
let's use proper rcu_dereference_protected to obtain the array pointer
under the mutex.

We also don't need __rcu annotations on cgroup_bpf.inactive since
it's not read/updated concurrently.

v3:
* amend cgroup_rcu_dereference to include percpu_ref_is_dying;
  cgroup_bpf is now reference counted and we don't hold cgroup_mutex
  anymore in cgroup_bpf_release

v2:
* replace xchg with rcu_swap_protected

Cc: Roman Gushchin <guro@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h |  2 +-
 kernel/bpf/cgroup.c        | 32 +++++++++++++++++++++-----------
 2 files changed, 22 insertions(+), 12 deletions(-)
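
For readers who want the accessor semantics in isolation, here is a
minimal, self-contained sketch of the pattern the patch settles on: an
__rcu pointer that writers update under a mutex with rcu_swap_protected()
and that readers walk under rcu_read_lock(). All names below (demo_array,
demo_mutex, demo_activate, demo_read_cnt) are illustrative, not from the
patch.

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_array {			/* stand-in for bpf_prog_array */
	struct rcu_head rcu;
	int cnt;
};

static struct demo_array __rcu *effective;	/* walked by RCU readers */
static DEFINE_MUTEX(demo_mutex);		/* serializes all writers */

/* Writer side: publish a new array and retire the old one. Readers may
 * still be walking the old array, so it is only freed after a grace
 * period.
 */
static void demo_activate(struct demo_array *new_array)
{
	struct demo_array *old_array = new_array;

	mutex_lock(&demo_mutex);
	/* rcu_swap_protected() publishes new_array and leaves the
	 * previous pointer in old_array; the lockdep condition documents
	 * what makes the plain access safe.
	 */
	rcu_swap_protected(effective, old_array,
			   lockdep_is_held(&demo_mutex));
	mutex_unlock(&demo_mutex);

	if (old_array)
		kfree_rcu(old_array, rcu);
}

/* Reader side: a plain RCU read section; no mutex needed. */
static int demo_read_cnt(void)
{
	struct demo_array *a;
	int cnt;

	rcu_read_lock();
	a = rcu_dereference(effective);
	cnt = a ? a->cnt : 0;
	rcu_read_unlock();
	return cnt;
}

The patch's cgroup_rcu_dereference() is the rcu_dereference_protected()
half of this pattern, with the protection condition spelled out.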

Comments

Roman Gushchin May 28, 2019, 6:57 p.m. UTC | #1
On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote:
> Now that we don't have __rcu markers on the bpf_prog_array helpers,
> let's use proper rcu_dereference_protected to obtain the array pointer
> under the mutex.
> 
> [...]

Acked-by: Roman Gushchin <guro@fb.com>

Thanks!
Roman Gushchin May 28, 2019, 7:43 p.m. UTC | #2
On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote:
> [...]
> +#define cgroup_rcu_dereference(cgrp, p)					\
> +	rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) ||	\
> +				  percpu_ref_is_dying(&cgrp->bpf.refcnt))

A comment on why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here
would be appreciated.

Thanks!
Stanislav Fomichev May 28, 2019, 8:16 p.m. UTC | #3
On 05/28, Roman Gushchin wrote:
> On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote:
> > [...]
> > +#define cgroup_rcu_dereference(cgrp, p)					\
> > +	rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) ||	\
> > +				  percpu_ref_is_dying(&cgrp->bpf.refcnt))
> 
> A comment on why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here
> would be appreciated.
I was actually debating whether to just use a raw
rcu_dereference_protected(p, lockdep_is_held()) in __cgroup_bpf_query and
rcu_dereference_protected(p, percpu_ref_is_dying()) in cgroup_bpf_release
instead of having a cgroup_rcu_dereference that covers both cases.

Maybe that would make it more clear (and wouldn't require any comment)?
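
Concretely, that split would look roughly like this (an illustrative
sketch of the idea being floated, not code from any posted revision):

	/* In cgroup_bpf_release(): cgroup_mutex is not held, but the
	 * cgroup_bpf refcount is already dying, so attach/detach can no
	 * longer race with the release path.
	 */
	old_array = rcu_dereference_protected(cgrp->bpf.effective[type],
			percpu_ref_is_dying(&cgrp->bpf.refcnt));

	/* In __cgroup_bpf_query(): always entered with cgroup_mutex held. */
	effective = rcu_dereference_protected(cgrp->bpf.effective[type],
			lockdep_is_held(&cgroup_mutex));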
Roman Gushchin May 28, 2019, 8:53 p.m. UTC | #4
On Tue, May 28, 2019 at 01:16:46PM -0700, Stanislav Fomichev wrote:
> On 05/28, Roman Gushchin wrote:
> > [...]
> >
> > A comment on why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here
> > would be appreciated.
> I was actually debating whether to just use raw
> rcu_dereference_protected(p, lockdep_is_held()) in __cgroup_bpf_query and
> rcu_dereference_protected(p, percpu_ref_is_dying()) in cgroup_bpf_release
> instead of having a cgroup_rcu_dereference which covers both cases.
> 
> Maybe that would make it more clear (and wouldn't require any comment)?

Yeah, this makes total sense to me.

Patch

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9f100fc422c3..b631ee75762d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -72,7 +72,7 @@ struct cgroup_bpf {
 	u32 flags[MAX_BPF_ATTACH_TYPE];
 
 	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog_array *inactive;
 
 	/* reference counter used to detach bpf programs after cgroup removal */
 	struct percpu_ref refcnt;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index d995edbe816d..118b70175dd9 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,6 +22,13 @@
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
+#define cgroup_rcu_dereference(cgrp, p)					\
+	rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) ||	\
+				  percpu_ref_is_dying(&cgrp->bpf.refcnt))
+
+#define cgroup_rcu_swap(rcu_ptr, ptr)					\
+	rcu_swap_protected(rcu_ptr, ptr, lockdep_is_held(&cgroup_mutex))
+
 void cgroup_bpf_offline(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
@@ -38,6 +45,7 @@ static void cgroup_bpf_release(struct work_struct *work)
 	struct cgroup *cgrp = container_of(work, struct cgroup,
 					   bpf.release_work);
 	enum bpf_cgroup_storage_type stype;
+	struct bpf_prog_array *old_array;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -54,7 +62,9 @@ static void cgroup_bpf_release(struct work_struct *work)
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
+		old_array = cgroup_rcu_dereference(cgrp,
+						   cgrp->bpf.effective[type]);
+		bpf_prog_array_free(old_array);
 	}
 
 	percpu_ref_exit(&cgrp->bpf.refcnt);
@@ -126,7 +136,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  */
 static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
+				   struct bpf_prog_array **array)
 {
 	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
@@ -164,17 +174,15 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		}
 	} while ((p = cgroup_parent(p)));
 
-	rcu_assign_pointer(*array, progs);
+	*array = progs;
 	return 0;
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
 				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
+				     struct bpf_prog_array *old_array)
 {
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
+	cgroup_rcu_swap(cgrp->bpf.effective[type], old_array);
 	/* free prog array after grace period, since __cgroup_bpf_run_*()
 	 * might be still walking the array
 	 */
@@ -191,7 +199,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
  * that array below is variable length
  */
 #define	NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
+	struct bpf_prog_array *arrays[NR] = {};
 	int ret, i;
 
 	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
@@ -477,10 +485,13 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	enum bpf_attach_type type = attr->query.attach_type;
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	u32 flags = cgrp->bpf.flags[type];
+	struct bpf_prog_array *effective;
 	int cnt, ret = 0, i;
 
+	effective = cgroup_rcu_dereference(cgrp, cgrp->bpf.effective[type]);
+
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+		cnt = bpf_prog_array_length(effective);
 	else
 		cnt = prog_list_length(progs);
 
@@ -497,8 +508,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
-						   prog_ids, cnt);
+		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
 	} else {
 		struct bpf_prog_list *pl;
 		u32 id;