
[bpf-next,v2,1/8] bpf: offload: don't require rtnl for dev list manipulation

Message ID 20171221210120.30166-2-jakub.kicinski@netronome.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Series bpf: offload: report device back to user space (take 2)

Commit Message

Jakub Kicinski Dec. 21, 2017, 9:01 p.m. UTC
We don't need the RTNL lock for all operations on offload state.
We only need to hold it around ndo calls.  The device offload
initialization doesn't require it.  The soon-to-come querying
of the offload info will only need it partially.  We will also
be able to remove the waitqueue in the following patches.

Use struct rw_semaphore because map offload will require sleeping
with the semaphore held for read.

Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
v2:
 - use dev_get_by_index_rcu() instead of implicit lock dependencies;
 - use DECLARE_RWSEM() instead of init_rwsem() (Kirill).
---
 kernel/bpf/offload.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)
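
For context, the locking pattern the commit message describes — a statically
initialized rw_semaphore whose read side may be held across sleeping
operations — looks roughly like the sketch below. The query function here is
hypothetical, standing in for the "soon-to-come querying of the offload info";
it is not code from this series:

	#include <linux/rwsem.h>

	/* DECLARE_RWSEM() initializes the semaphore at build time, so no
	 * runtime init_rwsem() call is needed (the v2 change).
	 */
	static DECLARE_RWSEM(bpf_devs_lock);

	/* Hypothetical read-side user: unlike a spinlock, the semaphore
	 * may be held for read across sleeping operations such as
	 * GFP_KERNEL allocations or copy_to_user().
	 */
	static int bpf_prog_offload_query(struct bpf_prog *prog)
	{
		int ret = 0;

		down_read(&bpf_devs_lock);
		if (!prog->aux->offload)
			ret = -ENODEV;
		/* ... sleeping work is safe here under the read lock ... */
		up_read(&bpf_devs_lock);

		return ret;
	}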

Comments

Kirill Tkhai Dec. 26, 2017, 4:02 p.m. UTC | #1
Hi, Jakub,

On 22.12.2017 00:01, Jakub Kicinski wrote:
> We don't need the RTNL lock for all operations on offload state.
> We only need to hold it around ndo calls.  The device offload
> initialization doesn't require it.  The soon-to-come querying
> of the offload info will only need it partially.  We will also
> be able to remove the waitqueue in the following patches.
> 
> Use struct rw_semaphore because map offload will require sleeping
> with the semaphore held for read.
> 
> Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
> v2:
>  - use dev_get_by_index_rcu() instead of implicit lock dependencies;
>  - use DECLARE_RWSEM() instead of init_rwsem() (Kirill).
> ---
>  kernel/bpf/offload.c | 33 ++++++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 8455b89d1bbf..f049073a37e6 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -20,8 +20,12 @@
>  #include <linux/netdevice.h>
>  #include <linux/printk.h>
>  #include <linux/rtnetlink.h>
> +#include <linux/rwsem.h>
>  
> -/* protected by RTNL */
> +/* Protects bpf_prog_offload_devs and offload members of all progs.
> + * RTNL lock cannot be taken when holding this lock.
> + */
> +static DECLARE_RWSEM(bpf_devs_lock);
>  static LIST_HEAD(bpf_prog_offload_devs);
>  
>  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
> @@ -43,19 +47,30 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
>  	offload->prog = prog;
>  	init_waitqueue_head(&offload->verifier_done);
>  
> -	rtnl_lock();
> -	offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
> +	rcu_read_lock();
> +	offload->netdev = dev_get_by_index_rcu(net, attr->prog_ifindex);
>  	if (!offload->netdev) {
> -		rtnl_unlock();
> -		kfree(offload);
> -		return -EINVAL;
> +		rcu_read_unlock();
> +		goto err_free;
>  	}
> +	dev_hold(offload->netdev);
> +	rcu_read_unlock();

A small remark about this: dev_get_by_index() in net/core/dev.c already
provides the functionality of the block above, and I haven't found a reason
in the following patches why rcu_read_lock() has to be used directly here.

So it seems we could replace the block above with a simple
dev_get_by_index() call. Everything else looks good to me.

>  
> +	down_write(&bpf_devs_lock);
> +	if (offload->netdev->reg_state != NETREG_REGISTERED)
> +		goto err_unlock;
>  	prog->aux->offload = offload;
>  	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
> -	rtnl_unlock();
> +	dev_put(offload->netdev);
> +	up_write(&bpf_devs_lock);
>  
>  	return 0;
> +err_unlock:
> +	up_write(&bpf_devs_lock);
> +	dev_put(offload->netdev);
> +err_free:
> +	kfree(offload);
> +	return -EINVAL;
>  }
>  
>  static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
> @@ -126,7 +141,9 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
>  	wake_up(&offload->verifier_done);
>  
>  	rtnl_lock();
> +	down_write(&bpf_devs_lock);
>  	__bpf_prog_offload_destroy(prog);
> +	up_write(&bpf_devs_lock);
>  	rtnl_unlock();
>  
>  	kfree(offload);
> @@ -181,11 +198,13 @@ static int bpf_offload_notification(struct notifier_block *notifier,
>  		if (netdev->reg_state != NETREG_UNREGISTERING)
>  			break;
>  
> +		down_write(&bpf_devs_lock);
>  		list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
>  					 offloads) {
>  			if (offload->netdev == netdev)
>  				__bpf_prog_offload_destroy(offload->prog);
>  		}
> +		up_write(&bpf_devs_lock);
>  		break;
>  	default:
>  		break;

Kirill
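
For reference, dev_get_by_index() takes the RCU read lock, looks the device
up, and takes a reference before returning, so the simplification Kirill
suggests would collapse the lookup to roughly this (an untested sketch):

	offload->netdev = dev_get_by_index(net, attr->prog_ifindex);
	if (!offload->netdev)
		goto err_free;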

Patch

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..f049073a37e6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -20,8 +20,12 @@ 
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rtnetlink.h>
+#include <linux/rwsem.h>
 
-/* protected by RTNL */
+/* Protects bpf_prog_offload_devs and offload members of all progs.
+ * RTNL lock cannot be taken when holding this lock.
+ */
+static DECLARE_RWSEM(bpf_devs_lock);
 static LIST_HEAD(bpf_prog_offload_devs);
 
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
@@ -43,19 +47,30 @@  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	offload->prog = prog;
 	init_waitqueue_head(&offload->verifier_done);
 
-	rtnl_lock();
-	offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
+	rcu_read_lock();
+	offload->netdev = dev_get_by_index_rcu(net, attr->prog_ifindex);
 	if (!offload->netdev) {
-		rtnl_unlock();
-		kfree(offload);
-		return -EINVAL;
+		rcu_read_unlock();
+		goto err_free;
 	}
+	dev_hold(offload->netdev);
+	rcu_read_unlock();
 
+	down_write(&bpf_devs_lock);
+	if (offload->netdev->reg_state != NETREG_REGISTERED)
+		goto err_unlock;
 	prog->aux->offload = offload;
 	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
-	rtnl_unlock();
+	dev_put(offload->netdev);
+	up_write(&bpf_devs_lock);
 
 	return 0;
+err_unlock:
+	up_write(&bpf_devs_lock);
+	dev_put(offload->netdev);
+err_free:
+	kfree(offload);
+	return -EINVAL;
 }
 
 static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
@@ -126,7 +141,9 @@  void bpf_prog_offload_destroy(struct bpf_prog *prog)
 	wake_up(&offload->verifier_done);
 
 	rtnl_lock();
+	down_write(&bpf_devs_lock);
 	__bpf_prog_offload_destroy(prog);
+	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
 	kfree(offload);
@@ -181,11 +198,13 @@  static int bpf_offload_notification(struct notifier_block *notifier,
 		if (netdev->reg_state != NETREG_UNREGISTERING)
 			break;
 
+		down_write(&bpf_devs_lock);
 		list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
 					 offloads) {
 			if (offload->netdev == netdev)
 				__bpf_prog_offload_destroy(offload->prog);
 		}
+		up_write(&bpf_devs_lock);
 		break;
 	default:
 		break;