diff mbox series

[net-next,14/15] net: devlink: allow to change namespaces during reload

Message ID 20190914064608.26799-15-jiri@resnulli.us
State Changes Requested
Delegated to: David Miller
Headers show
Series devlink: allow devlink instances to change network namespace | expand

Commit Message

Jiri Pirko Sept. 14, 2019, 6:46 a.m. UTC
From: Jiri Pirko <jiri@mellanox.com>

All devlink instances are created in init_net and stay there for a
lifetime. Allow user to be able to move devlink instances into
namespaces during devlink reload operation. That ensures proper
re-instantiation of driver objects, including netdevices.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c |   4 +
 include/uapi/linux/devlink.h              |   4 +
 net/core/devlink.c                        | 155 ++++++++++++++++++++--
 3 files changed, 155 insertions(+), 8 deletions(-)

Comments

Ido Schimmel Sept. 15, 2019, 8:58 a.m. UTC | #1
On Sat, Sep 14, 2019 at 08:46:07AM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> All devlink instances are created in init_net and stay there for a
> lifetime. Allow user to be able to move devlink instances into
> namespaces during devlink reload operation. That ensures proper
> re-instantiation of driver objects, including netdevices.
> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/main.c |   4 +
>  include/uapi/linux/devlink.h              |   4 +
>  net/core/devlink.c                        | 155 ++++++++++++++++++++--
>  3 files changed, 155 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
> index ef3f3d06ff1e..989d0882aaa9 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
> @@ -3942,6 +3942,10 @@ static int mlx4_devlink_reload_down(struct devlink *devlink,
>  	struct mlx4_dev *dev = &priv->dev;
>  	struct mlx4_dev_persistent *persist = dev->persist;
>  
> +	if (!net_eq(devlink_net(devlink), &init_net)) {
> +		NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
> +		return -EOPNOTSUPP;
> +	}

Are you sure that this actually works? I see that you first invoke
reload_down(), then set the new namespace, then invoke reload_up().

So shouldn't this check be done in reload_up() callback instead?

>  	if (persist->num_vfs)
>  		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
>  	mlx4_restart_one_down(persist->pdev);
> diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
> index 580b7a2e40e1..b558ea88b766 100644
> --- a/include/uapi/linux/devlink.h
> +++ b/include/uapi/linux/devlink.h
> @@ -421,6 +421,10 @@ enum devlink_attr {
>  
>  	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
>  
> +	DEVLINK_ATTR_NETNS_FD,			/* u32 */
> +	DEVLINK_ATTR_NETNS_PID,			/* u32 */
> +	DEVLINK_ATTR_NETNS_ID,			/* u32 */
> +
>  	/* add new attributes above here, update the policy in devlink.c */
>  
>  	__DEVLINK_ATTR_MAX,
> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 362cbbcca225..2a5db95cce3c 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -435,8 +435,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
>  {
>  	struct devlink *devlink;
>  
> -	devlink = devlink_get_from_info(info);
> -	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
> +	/* When devlink changes netns, it would not be found
> +	 * by devlink_get_from_info(). So try if it is stored first.
> +	 */
> +	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
> +		devlink = info->user_ptr[0];
> +	} else {
> +		devlink = devlink_get_from_info(info);
> +		WARN_ON(IS_ERR(devlink));
> +	}
> +	if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
>  		mutex_unlock(&devlink->lock);
>  	mutex_unlock(&devlink_mutex);
>  }
> @@ -2675,6 +2683,73 @@ devlink_resources_validate(struct devlink *devlink,
>  	return err;
>  }
>  
> +static struct net *devlink_netns_get(struct sk_buff *skb,
> +				     struct devlink *devlink,
> +				     struct genl_info *info)
> +{
> +	struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
> +	struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
> +	struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
> +	struct net *net;
> +
> +	if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
> +		NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (netns_pid_attr) {
> +		net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
> +	} else if (netns_fd_attr) {
> +		net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
> +	} else if (netns_id_attr) {
> +		net = get_net_ns_by_id(sock_net(skb->sk),
> +				       nla_get_u32(netns_id_attr));
> +		if (!net)
> +			net = ERR_PTR(-EINVAL);
> +	} else {
> +		WARN_ON(1);
> +		net = ERR_PTR(-EINVAL);
> +	}
> +	if (IS_ERR(net)) {
> +		NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
> +		return ERR_PTR(-EINVAL);
> +	}
> +	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
> +		put_net(net);
> +		return ERR_PTR(-EPERM);
> +	}
> +	return net;
> +}
> +
> +static void devlink_param_notify(struct devlink *devlink,
> +				 unsigned int port_index,
> +				 struct devlink_param_item *param_item,
> +				 enum devlink_command cmd);
> +
> +static void devlink_reload_netns_change(struct devlink *devlink,
> +					struct net *dest_net)
> +{
> +	struct devlink_param_item *param_item;
> +
> +	/* Userspace needs to be notified about devlink objects
> +	 * removed from original and entering new network namespace.
> +	 * The rest of the devlink objects are re-created during
> +	 * reload process so the notifications are generated separatelly.
> +	 */
> +
> +	list_for_each_entry(param_item, &devlink->param_list, list)
> +		devlink_param_notify(devlink, 0, param_item,
> +				     DEVLINK_CMD_PARAM_DEL);
> +	devlink_notify(devlink, DEVLINK_CMD_DEL);
> +
> +	devlink_net_set(devlink, dest_net);
> +
> +	devlink_notify(devlink, DEVLINK_CMD_NEW);
> +	list_for_each_entry(param_item, &devlink->param_list, list)
> +		devlink_param_notify(devlink, 0, param_item,
> +				     DEVLINK_CMD_PARAM_NEW);
> +}
> +
>  static bool devlink_reload_supported(struct devlink *devlink)
>  {
>  	return devlink->ops->reload_down && devlink->ops->reload_up;
> @@ -2695,9 +2770,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
>  }
>  EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>  
> +static int devlink_reload(struct devlink *devlink, struct net *dest_net,
> +			  struct netlink_ext_ack *extack)
> +{
> +	int err;
> +
> +	err = devlink->ops->reload_down(devlink, extack);
> +	if (err)
> +		return err;
> +
> +	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
> +		devlink_reload_netns_change(devlink, dest_net);
> +
> +	err = devlink->ops->reload_up(devlink, extack);
> +	devlink_reload_failed_set(devlink, !!err);
> +	return err;
> +}
> +
>  static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
>  {
>  	struct devlink *devlink = info->user_ptr[0];
> +	struct net *dest_net = NULL;
>  	int err;
>  
>  	if (!devlink_reload_supported(devlink))
> @@ -2708,11 +2801,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
>  		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
>  		return err;
>  	}
> -	err = devlink->ops->reload_down(devlink, info->extack);
> -	if (err)
> -		return err;
> -	err = devlink->ops->reload_up(devlink, info->extack);
> -	devlink_reload_failed_set(devlink, !!err);
> +
> +	if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
> +	    info->attrs[DEVLINK_ATTR_NETNS_FD] ||
> +	    info->attrs[DEVLINK_ATTR_NETNS_ID]) {
> +		dest_net = devlink_netns_get(skb, devlink, info);

Hmm, you're never using 'devlink' there, so I guess you can drop it.

> +		if (IS_ERR(dest_net))
> +			return PTR_ERR(dest_net);
> +	}
> +
> +	err = devlink_reload(devlink, dest_net, info->extack);
> +
> +	if (dest_net)
> +		put_net(dest_net);
> +
>  	return err;
>  }
>  
> @@ -5794,6 +5896,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
>  	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
>  	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
>  	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
> +	[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
> +	[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
> +	[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
>  };
>  
>  static const struct genl_ops devlink_nl_ops[] = {
> @@ -8061,9 +8166,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
>  	return 0;
>  }
>  
> +static void __net_exit devlink_pernet_pre_exit(struct net *net)
> +{
> +	struct devlink *devlink;
> +	int err;
> +
> +	/* In case network namespace is getting destroyed, reload
> +	 * all devlink instances from this namespace into init_net.
> +	 */
> +	mutex_lock(&devlink_mutex);
> +	list_for_each_entry(devlink, &devlink_list, list) {
> +		if (net_eq(devlink_net(devlink), net)) {
> +			if (WARN_ON(!devlink_reload_supported(devlink)))
> +				continue;
> +			err = devlink_reload(devlink, &init_net, NULL);
> +			if (err)
> +				pr_warn("Failed to reload devlink instance into init_net\n");
> +		}
> +	}
> +	mutex_unlock(&devlink_mutex);
> +}
> +
> +static struct pernet_operations devlink_pernet_ops __net_initdata = {
> +	.pre_exit = devlink_pernet_pre_exit,
> +};
> +
>  static int __init devlink_init(void)
>  {
> -	return genl_register_family(&devlink_nl_family);
> +	int err;
> +
> +	err = genl_register_family(&devlink_nl_family);
> +	if (err)
> +		goto out;
> +	err = register_pernet_subsys(&devlink_pernet_ops);
> +
> +out:
> +	WARN_ON(err);
> +	return err;
>  }
>  
>  subsys_initcall(devlink_init);
> -- 
> 2.21.0
>
Jiri Pirko Sept. 15, 2019, 9:43 a.m. UTC | #2
Sun, Sep 15, 2019 at 10:58:29AM CEST, idosch@idosch.org wrote:
>On Sat, Sep 14, 2019 at 08:46:07AM +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> All devlink instances are created in init_net and stay there for a
>> lifetime. Allow user to be able to move devlink instances into
>> namespaces during devlink reload operation. That ensures proper
>> re-instantiation of driver objects, including netdevices.
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>  drivers/net/ethernet/mellanox/mlx4/main.c |   4 +
>>  include/uapi/linux/devlink.h              |   4 +
>>  net/core/devlink.c                        | 155 ++++++++++++++++++++--
>>  3 files changed, 155 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
>> index ef3f3d06ff1e..989d0882aaa9 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
>> @@ -3942,6 +3942,10 @@ static int mlx4_devlink_reload_down(struct devlink *devlink,
>>  	struct mlx4_dev *dev = &priv->dev;
>>  	struct mlx4_dev_persistent *persist = dev->persist;
>>  
>> +	if (!net_eq(devlink_net(devlink), &init_net)) {
>> +		NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
>> +		return -EOPNOTSUPP;
>> +	}
>
>Are you sure that this actually works? I see that you first invoke
>reload_down(), then set the new namespace, then invoke reload_up().
>
>So shouldn't this check be done in reload_up() callback instead?

Correct, need to fix this. But I need to do this in down phase so the
objects are not removed.


>
>>  	if (persist->num_vfs)
>>  		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
>>  	mlx4_restart_one_down(persist->pdev);
>> diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>> index 580b7a2e40e1..b558ea88b766 100644
>> --- a/include/uapi/linux/devlink.h
>> +++ b/include/uapi/linux/devlink.h
>> @@ -421,6 +421,10 @@ enum devlink_attr {
>>  
>>  	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
>>  
>> +	DEVLINK_ATTR_NETNS_FD,			/* u32 */
>> +	DEVLINK_ATTR_NETNS_PID,			/* u32 */
>> +	DEVLINK_ATTR_NETNS_ID,			/* u32 */
>> +
>>  	/* add new attributes above here, update the policy in devlink.c */
>>  
>>  	__DEVLINK_ATTR_MAX,
>> diff --git a/net/core/devlink.c b/net/core/devlink.c
>> index 362cbbcca225..2a5db95cce3c 100644
>> --- a/net/core/devlink.c
>> +++ b/net/core/devlink.c
>> @@ -435,8 +435,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
>>  {
>>  	struct devlink *devlink;
>>  
>> -	devlink = devlink_get_from_info(info);
>> -	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
>> +	/* When devlink changes netns, it would not be found
>> +	 * by devlink_get_from_info(). So try if it is stored first.
>> +	 */
>> +	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
>> +		devlink = info->user_ptr[0];
>> +	} else {
>> +		devlink = devlink_get_from_info(info);
>> +		WARN_ON(IS_ERR(devlink));
>> +	}
>> +	if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
>>  		mutex_unlock(&devlink->lock);
>>  	mutex_unlock(&devlink_mutex);
>>  }
>> @@ -2675,6 +2683,73 @@ devlink_resources_validate(struct devlink *devlink,
>>  	return err;
>>  }
>>  
>> +static struct net *devlink_netns_get(struct sk_buff *skb,
>> +				     struct devlink *devlink,
>> +				     struct genl_info *info)
>> +{
>> +	struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
>> +	struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
>> +	struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
>> +	struct net *net;
>> +
>> +	if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
>> +		NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
>> +		return ERR_PTR(-EINVAL);
>> +	}
>> +
>> +	if (netns_pid_attr) {
>> +		net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
>> +	} else if (netns_fd_attr) {
>> +		net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
>> +	} else if (netns_id_attr) {
>> +		net = get_net_ns_by_id(sock_net(skb->sk),
>> +				       nla_get_u32(netns_id_attr));
>> +		if (!net)
>> +			net = ERR_PTR(-EINVAL);
>> +	} else {
>> +		WARN_ON(1);
>> +		net = ERR_PTR(-EINVAL);
>> +	}
>> +	if (IS_ERR(net)) {
>> +		NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
>> +		return ERR_PTR(-EINVAL);
>> +	}
>> +	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
>> +		put_net(net);
>> +		return ERR_PTR(-EPERM);
>> +	}
>> +	return net;
>> +}
>> +
>> +static void devlink_param_notify(struct devlink *devlink,
>> +				 unsigned int port_index,
>> +				 struct devlink_param_item *param_item,
>> +				 enum devlink_command cmd);
>> +
>> +static void devlink_reload_netns_change(struct devlink *devlink,
>> +					struct net *dest_net)
>> +{
>> +	struct devlink_param_item *param_item;
>> +
>> +	/* Userspace needs to be notified about devlink objects
>> +	 * removed from original and entering new network namespace.
>> +	 * The rest of the devlink objects are re-created during
>> +	 * reload process so the notifications are generated separatelly.
>> +	 */
>> +
>> +	list_for_each_entry(param_item, &devlink->param_list, list)
>> +		devlink_param_notify(devlink, 0, param_item,
>> +				     DEVLINK_CMD_PARAM_DEL);
>> +	devlink_notify(devlink, DEVLINK_CMD_DEL);
>> +
>> +	devlink_net_set(devlink, dest_net);
>> +
>> +	devlink_notify(devlink, DEVLINK_CMD_NEW);
>> +	list_for_each_entry(param_item, &devlink->param_list, list)
>> +		devlink_param_notify(devlink, 0, param_item,
>> +				     DEVLINK_CMD_PARAM_NEW);
>> +}
>> +
>>  static bool devlink_reload_supported(struct devlink *devlink)
>>  {
>>  	return devlink->ops->reload_down && devlink->ops->reload_up;
>> @@ -2695,9 +2770,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
>>  }
>>  EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
>>  
>> +static int devlink_reload(struct devlink *devlink, struct net *dest_net,
>> +			  struct netlink_ext_ack *extack)
>> +{
>> +	int err;
>> +
>> +	err = devlink->ops->reload_down(devlink, extack);
>> +	if (err)
>> +		return err;
>> +
>> +	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
>> +		devlink_reload_netns_change(devlink, dest_net);
>> +
>> +	err = devlink->ops->reload_up(devlink, extack);
>> +	devlink_reload_failed_set(devlink, !!err);
>> +	return err;
>> +}
>> +
>>  static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
>>  {
>>  	struct devlink *devlink = info->user_ptr[0];
>> +	struct net *dest_net = NULL;
>>  	int err;
>>  
>>  	if (!devlink_reload_supported(devlink))
>> @@ -2708,11 +2801,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
>>  		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
>>  		return err;
>>  	}
>> -	err = devlink->ops->reload_down(devlink, info->extack);
>> -	if (err)
>> -		return err;
>> -	err = devlink->ops->reload_up(devlink, info->extack);
>> -	devlink_reload_failed_set(devlink, !!err);
>> +
>> +	if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
>> +	    info->attrs[DEVLINK_ATTR_NETNS_FD] ||
>> +	    info->attrs[DEVLINK_ATTR_NETNS_ID]) {
>> +		dest_net = devlink_netns_get(skb, devlink, info);
>
>Hmm, you're never using 'devlink' there, so I guess you can drop it.

Right, will do.


>
>> +		if (IS_ERR(dest_net))
>> +			return PTR_ERR(dest_net);
>> +	}
>> +
>> +	err = devlink_reload(devlink, dest_net, info->extack);
>> +
>> +	if (dest_net)
>> +		put_net(dest_net);
>> +
>>  	return err;
>>  }
>>  
>> @@ -5794,6 +5896,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
>>  	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
>>  	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
>>  	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
>> +	[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
>> +	[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
>> +	[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
>>  };
>>  
>>  static const struct genl_ops devlink_nl_ops[] = {
>> @@ -8061,9 +8166,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
>>  	return 0;
>>  }
>>  
>> +static void __net_exit devlink_pernet_pre_exit(struct net *net)
>> +{
>> +	struct devlink *devlink;
>> +	int err;
>> +
>> +	/* In case network namespace is getting destroyed, reload
>> +	 * all devlink instances from this namespace into init_net.
>> +	 */
>> +	mutex_lock(&devlink_mutex);
>> +	list_for_each_entry(devlink, &devlink_list, list) {
>> +		if (net_eq(devlink_net(devlink), net)) {
>> +			if (WARN_ON(!devlink_reload_supported(devlink)))
>> +				continue;
>> +			err = devlink_reload(devlink, &init_net, NULL);
>> +			if (err)
>> +				pr_warn("Failed to reload devlink instance into init_net\n");
>> +		}
>> +	}
>> +	mutex_unlock(&devlink_mutex);
>> +}
>> +
>> +static struct pernet_operations devlink_pernet_ops __net_initdata = {
>> +	.pre_exit = devlink_pernet_pre_exit,
>> +};
>> +
>>  static int __init devlink_init(void)
>>  {
>> -	return genl_register_family(&devlink_nl_family);
>> +	int err;
>> +
>> +	err = genl_register_family(&devlink_nl_family);
>> +	if (err)
>> +		goto out;
>> +	err = register_pernet_subsys(&devlink_pernet_ops);
>> +
>> +out:
>> +	WARN_ON(err);
>> +	return err;
>>  }
>>  
>>  subsys_initcall(devlink_init);
>> -- 
>> 2.21.0
>>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index ef3f3d06ff1e..989d0882aaa9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3942,6 +3942,10 @@  static int mlx4_devlink_reload_down(struct devlink *devlink,
 	struct mlx4_dev *dev = &priv->dev;
 	struct mlx4_dev_persistent *persist = dev->persist;
 
+	if (!net_eq(devlink_net(devlink), &init_net)) {
+		NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
+		return -EOPNOTSUPP;
+	}
 	if (persist->num_vfs)
 		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
 	mlx4_restart_one_down(persist->pdev);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 580b7a2e40e1..b558ea88b766 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -421,6 +421,10 @@  enum devlink_attr {
 
 	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
 
+	DEVLINK_ATTR_NETNS_FD,			/* u32 */
+	DEVLINK_ATTR_NETNS_PID,			/* u32 */
+	DEVLINK_ATTR_NETNS_ID,			/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 362cbbcca225..2a5db95cce3c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -435,8 +435,16 @@  static void devlink_nl_post_doit(const struct genl_ops *ops,
 {
 	struct devlink *devlink;
 
-	devlink = devlink_get_from_info(info);
-	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+	/* When devlink changes netns, it would not be found
+	 * by devlink_get_from_info(). So try if it is stored first.
+	 */
+	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+		devlink = info->user_ptr[0];
+	} else {
+		devlink = devlink_get_from_info(info);
+		WARN_ON(IS_ERR(devlink));
+	}
+	if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
 	mutex_unlock(&devlink_mutex);
 }
@@ -2675,6 +2683,73 @@  devlink_resources_validate(struct devlink *devlink,
 	return err;
 }
 
+static struct net *devlink_netns_get(struct sk_buff *skb,
+				     struct devlink *devlink,
+				     struct genl_info *info)
+{
+	struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+	struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+	struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+	struct net *net;
+
+	if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+		NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (netns_pid_attr) {
+		net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+	} else if (netns_fd_attr) {
+		net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+	} else if (netns_id_attr) {
+		net = get_net_ns_by_id(sock_net(skb->sk),
+				       nla_get_u32(netns_id_attr));
+		if (!net)
+			net = ERR_PTR(-EINVAL);
+	} else {
+		WARN_ON(1);
+		net = ERR_PTR(-EINVAL);
+	}
+	if (IS_ERR(net)) {
+		NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+		return ERR_PTR(-EINVAL);
+	}
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+		put_net(net);
+		return ERR_PTR(-EPERM);
+	}
+	return net;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+				 unsigned int port_index,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd);
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+					struct net *dest_net)
+{
+	struct devlink_param_item *param_item;
+
+	/* Userspace needs to be notified about devlink objects
+	 * removed from original and entering new network namespace.
+	 * The rest of the devlink objects are re-created during
+	 * reload process so the notifications are generated separatelly.
+	 */
+
+	list_for_each_entry(param_item, &devlink->param_list, list)
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_DEL);
+	devlink_notify(devlink, DEVLINK_CMD_DEL);
+
+	devlink_net_set(devlink, dest_net);
+
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+	list_for_each_entry(param_item, &devlink->param_list, list)
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_NEW);
+}
+
 static bool devlink_reload_supported(struct devlink *devlink)
 {
 	return devlink->ops->reload_down && devlink->ops->reload_up;
@@ -2695,9 +2770,27 @@  bool devlink_is_reload_failed(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
+static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+			  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	err = devlink->ops->reload_down(devlink, extack);
+	if (err)
+		return err;
+
+	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
+		devlink_reload_netns_change(devlink, dest_net);
+
+	err = devlink->ops->reload_up(devlink, extack);
+	devlink_reload_failed_set(devlink, !!err);
+	return err;
+}
+
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
+	struct net *dest_net = NULL;
 	int err;
 
 	if (!devlink_reload_supported(devlink))
@@ -2708,11 +2801,20 @@  static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
 		return err;
 	}
-	err = devlink->ops->reload_down(devlink, info->extack);
-	if (err)
-		return err;
-	err = devlink->ops->reload_up(devlink, info->extack);
-	devlink_reload_failed_set(devlink, !!err);
+
+	if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+	    info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+	    info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+		dest_net = devlink_netns_get(skb, devlink, info);
+		if (IS_ERR(dest_net))
+			return PTR_ERR(dest_net);
+	}
+
+	err = devlink_reload(devlink, dest_net, info->extack);
+
+	if (dest_net)
+		put_net(dest_net);
+
 	return err;
 }
 
@@ -5794,6 +5896,9 @@  static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -8061,9 +8166,43 @@  int devlink_compat_switch_id_get(struct net_device *dev,
 	return 0;
 }
 
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+	struct devlink *devlink;
+	int err;
+
+	/* In case network namespace is getting destroyed, reload
+	 * all devlink instances from this namespace into init_net.
+	 */
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (net_eq(devlink_net(devlink), net)) {
+			if (WARN_ON(!devlink_reload_supported(devlink)))
+				continue;
+			err = devlink_reload(devlink, &init_net, NULL);
+			if (err)
+				pr_warn("Failed to reload devlink instance into init_net\n");
+		}
+	}
+	mutex_unlock(&devlink_mutex);
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+	.pre_exit = devlink_pernet_pre_exit,
+};
+
 static int __init devlink_init(void)
 {
-	return genl_register_family(&devlink_nl_family);
+	int err;
+
+	err = genl_register_family(&devlink_nl_family);
+	if (err)
+		goto out;
+	err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+	WARN_ON(err);
+	return err;
 }
 
 subsys_initcall(devlink_init);