diff mbox

[3/5] Add checkpoint support for veth devices (v2)

Message ID 1266336187-19105-4-git-send-email-danms@us.ibm.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Dan Smith Feb. 16, 2010, 4:03 p.m. UTC
Adds an ndo_checkpoint() handler for veth devices to checkpoint themselves.
Writes out the pairing information, addresses, and initiates a checkpoint
on the peer if the peer won't be reached from another netns.  Throws an
error of our peer's netns isn't already in the hash (i.e., a tree leak).

Changes in v2:
 - Fix check detecting if peer is in the init netns

Signed-off-by: Dan Smith <danms@us.ibm.com>
---
 drivers/net/veth.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 76 insertions(+), 0 deletions(-)

Comments

Serge E. Hallyn Feb. 22, 2010, 7:56 p.m. UTC | #1
Quoting Dan Smith (danms@us.ibm.com):
> Adds an ndo_checkpoint() handler for veth devices to checkpoint themselves.
> Writes out the pairing information, addresses, and initiates a checkpoint
> on the peer if the peer won't be reached from another netns.  Throws an
> error of our peer's netns isn't already in the hash (i.e., a tree leak).
> 
> Changes in v2:
>  - Fix check detecting if peer is in the init netns
> 
> Signed-off-by: Dan Smith <danms@us.ibm.com>
> ---
>  drivers/net/veth.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 76 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 3a15de5..db92de8 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -16,6 +16,9 @@
>  #include <net/xfrm.h>
>  #include <linux/veth.h>
> 
> +#include <linux/checkpoint.h>
> +#include <linux/checkpoint_hdr.h>
> +
>  #define DRV_NAME	"veth"
>  #define DRV_VERSION	"1.0"
> 
> @@ -284,6 +287,76 @@ static void veth_dev_free(struct net_device *dev)
>  	free_netdev(dev);
>  }
> 
> +#ifdef CONFIG_CHECKPOINT
> +static int veth_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev)
> +{
> +	struct ckpt_hdr_netdev *h;
> +	struct veth_priv *priv = netdev_priv(dev);
> +	struct net_device *peer = priv->peer;
> +	struct ckpt_netdev_addr *addrs;
> +	int ret;
> +	int n;
> +
> +	if (!peer) {
> +		ckpt_err(ctx, -EINVAL, "veth device has no peer!\n");
> +		return -EINVAL;
> +	}
> +
> +	h = ckpt_netdev_base(ctx, dev, &addrs);
> +	if (IS_ERR(h))
> +		return PTR_ERR(h);
> +
> +	h->type = CKPT_NETDEV_VETH;
> +
> +	ret = h->veth.this_ref = ckpt_obj_lookup_add(ctx, dev,
> +						     CKPT_OBJ_NETDEV, &n);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = h->veth.peer_ref = ckpt_obj_lookup_add(ctx, peer,
> +						     CKPT_OBJ_NETDEV, &n);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = ckpt_write_buffer(ctx, dev->name, IFNAMSIZ);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = ckpt_write_buffer(ctx, peer->name, IFNAMSIZ);
> +	if (ret < 0)
> +		goto out;
> +
> +	if (h->inet_addrs > 0) {
> +		int len = (sizeof(struct ckpt_netdev_addr) * h->inet_addrs);
> +		ret = ckpt_write_buffer(ctx, addrs, len);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	/* Only checkpoint peer if we're not going to arrive at it
> +	 * via another task's netns.  Fail if the pipe exits
> +	 * our container to a netns not already in the hash
> +	 */
> +	if (ckpt_netdev_in_init_netns(ctx, peer))
> +		ret = checkpoint_obj(ctx, peer, CKPT_OBJ_NETDEV);
> +	else if (!ckpt_obj_lookup(ctx, peer->nd_net, CKPT_OBJ_NET_NS)) {
> +		ret = -EINVAL;
> +		ckpt_err(ctx, ret,
> +			 "Peer %s of %s not in checkpointed namespaces\n",
> +			 peer->name, dev->name);

I'm not sure this check does what you think it does:  note that
ckpt_netdev_base(), defined in the previous patch, and called higher
up in this function, is going to checkpoint peer->nd_net.   :)

(right?)

> +	}
> + out:
> +	ckpt_hdr_put(ctx, h);
> +	kfree(addrs);
> +
> +	return ret;
> +}
> +#endif
> +
>  static const struct net_device_ops veth_netdev_ops = {
>  	.ndo_init            = veth_dev_init,
>  	.ndo_open            = veth_open,
> @@ -292,6 +365,9 @@ static const struct net_device_ops veth_netdev_ops = {
>  	.ndo_change_mtu      = veth_change_mtu,
>  	.ndo_get_stats       = veth_get_stats,
>  	.ndo_set_mac_address = eth_mac_addr,
> +#ifdef CONFIG_CHECKPOINT
> +	.ndo_checkpoint      = veth_checkpoint,
> +#endif
>  };
> 
>  static void veth_setup(struct net_device *dev)
> -- 
> 1.6.2.5
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Smith Feb. 22, 2010, 8:25 p.m. UTC | #2
>> +	else if (!ckpt_obj_lookup(ctx, peer->nd_net, CKPT_OBJ_NET_NS)) {
>> +		ret = -EINVAL;
>> +		ckpt_err(ctx, ret,
>> +			 "Peer %s of %s not in checkpointed namespaces\n",
>> +			 peer->name, dev->name);

SH> I'm not sure this check does what you think it does: note that
SH> ckpt_netdev_base(), defined in the previous patch, and called
SH> higher up in this function, is going to checkpoint peer->nd_net.
SH> :)

Actually, no, ckpt_netdev_base() can't checkpoint peer->nd_net because
it's device-agnostic and has no knowledge of dev->peer.

The idea here was that we checkpoint a netns when we arrive at it via
nsproxy.  Doing that, we checkpoint the devices within.  We encounter
a veth device, which has a peer, so we decide if:

 1. We won't arrive at the peer later because it is in the init
    namespace, so we checkpoint it now.
 2. We will arrive at it later because the peer's netns is in the list
    we've already collected, so checkpoint the peer with its namespace
 3. Neither are true and we won't arrive at it later and therefore we
    can't allow checkpoint to continue

#2 depends on the collect process having put all the task's netns' in
 the hash ahead of time.
Serge E. Hallyn Feb. 22, 2010, 8:57 p.m. UTC | #3
Quoting Dan Smith (danms@us.ibm.com):
> >> +	else if (!ckpt_obj_lookup(ctx, peer->nd_net, CKPT_OBJ_NET_NS)) {
> >> +		ret = -EINVAL;
> >> +		ckpt_err(ctx, ret,
> >> +			 "Peer %s of %s not in checkpointed namespaces\n",
> >> +			 peer->name, dev->name);
> 
> SH> I'm not sure this check does what you think it does: note that
> SH> ckpt_netdev_base(), defined in the previous patch, and called
> SH> higher up in this function, is going to checkpoint peer->nd_net.
> SH> :)
> 
> Actually, no, ckpt_netdev_base() can't checkpoint peer->nd_net because
> it's device-agnostic and has no knowledge of dev->peer.

Oh, ok.

> The idea here was that we checkpoint a netns when we arrive at it via
> nsproxy.  Doing that, we checkpoint the devices within.  We encounter
> a veth device, which has a peer, so we decide if:
> 
>  1. We won't arrive at the peer later because it is in the init
>     namespace, so we checkpoint it now.
>  2. We will arrive at it later because the peer's netns is in the list
>     we've already collected, so checkpoint the peer with its namespace
>  3. Neither are true and we won't arrive at it later and therefore we
>     can't allow checkpoint to continue
> 
> #2 depends on the collect process having put all the task's netns' in
>  the hash ahead of time.

Right, that was what I was originally starting to hunt down when I
thought I saw ckpt_netdev_base() checkpointing peer's netns.

So do you actually know that the peer's netns will have been
checkpointed?  I'm a little fuzzy about where netns and netdevs
are checkpointed.  If you have two private netns's in a container,
with a veth connecting them, and you checkpoint a task in netns 1,
will you fail bc netns 2 hasn't been checkpointed yet bc no task in
it has been checkpointed yet?

-serge
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Smith Feb. 22, 2010, 9:01 p.m. UTC | #4
SH> So do you actually know that the peer's netns will have been
SH> checkpointed?  I'm a little fuzzy about where netns and netdevs
SH> are checkpointed.  If you have two private netns's in a container,
SH> with a veth connecting them, and you checkpoint a task in netns 1,
SH> will you fail bc netns 2 hasn't been checkpointed yet bc no task
SH> in it has been checkpointed yet?

Nope, because they're collect()'ed before we checkpoint().
diff mbox

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 3a15de5..db92de8 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -16,6 +16,9 @@ 
 #include <net/xfrm.h>
 #include <linux/veth.h>
 
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
 
@@ -284,6 +287,76 @@  static void veth_dev_free(struct net_device *dev)
 	free_netdev(dev);
 }
 
+#ifdef CONFIG_CHECKPOINT
+static int veth_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev)
+{
+	struct ckpt_hdr_netdev *h;
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *peer = priv->peer;
+	struct ckpt_netdev_addr *addrs;
+	int ret;
+	int n;
+
+	if (!peer) {
+		ckpt_err(ctx, -EINVAL, "veth device has no peer!\n");
+		return -EINVAL;
+	}
+
+	h = ckpt_netdev_base(ctx, dev, &addrs);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	h->type = CKPT_NETDEV_VETH;
+
+	ret = h->veth.this_ref = ckpt_obj_lookup_add(ctx, dev,
+						     CKPT_OBJ_NETDEV, &n);
+	if (ret < 0)
+		goto out;
+
+	ret = h->veth.peer_ref = ckpt_obj_lookup_add(ctx, peer,
+						     CKPT_OBJ_NETDEV, &n);
+	if (ret < 0)
+		goto out;
+
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h);
+	if (ret < 0)
+		goto out;
+
+	ret = ckpt_write_buffer(ctx, dev->name, IFNAMSIZ);
+	if (ret < 0)
+		goto out;
+
+	ret = ckpt_write_buffer(ctx, peer->name, IFNAMSIZ);
+	if (ret < 0)
+		goto out;
+
+	if (h->inet_addrs > 0) {
+		int len = (sizeof(struct ckpt_netdev_addr) * h->inet_addrs);
+		ret = ckpt_write_buffer(ctx, addrs, len);
+		if (ret)
+			goto out;
+	}
+
+	/* Only checkpoint peer if we're not going to arrive at it
+	 * via another task's netns.  Fail if the pipe exits
+	 * our container to a netns not already in the hash
+	 */
+	if (ckpt_netdev_in_init_netns(ctx, peer))
+		ret = checkpoint_obj(ctx, peer, CKPT_OBJ_NETDEV);
+	else if (!ckpt_obj_lookup(ctx, peer->nd_net, CKPT_OBJ_NET_NS)) {
+		ret = -EINVAL;
+		ckpt_err(ctx, ret,
+			 "Peer %s of %s not in checkpointed namespaces\n",
+			 peer->name, dev->name);
+	}
+ out:
+	ckpt_hdr_put(ctx, h);
+	kfree(addrs);
+
+	return ret;
+}
+#endif
+
 static const struct net_device_ops veth_netdev_ops = {
 	.ndo_init            = veth_dev_init,
 	.ndo_open            = veth_open,
@@ -292,6 +365,9 @@  static const struct net_device_ops veth_netdev_ops = {
 	.ndo_change_mtu      = veth_change_mtu,
 	.ndo_get_stats       = veth_get_stats,
 	.ndo_set_mac_address = eth_mac_addr,
+#ifdef CONFIG_CHECKPOINT
+	.ndo_checkpoint      = veth_checkpoint,
+#endif
 };
 
 static void veth_setup(struct net_device *dev)