diff mbox

[RFC,1/3] vsockmon: Add tap functions.

Message ID 20160528162907.14809-2-ggarcia@deic.uab.cat
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

ggarcia@abra.uab.cat May 28, 2016, 4:29 p.m. UTC
From: Gerard Garcia <ggarcia@deic.uab.cat>

Signed-off-by: Gerard Garcia <ggarcia@deic.uab.cat>
---
 include/net/af_vsock.h      |  13 ++++++
 include/uapi/linux/if_arp.h |   1 +
 net/vmw_vsock/af_vsock.c    | 105 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)

Comments

Stefan Hajnoczi June 1, 2016, 9:07 p.m. UTC | #1
On Sat, May 28, 2016 at 06:29:05PM +0200, ggarcia@abra.uab.cat wrote:
> From: Gerard Garcia <ggarcia@deic.uab.cat>
> 
> Signed-off-by: Gerard Garcia <ggarcia@deic.uab.cat>
> ---
>  include/net/af_vsock.h      |  13 ++++++
>  include/uapi/linux/if_arp.h |   1 +
>  net/vmw_vsock/af_vsock.c    | 105 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 119 insertions(+)
> 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index 15694c9..c4593d8 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -187,4 +187,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
>  					 struct sockaddr_vm *dst);
>  void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
>  
> +/**** TAP ****/
> +
> +struct vsock_tap {
> +	struct net_device *dev;
> +	struct module *module;
> +	struct list_head list;
> +};
> +
> +extern int vsock_init_tap(void);
> +extern int vsock_add_tap(struct vsock_tap *vt);
> +extern int vsock_remove_tap(struct vsock_tap *vt);
> +extern void vsock_deliver_tap(struct sk_buff *skb);

The other function prototypes in this header don't use "extern" either.
Please drop for consistency.

> +
>  #endif /* __AF_VSOCK_H__ */
> diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
> index 4d024d7..869262a 100644
> --- a/include/uapi/linux/if_arp.h
> +++ b/include/uapi/linux/if_arp.h
> @@ -95,6 +95,7 @@
>  #define ARPHRD_IP6GRE	823		/* GRE over IPv6		*/
>  #define ARPHRD_NETLINK	824		/* Netlink header		*/
>  #define ARPHRD_6LOWPAN	825		/* IPv6 over LoWPAN             */
> +#define ARPHRD_VSOCKMON 826		/* Vsock monitor header		*/

Previous lines use a tab character (^I) before the numeric constant.
Please follow that style for consistency.

I suggest calling it just ARPHRD_VSOCK.  Netlink also uses
ARPHRD_NETLINK instead of ARPHRD_NLMON.

>  
>  #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
>  #define ARPHRD_NONE	  0xFFFE	/* zero header length */
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 6b158ab..ec7a05d 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -96,6 +96,7 @@
>  #include <linux/unistd.h>
>  #include <linux/wait.h>
>  #include <linux/workqueue.h>
> +#include <linux/if_arp.h>
>  #include <net/sock.h>
>  #include <net/af_vsock.h>
>  
> @@ -2012,6 +2013,110 @@ const struct vsock_transport *vsock_core_get_transport(void)
>  }
>  EXPORT_SYMBOL_GPL(vsock_core_get_transport);
>  
> +/**** TAP ****/

Feel free to put this in a separate source file.  The Kbuild can link
multiple objects into a single kernel module.  That would be cleaner
than using a big comment to separate it from af_vsock.c code.

I wonder whether this tap mechanism should be generic so that netlink,
vsock, and other address families can reuse the code.  This is basically
copied from netlink.

> +static DEFINE_SPINLOCK(vsock_tap_lock);
> +static struct list_head vsock_tap_all __read_mostly;
> +
> +int vsock_init_tap(void) {
> +	INIT_LIST_HEAD(&vsock_tap_all);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vsock_init_tap);

Can you use LIST_HEAD_INIT() on the vsock_tap_all declaration line to
eliminate the need for vsock_init_tap() completely?

> +
> +int vsock_add_tap(struct vsock_tap *vt) {
> +	if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
> +		return -EINVAL;
> +
> +	spin_lock(&vsock_tap_lock);
> +	list_add_rcu(&vt->list, &vsock_tap_all);
> +	spin_unlock(&vsock_tap_lock);
> +
> +	__module_get(vt->module);

It's slightly safer to get the module before publishing it on the list.
But in practice I guess the caller is the module so the module won't
disappear underneath us.

> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vsock_add_tap);
> +
> +int __vsock_remove_tap(struct vsock_tap *vt) {
> +	bool found = false;
> +	struct vsock_tap *tmp;
> +
> +	spin_lock(&vsock_tap_lock);
> +
> +	list_for_each_entry(tmp, &vsock_tap_all, list) {
> +		if (vt == tmp) {
> +			list_del_rcu(&vt->list);
> +			found = true;
> +			goto out;
> +		}
> +	}
> +
> +	pr_warn("__vsock_remove_tap: %p not found\n", vt);
> +out:
> +	spin_unlock(&vsock_tap_lock);
> +
> +	if (found)
> +		module_put(vt->module);
> +
> +	return found ? 0 : -ENODEV;
> +}
> +
> +int vsock_remove_tap(struct vsock_tap *vt)
> +{
> +	int ret;
> +
> +	ret = __vsock_remove_tap(vt);
> +	synchronize_net();

module_put() should be called after synchronize_net().  That way we
guarantee that no one is still accessing vt when the module is put.  The
caller is probably the module though, so maybe there is an implicit
reference...

> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(vsock_remove_tap);
> +
> +static int __vsock_deliver_tap_skb(struct sk_buff *skb,
> +				     struct net_device *dev)
> +{
> +	struct sk_buff *vskb;
> +	int ret = 0;
> +
> +	if (skb) {
> +		dev_hold(dev);
> +		vskb = skb_clone(skb, GFP_ATOMIC);

You must handle the case where skb_clone() returns NULL.  In other
words, we don't have enough memory to capture the packet and just return
-ENOMEM.

> +		vskb->dev = dev;
> +		vskb->pkt_type = PACKET_USER;

I'm not sure if PACKET_USER is correct (or if pkt_type matters at all).

PACKET_USER/PACKET_KERNEL seems to be purely a netlink concept to
distinguish netlink packets originating from the kernel or userspace.

AF_VSOCK is more like Ethernet where packets come from the host itself
or from a VM.  I would consider PACKET_HOST and PACKET_OTHERHOST.

> +		ret = dev_queue_xmit(vskb);
> +		if (unlikely(ret > 0))
> +			ret = net_xmit_errno(ret);
> +
> +		dev_put(dev);
> +	}
> +
> +	return ret;
> +}
> +
> +static void __vsock_deliver_tap(struct sk_buff *skb)
> +{
> +	int ret;
> +	struct vsock_tap *tmp;
> +
> +	list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
> +		ret = __vsock_deliver_tap_skb(skb, tmp->dev);
> +		if (unlikely(ret))
> +			break;
> +	}
> +}
> +
> +void vsock_deliver_tap(struct sk_buff *skb)
> +{
> +	rcu_read_lock();
> +
> +	if (unlikely(!list_empty(&vsock_tap_all)))
> +		__vsock_deliver_tap(skb);
> +
> +	rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(vsock_deliver_tap);
> +
>  MODULE_AUTHOR("VMware, Inc.");
>  MODULE_DESCRIPTION("VMware Virtual Socket Family");
>  MODULE_VERSION("1.0.1.0-k");
> -- 
> 2.8.3
>
ggarcia@abra.uab.cat June 9, 2016, 3:02 p.m. UTC | #2
On 06/01/2016 11:07 PM, Stefan Hajnoczi wrote:
> On Sat, May 28, 2016 at 06:29:05PM +0200, ggarcia@abra.uab.cat wrote:
>> From: Gerard Garcia <ggarcia@deic.uab.cat>
>>
>> Signed-off-by: Gerard Garcia <ggarcia@deic.uab.cat>
>> ---
>>   include/net/af_vsock.h      |  13 ++++++
>>   include/uapi/linux/if_arp.h |   1 +
>>   net/vmw_vsock/af_vsock.c    | 105 ++++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 119 insertions(+)
>>
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index 15694c9..c4593d8 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -187,4 +187,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
>>   					 struct sockaddr_vm *dst);
>>   void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
>>   
>> +/**** TAP ****/
>> +
>> +struct vsock_tap {
>> +	struct net_device *dev;
>> +	struct module *module;
>> +	struct list_head list;
>> +};
>> +
>> +extern int vsock_init_tap(void);
>> +extern int vsock_add_tap(struct vsock_tap *vt);
>> +extern int vsock_remove_tap(struct vsock_tap *vt);
>> +extern void vsock_deliver_tap(struct sk_buff *skb);
> The other function prototypes in this header don't use "extern" either.
> Please drop for consistency.
Ok
>> +
>>   #endif /* __AF_VSOCK_H__ */
>> diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
>> index 4d024d7..869262a 100644
>> --- a/include/uapi/linux/if_arp.h
>> +++ b/include/uapi/linux/if_arp.h
>> @@ -95,6 +95,7 @@
>>   #define ARPHRD_IP6GRE	823		/* GRE over IPv6		*/
>>   #define ARPHRD_NETLINK	824		/* Netlink header		*/
>>   #define ARPHRD_6LOWPAN	825		/* IPv6 over LoWPAN             */
>> +#define ARPHRD_VSOCKMON 826		/* Vsock monitor header		*/
> Previous lines use a tab character (^I) before the numeric constant.
> Please follow that style for consistency.
Ok
> I suggest calling it just ARPHRD_VSOCK.  Netlink also uses
> ARPHRD_NETLINK instead of ARPHRD_NLMON.
We define a new header used exclusively by the vsockmon device, nlmon 
copies the netlink header entirely.
I'm not sure if we should *link* the ARPHRD_VSOCK identifier to the 
vsockmon header.
>>   
>>   #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
>>   #define ARPHRD_NONE	  0xFFFE	/* zero header length */
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 6b158ab..ec7a05d 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -96,6 +96,7 @@
>>   #include <linux/unistd.h>
>>   #include <linux/wait.h>
>>   #include <linux/workqueue.h>
>> +#include <linux/if_arp.h>
>>   #include <net/sock.h>
>>   #include <net/af_vsock.h>
>>   
>> @@ -2012,6 +2013,110 @@ const struct vsock_transport *vsock_core_get_transport(void)
>>   }
>>   EXPORT_SYMBOL_GPL(vsock_core_get_transport);
>>   
>> +/**** TAP ****/
> Feel free to put this in a separate source file.  The Kbuild can link
> multiple objects into a single kernel module.  That would be cleaner
> than using a big comment to separate it from af_vsock.c code.
I'm following the af_vsock.c style, where different logic is separated 
using this style of comments. It is not a lot of code
so I thought it would be cleaner to have it in the same file.
> I wonder whether this tap mechanism should be generic so that netlink,
> vsock, and other address families can reuse the code.  This is basically
> copied from netlink.
>
>> +static DEFINE_SPINLOCK(vsock_tap_lock);
>> +static struct list_head vsock_tap_all __read_mostly;
>> +
>> +int vsock_init_tap(void) {
>> +	INIT_LIST_HEAD(&vsock_tap_all);
>> +
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(vsock_init_tap);
> Can you use LIST_HEAD_INIT() on the vsock_tap_all declaration line to
> eliminate the need for vsock_init_tap() completely?
Yes, I'll do that. I though we may need to do more initialization at 
some point, but for now it is better to just
statically initialize it.
>> +
>> +int vsock_add_tap(struct vsock_tap *vt) {
>> +	if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
>> +		return -EINVAL;
>> +
>> +	spin_lock(&vsock_tap_lock);
>> +	list_add_rcu(&vt->list, &vsock_tap_all);
>> +	spin_unlock(&vsock_tap_lock);
>> +
>> +	__module_get(vt->module);
> It's slightly safer to get the module before publishing it on the list.
> But in practice I guess the caller is the module so the module won't
> disappear underneath us.
This function is equal to the function in af_netlink.c used by nlmon. As 
you said, in practice the caller is the module
so it won't disappear.
>> +
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(vsock_add_tap);
>> +
>> +int __vsock_remove_tap(struct vsock_tap *vt) {
>> +	bool found = false;
>> +	struct vsock_tap *tmp;
>> +
>> +	spin_lock(&vsock_tap_lock);
>> +
>> +	list_for_each_entry(tmp, &vsock_tap_all, list) {
>> +		if (vt == tmp) {
>> +			list_del_rcu(&vt->list);
>> +			found = true;
>> +			goto out;
>> +		}
>> +	}
>> +
>> +	pr_warn("__vsock_remove_tap: %p not found\n", vt);
>> +out:
>> +	spin_unlock(&vsock_tap_lock);
>> +
>> +	if (found)
>> +		module_put(vt->module);
>> +
>> +	return found ? 0 : -ENODEV;
>> +}
>> +
>> +int vsock_remove_tap(struct vsock_tap *vt)
>> +{
>> +	int ret;
>> +
>> +	ret = __vsock_remove_tap(vt);
>> +	synchronize_net();
> module_put() should be called after synchronize_net().  That way we
> guarantee that no one is still accessing vt when the module is put.  The
> caller is probably the module though, so maybe there is an implicit
> reference...
Also, it is equal to the function in af_netlink.c used by nlmon. And as 
before, this function is supposed
to be called from the same module so it can't disappear before calling 
synchronize_net().
>> +
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(vsock_remove_tap);
>> +
>> +static int __vsock_deliver_tap_skb(struct sk_buff *skb,
>> +				     struct net_device *dev)
>> +{
>> +	struct sk_buff *vskb;
>> +	int ret = 0;
>> +
>> +	if (skb) {
>> +		dev_hold(dev);
>> +		vskb = skb_clone(skb, GFP_ATOMIC);
> You must handle the case where skb_clone() returns NULL.  In other
> words, we don't have enough memory to capture the packet and just return
> -ENOMEM.
You are right!
>> +		vskb->dev = dev;
>> +		vskb->pkt_type = PACKET_USER;
> I'm not sure if PACKET_USER is correct (or if pkt_type matters at all).
>
> PACKET_USER/PACKET_KERNEL seems to be purely a netlink concept to
> distinguish netlink packets originating from the kernel or userspace.
>
> AF_VSOCK is more like Ethernet where packets come from the host itself
> or from a VM.  I would consider PACKET_HOST and PACKET_OTHERHOST.
>
I'm not sure either if pkt_type matters. I haven't found any code that 
uses it so I'll just leave it unset.
>> +		ret = dev_queue_xmit(vskb);
>> +		if (unlikely(ret > 0))
>> +			ret = net_xmit_errno(ret);
>> +
>> +		dev_put(dev);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static void __vsock_deliver_tap(struct sk_buff *skb)
>> +{
>> +	int ret;
>> +	struct vsock_tap *tmp;
>> +
>> +	list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
>> +		ret = __vsock_deliver_tap_skb(skb, tmp->dev);
>> +		if (unlikely(ret))
>> +			break;
>> +	}
>> +}
>> +
>> +void vsock_deliver_tap(struct sk_buff *skb)
>> +{
>> +	rcu_read_lock();
>> +
>> +	if (unlikely(!list_empty(&vsock_tap_all)))
>> +		__vsock_deliver_tap(skb);
>> +
>> +	rcu_read_unlock();
>> +}
>> +EXPORT_SYMBOL_GPL(vsock_deliver_tap);
>> +
>>   MODULE_AUTHOR("VMware, Inc.");
>>   MODULE_DESCRIPTION("VMware Virtual Socket Family");
>>   MODULE_VERSION("1.0.1.0-k");
>> -- 
>> 2.8.3
>>
Stefan Hajnoczi June 10, 2016, 3:44 p.m. UTC | #3
On Thu, Jun 09, 2016 at 05:02:47PM +0200, Gerard Garcia wrote:
> On 06/01/2016 11:07 PM, Stefan Hajnoczi wrote:
> > On Sat, May 28, 2016 at 06:29:05PM +0200, ggarcia@abra.uab.cat wrote:
> > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> > > index 6b158ab..ec7a05d 100644
> > > --- a/net/vmw_vsock/af_vsock.c
> > > +++ b/net/vmw_vsock/af_vsock.c
> > > @@ -96,6 +96,7 @@
> > >   #include <linux/unistd.h>
> > >   #include <linux/wait.h>
> > >   #include <linux/workqueue.h>
> > > +#include <linux/if_arp.h>
> > >   #include <net/sock.h>
> > >   #include <net/af_vsock.h>
> > > @@ -2012,6 +2013,110 @@ const struct vsock_transport *vsock_core_get_transport(void)
> > >   }
> > >   EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> > > +/**** TAP ****/
> > Feel free to put this in a separate source file.  The Kbuild can link
> > multiple objects into a single kernel module.  That would be cleaner
> > than using a big comment to separate it from af_vsock.c code.
> I'm following the af_vsock.c style, where different logic is separated using
> this style of comments. It is not a lot of code
> so I thought it would be cleaner to have it in the same file.

It's up to the af_vsock.c maintainer, but if we keep appending
independent chunks of code to one file it becomes hard to manage and
chances of conflicts during patch merging increases.

> > > +int vsock_add_tap(struct vsock_tap *vt) {
> > > +	if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
> > > +		return -EINVAL;
> > > +
> > > +	spin_lock(&vsock_tap_lock);
> > > +	list_add_rcu(&vt->list, &vsock_tap_all);
> > > +	spin_unlock(&vsock_tap_lock);
> > > +
> > > +	__module_get(vt->module);
> > It's slightly safer to get the module before publishing it on the list.
> > But in practice I guess the caller is the module so the module won't
> > disappear underneath us.
> This function is equal to the function in af_netlink.c used by nlmon. As you
> said, in practice the caller is the module
> so it won't disappear.

Yes, there isn't a huge win right now but given that it's easy to
resolve the issue I'd do it.  The problem comes when people copy-paste
code that contains assumptions and the assumption no longer holds.
Better to write it in the safe way, eliminating the assumption, so that
derived code will be correct under more conditions.  There is no
drawback to moving __module_get() above the spin_lock().
Jorgen Hansen June 14, 2016, 12:05 p.m. UTC | #4
On 6/10/16, 5:44 PM, "Stefan Hajnoczi" <stefanha@redhat.com> wrote:



>On Thu, Jun 09, 2016 at 05:02:47PM +0200, Gerard Garcia wrote:

>> On 06/01/2016 11:07 PM, Stefan Hajnoczi wrote:

>> > On Sat, May 28, 2016 at 06:29:05PM +0200, ggarcia@abra.uab.cat wrote:

>> > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c

>> > > index 6b158ab..ec7a05d 100644

>> > > --- a/net/vmw_vsock/af_vsock.c

>> > > +++ b/net/vmw_vsock/af_vsock.c

>> > > @@ -96,6 +96,7 @@

>> > >   #include <linux/unistd.h>

>> > >   #include <linux/wait.h>

>> > >   #include <linux/workqueue.h>

>> > > +#include <linux/if_arp.h>

>> > >   #include <net/sock.h>

>> > >   #include <net/af_vsock.h>

>> > > @@ -2012,6 +2013,110 @@ const struct vsock_transport *vsock_core_get_transport(void)

>> > >   }

>> > >   EXPORT_SYMBOL_GPL(vsock_core_get_transport);

>> > > +/**** TAP ****/

>> > Feel free to put this in a separate source file.  The Kbuild can link

>> > multiple objects into a single kernel module.  That would be cleaner

>> > than using a big comment to separate it from af_vsock.c code.

>> I'm following the af_vsock.c style, where different logic is separated using

>> this style of comments. It is not a lot of code

>> so I thought it would be cleaner to have it in the same file.

>

>It's up to the af_vsock.c maintainer, but if we keep appending

>independent chunks of code to one file it becomes hard to manage and

>chances of conflicts during patch merging increases.


I agree with Stefan - af_vsock.c is already undesirably large, so it would be great if you could make this a separate file.
diff mbox

Patch

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 15694c9..c4593d8 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -187,4 +187,17 @@  struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
 					 struct sockaddr_vm *dst);
 void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
 
+/**** TAP ****/
+
+struct vsock_tap {
+	struct net_device *dev;
+	struct module *module;
+	struct list_head list;
+};
+
+extern int vsock_init_tap(void);
+extern int vsock_add_tap(struct vsock_tap *vt);
+extern int vsock_remove_tap(struct vsock_tap *vt);
+extern void vsock_deliver_tap(struct sk_buff *skb);
+
 #endif /* __AF_VSOCK_H__ */
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index 4d024d7..869262a 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -95,6 +95,7 @@ 
 #define ARPHRD_IP6GRE	823		/* GRE over IPv6		*/
 #define ARPHRD_NETLINK	824		/* Netlink header		*/
 #define ARPHRD_6LOWPAN	825		/* IPv6 over LoWPAN             */
+#define ARPHRD_VSOCKMON 826		/* Vsock monitor header		*/
 
 #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
 #define ARPHRD_NONE	  0xFFFE	/* zero header length */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6b158ab..ec7a05d 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -96,6 +96,7 @@ 
 #include <linux/unistd.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/if_arp.h>
 #include <net/sock.h>
 #include <net/af_vsock.h>
 
@@ -2012,6 +2013,110 @@  const struct vsock_transport *vsock_core_get_transport(void)
 }
 EXPORT_SYMBOL_GPL(vsock_core_get_transport);
 
+/**** TAP ****/
+static DEFINE_SPINLOCK(vsock_tap_lock);
+static struct list_head vsock_tap_all __read_mostly;
+
+int vsock_init_tap(void) {
+	INIT_LIST_HEAD(&vsock_tap_all);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_init_tap);
+
+int vsock_add_tap(struct vsock_tap *vt) {
+	if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
+		return -EINVAL;
+
+	spin_lock(&vsock_tap_lock);
+	list_add_rcu(&vt->list, &vsock_tap_all);
+	spin_unlock(&vsock_tap_lock);
+
+	__module_get(vt->module);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_add_tap);
+
+int __vsock_remove_tap(struct vsock_tap *vt) {
+	bool found = false;
+	struct vsock_tap *tmp;
+
+	spin_lock(&vsock_tap_lock);
+
+	list_for_each_entry(tmp, &vsock_tap_all, list) {
+		if (vt == tmp) {
+			list_del_rcu(&vt->list);
+			found = true;
+			goto out;
+		}
+	}
+
+	pr_warn("__vsock_remove_tap: %p not found\n", vt);
+out:
+	spin_unlock(&vsock_tap_lock);
+
+	if (found)
+		module_put(vt->module);
+
+	return found ? 0 : -ENODEV;
+}
+
+int vsock_remove_tap(struct vsock_tap *vt)
+{
+	int ret;
+
+	ret = __vsock_remove_tap(vt);
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vsock_remove_tap);
+
+static int __vsock_deliver_tap_skb(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	struct sk_buff *vskb;
+	int ret = 0;
+
+	if (skb) {
+		dev_hold(dev);
+		vskb = skb_clone(skb, GFP_ATOMIC);
+		vskb->dev = dev;
+		vskb->pkt_type = PACKET_USER;
+		ret = dev_queue_xmit(vskb);
+		if (unlikely(ret > 0))
+			ret = net_xmit_errno(ret);
+
+		dev_put(dev);
+	}
+
+	return ret;
+}
+
+static void __vsock_deliver_tap(struct sk_buff *skb)
+{
+	int ret;
+	struct vsock_tap *tmp;
+
+	list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
+		ret = __vsock_deliver_tap_skb(skb, tmp->dev);
+		if (unlikely(ret))
+			break;
+	}
+}
+
+void vsock_deliver_tap(struct sk_buff *skb)
+{
+	rcu_read_lock();
+
+	if (unlikely(!list_empty(&vsock_tap_all)))
+		__vsock_deliver_tap(skb);
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(vsock_deliver_tap);
+
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
 MODULE_VERSION("1.0.1.0-k");