Message ID | 201002131135.08477.arnd@arndb.de |
---|---|
State | New |
Headers | show |
On Sat, Feb 13, 2010 at 11:35:08AM +0100, Arnd Bergmann wrote: > This adds support for passing a macvtap file descriptor into > vhost-net, much like we already do for tun/tap. > > Most of the new code is taken from the respective patch > in the tun driver and may get consolidated in the future. > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > --- > drivers/net/macvtap.c | 98 ++++++++++++++++++++++++++++++++++--------- > drivers/vhost/net.c | 8 +++- > include/linux/if_macvlan.h | 13 ++++++ > 3 files changed, 96 insertions(+), 23 deletions(-) > > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c > index 7050997..e354501 100644 > --- a/drivers/net/macvtap.c > +++ b/drivers/net/macvtap.c > @@ -58,6 +58,8 @@ static unsigned int macvtap_major; > static struct class *macvtap_class; > static struct cdev macvtap_cdev; > > +static const struct proto_ops macvtap_socket_ops; > + > /* > * RCU usage: > * The macvtap_queue and the macvlan_dev are loosely coupled, the > @@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) > return -ENOLINK; > > skb_queue_tail(&q->sk.sk_receive_queue, skb); > - wake_up(q->sk.sk_sleep); > + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); > return 0; > } > > @@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk) > return; > > if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) > - wake_up_interruptible_sync(sk->sk_sleep); > + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); > } > > static int macvtap_open(struct inode *inode, struct file *file) > @@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file) > init_waitqueue_head(&q->sock.wait); > q->sock.type = SOCK_RAW; > q->sock.state = SS_CONNECTED; > + q->sock.file = file; > + q->sock.ops = &macvtap_socket_ops; > sock_init_data(&q->sock, &q->sk); > q->sk.sk_write_space = macvtap_sock_write_space; > > @@ -387,32 +391,20 @@ static ssize_t 
macvtap_put_user(struct macvtap_queue *q, > > rcu_read_lock_bh(); > vlan = rcu_dereference(q->vlan); > - macvlan_count_rx(vlan, len, ret == 0, 0); > + if (vlan) > + macvlan_count_rx(vlan, len, ret == 0, 0); > rcu_read_unlock_bh(); > > return ret ? ret : len; > } > > -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > - unsigned long count, loff_t pos) > +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, > + const struct iovec *iv, unsigned long len, > + int noblock) > { > - struct file *file = iocb->ki_filp; > - struct macvtap_queue *q = file->private_data; > - > DECLARE_WAITQUEUE(wait, current); > struct sk_buff *skb; > - ssize_t len, ret = 0; > - > - if (!q) { > - ret = -ENOLINK; > - goto out; > - } > - > - len = iov_length(iv, count); > - if (len < 0) { > - ret = -EINVAL; > - goto out; > - } > + ssize_t ret = 0; > > add_wait_queue(q->sk.sk_sleep, &wait); > while (len) { > @@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > /* Read frames from the queue */ > skb = skb_dequeue(&q->sk.sk_receive_queue); > if (!skb) { > - if (file->f_flags & O_NONBLOCK) { > + if (noblock) { > ret = -EAGAIN; > break; > } > @@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > > current->state = TASK_RUNNING; > remove_wait_queue(q->sk.sk_sleep, &wait); > + return ret; > +} > + > +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > + unsigned long count, loff_t pos) > +{ > + struct file *file = iocb->ki_filp; > + struct macvtap_queue *q = file->private_data; > + ssize_t len, ret = 0; > > + len = iov_length(iv, count); > + if (len < 0) { > + ret = -EINVAL; > + goto out; > + } > + > + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); > + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? 
*/ > out: > return ret; > } > @@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = { > #endif > }; > > +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, > + struct msghdr *m, size_t total_len) > +{ > + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); > + return macvtap_get_user(q, m->msg_iov, total_len, > + m->msg_flags & MSG_DONTWAIT); > +} > + > +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, > + struct msghdr *m, size_t total_len, > + int flags) > +{ > + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); > + int ret; > + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) > + return -EINVAL; > + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, > + flags & MSG_DONTWAIT); > + if (ret > total_len) { > + m->msg_flags |= MSG_TRUNC; > + ret = flags & MSG_TRUNC ? ret : total_len; > + } > + return ret; > +} > + > +/* Ops structure to mimic raw sockets with tun */ > +static const struct proto_ops macvtap_socket_ops = { > + .sendmsg = macvtap_sendmsg, > + .recvmsg = macvtap_recvmsg, > +}; > + > +/* Get an underlying socket object from tun file. Returns error unless file is > + * attached to a device. The returned object works like a packet socket, it > + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for > + * holding a reference to the file for as long as the socket is in use. 
*/ > +struct socket *macvtap_get_socket(struct file *file) > +{ > + struct macvtap_queue *q; > + if (file->f_op != &macvtap_fops) > + return ERR_PTR(-EINVAL); > + q = file->private_data; > + if (!q) > + return ERR_PTR(-EBADFD); > + return &q->sock; > +} > +EXPORT_SYMBOL_GPL(macvtap_get_socket); > + > static int macvtap_init(void) > { > int err; > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 4c89283..91a324c 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -22,6 +22,7 @@ > #include <linux/if_packet.h> > #include <linux/if_arp.h> > #include <linux/if_tun.h> > +#include <linux/if_macvlan.h> > > #include <net/sock.h> > > @@ -452,13 +453,16 @@ err: > return ERR_PTR(r); > } > > -static struct socket *get_tun_socket(int fd) > +static struct socket *get_tap_socket(int fd) > { > struct file *file = fget(fd); > struct socket *sock; > if (!file) > return ERR_PTR(-EBADF); > sock = tun_get_socket(file); > + if (!IS_ERR(sock)) > + return sock; > + sock = macvtap_get_socket(file); > if (IS_ERR(sock)) > fput(file); > return sock; > @@ -473,7 +477,7 @@ static struct socket *get_socket(int fd) > sock = get_raw_socket(fd); > if (!IS_ERR(sock)) > return sock; > - sock = get_tun_socket(fd); > + sock = get_tap_socket(fd); > if (!IS_ERR(sock)) > return sock; > return ERR_PTR(-ENOTSOCK); This will also need a dependency on macvtap in Kconfig. See how it's done for tun. 
> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h > index 51f1512..7d7f1e3 100644 > --- a/include/linux/if_macvlan.h > +++ b/include/linux/if_macvlan.h > @@ -7,6 +7,19 @@ > #include <linux/netlink.h> > #include <net/netlink.h> > > +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE) > +struct socket *macvtap_get_socket(struct file *); > +#else > +#include <linux/err.h> > +#include <linux/errno.h> > +struct file; > +struct socket; > +static inline struct socket *macvtap_get_socket(struct file *f) > +{ > + return ERR_PTR(-EINVAL); > +} > +#endif /* CONFIG_MACVTAP */ > + > struct macvlan_port; > struct macvtap_queue; > > -- > 1.6.3.3 > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sunday 14 February 2010, Michael S. Tsirkin wrote:
> > @@ -473,7 +477,7 @@ static struct socket *get_socket(int fd)
> >  	sock = get_raw_socket(fd);
> >  	if (!IS_ERR(sock))
> >  		return sock;
> > -	sock = get_tun_socket(fd);
> > +	sock = get_tap_socket(fd);
> >  	if (!IS_ERR(sock))
> >  		return sock;
> >  	return ERR_PTR(-ENOTSOCK);
>
> This will also need a dependency on macvtap in Kconfig.
> See how it's done for tun.

Ok, I'll add that.

Thanks,

	Arnd
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7050997..e354501 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -58,6 +58,8 @@ static unsigned int macvtap_major; static struct class *macvtap_class; static struct cdev macvtap_cdev; +static const struct proto_ops macvtap_socket_ops; + /* * RCU usage: * The macvtap_queue and the macvlan_dev are loosely coupled, the @@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) return -ENOLINK; skb_queue_tail(&q->sk.sk_receive_queue, skb); - wake_up(q->sk.sk_sleep); + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); return 0; } @@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk) return; if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file *file) @@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file) init_waitqueue_head(&q->sock.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; + q->sock.file = file; + q->sock.ops = &macvtap_socket_ops; sock_init_data(&q->sock, &q->sk); q->sk.sk_write_space = macvtap_sock_write_space; @@ -387,32 +391,20 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, rcu_read_lock_bh(); vlan = rcu_dereference(q->vlan); - macvlan_count_rx(vlan, len, ret == 0, 0); + if (vlan) + macvlan_count_rx(vlan, len, ret == 0, 0); rcu_read_unlock_bh(); return ret ? 
ret : len; } -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, + const struct iovec *iv, unsigned long len, + int noblock) { - struct file *file = iocb->ki_filp; - struct macvtap_queue *q = file->private_data; - DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; - ssize_t len, ret = 0; - - if (!q) { - ret = -ENOLINK; - goto out; - } - - len = iov_length(iv, count); - if (len < 0) { - ret = -EINVAL; - goto out; - } + ssize_t ret = 0; add_wait_queue(q->sk.sk_sleep, &wait); while (len) { @@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, /* Read frames from the queue */ skb = skb_dequeue(&q->sk.sk_receive_queue); if (!skb) { - if (file->f_flags & O_NONBLOCK) { + if (noblock) { ret = -EAGAIN; break; } @@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(q->sk.sk_sleep, &wait); + return ret; +} + +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct macvtap_queue *q = file->private_data; + ssize_t len, ret = 0; + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? 
*/ out: return ret; } @@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = { #endif }; +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); + return macvtap_get_user(q, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); +} + +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags) +{ + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, + flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops macvtap_socket_ops = { + .sendmsg = macvtap_sendmsg, + .recvmsg = macvtap_recvmsg, +}; + +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. 
*/ +struct socket *macvtap_get_socket(struct file *file) +{ + struct macvtap_queue *q; + if (file->f_op != &macvtap_fops) + return ERR_PTR(-EINVAL); + q = file->private_data; + if (!q) + return ERR_PTR(-EBADFD); + return &q->sock; +} +EXPORT_SYMBOL_GPL(macvtap_get_socket); + static int macvtap_init(void) { int err; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4c89283..91a324c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -22,6 +22,7 @@ #include <linux/if_packet.h> #include <linux/if_arp.h> #include <linux/if_tun.h> +#include <linux/if_macvlan.h> #include <net/sock.h> @@ -452,13 +453,16 @@ err: return ERR_PTR(r); } -static struct socket *get_tun_socket(int fd) +static struct socket *get_tap_socket(int fd) { struct file *file = fget(fd); struct socket *sock; if (!file) return ERR_PTR(-EBADF); sock = tun_get_socket(file); + if (!IS_ERR(sock)) + return sock; + sock = macvtap_get_socket(file); if (IS_ERR(sock)) fput(file); return sock; @@ -473,7 +477,7 @@ static struct socket *get_socket(int fd) sock = get_raw_socket(fd); if (!IS_ERR(sock)) return sock; - sock = get_tun_socket(fd); + sock = get_tap_socket(fd); if (!IS_ERR(sock)) return sock; return ERR_PTR(-ENOTSOCK); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 51f1512..7d7f1e3 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -7,6 +7,19 @@ #include <linux/netlink.h> #include <net/netlink.h> +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE) +struct socket *macvtap_get_socket(struct file *); +#else +#include <linux/err.h> +#include <linux/errno.h> +struct file; +struct socket; +static inline struct socket *macvtap_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_MACVTAP */ + struct macvlan_port; struct macvtap_queue;
This adds support for passing a macvtap file descriptor into
vhost-net, much like we already do for tun/tap.

Most of the new code is taken from the respective patch
in the tun driver and may get consolidated in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/net/macvtap.c      |   98 ++++++++++++++++++++++++++++++++++---------
 drivers/vhost/net.c        |    8 +++-
 include/linux/if_macvlan.h |   13 ++++++
 3 files changed, 96 insertions(+), 23 deletions(-)