From patchwork Thu Oct 22 16:43:35 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark McLoughlin X-Patchwork-Id: 36704 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 3BF42B7BB0 for ; Fri, 23 Oct 2009 04:02:14 +1100 (EST) Received: from localhost ([127.0.0.1]:55776 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N113D-00039S-Hz for incoming@patchwork.ozlabs.org; Thu, 22 Oct 2009 13:02:11 -0400 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1N10nV-0007F8-G9 for qemu-devel@nongnu.org; Thu, 22 Oct 2009 12:45:57 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1N10nL-0006zu-TG for qemu-devel@nongnu.org; Thu, 22 Oct 2009 12:45:52 -0400 Received: from [199.232.76.173] (port=47951 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N10nL-0006ys-1s for qemu-devel@nongnu.org; Thu, 22 Oct 2009 12:45:47 -0400 Received: from mx1.redhat.com ([209.132.183.28]:16634) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1N10nJ-0003FP-44 for qemu-devel@nongnu.org; Thu, 22 Oct 2009 12:45:45 -0400 Received: from int-mx05.intmail.prod.int.phx2.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.18]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id n9MGjhi6011663 for ; Thu, 22 Oct 2009 12:45:43 -0400 Received: from blaa.localdomain (ovpn01.gateway.prod.ext.phx2.redhat.com [10.5.9.1]) by int-mx05.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id n9MGjfIX008533; Thu, 22 Oct 2009 12:45:42 -0400 Received: by blaa.localdomain (Postfix, from userid 500) id 5B35C5A254; Thu, 22 Oct 2009 17:43:50 +0100 (IST) From: Mark McLoughlin To: qemu-devel@nongnu.org Date: Thu, 22 Oct 2009 17:43:35 +0100 Message-Id: <1256229830-28066-5-git-send-email-markmc@redhat.com> In-Reply-To: <1256229830-28066-1-git-send-email-markmc@redhat.com> References: <1256229830-28066-1-git-send-email-markmc@redhat.com> X-Scanned-By: MIMEDefang 2.67 on 10.5.11.18 X-detected-operating-system: by monty-python.gnu.org: Genre and OS details not recognized. Cc: Mark McLoughlin Subject: [Qemu-devel] [PATCH 04/19] net: enable IFF_VNET_HDR on tap fds if available X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org For now, we just add an empty header before writing and strip the header after reading. We really only want IFF_VNET_HDR when virtio_net is using it, but it would significantly complicate matters to try and do that. There should be little or no performance impact with always adding headers. Signed-off-by: Mark McLoughlin --- net.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++--------- tap-linux.h | 14 ++++++++++ 2 files changed, 84 insertions(+), 13 deletions(-) diff --git a/net.c b/net.c index 728941a..356a280 100644 --- a/net.c +++ b/net.c @@ -1245,14 +1245,20 @@ void do_info_usernet(Monitor *mon) #if !defined(_WIN32) +/* Maximum GSO packet size (64k) plus plenty of room for + * the ethernet and virtio_net headers + */ +#define TAP_BUFSIZE (4096 + 65536) + typedef struct TAPState { VLANClientState *vc; int fd; char down_script[1024]; char down_script_arg[128]; - uint8_t buf[4096]; + uint8_t buf[TAP_BUFSIZE]; unsigned int read_poll : 1; unsigned int write_poll : 1; + unsigned int has_vnet_hdr : 1; } TAPState; static int launch_script(const char *setup_script, const char *ifname, int fd); @@ -1311,15 +1317,33 @@ static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov, int iovcnt) { TAPState *s = vc->opaque; + const struct iovec *iovp = iov; + struct iovec iov_copy[iovcnt + 1]; + struct virtio_net_hdr hdr = { 0, }; - return tap_write_packet(s, iov, iovcnt); + if (s->has_vnet_hdr) { + iov_copy[0].iov_base = &hdr; + iov_copy[0].iov_len = sizeof(hdr); + memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); + iovp = iov_copy; + iovcnt++; + } + + return tap_write_packet(s, iovp, iovcnt); } static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size) { TAPState *s = vc->opaque; - struct iovec iov[1]; + struct iovec iov[2]; int iovcnt = 0; + struct virtio_net_hdr hdr = { 0, }; + + if (s->has_vnet_hdr) { + iov[iovcnt].iov_base = &hdr; + iov[iovcnt].iov_len = sizeof(hdr); + iovcnt++; + } iov[iovcnt].iov_base = (char *)buf; iov[iovcnt].iov_len = size; @@ -1365,12 +1389,19 @@ static void tap_send(void *opaque) int size; do { + uint8_t *buf = s->buf; + size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); if (size <= 0) { break; } - size = qemu_send_packet_async(s->vc, s->buf, size, tap_send_completed); + if (s->has_vnet_hdr) { + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + + size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed); if (size == 0) { tap_read_poll(s, 0); } @@ -1400,6 +1431,18 @@ static int tap_set_sndbuf(TAPState *s, QemuOpts *opts) return 0; } +static int tap_probe_vnet_hdr(int fd) +{ + struct ifreq ifr; + + if (ioctl(fd, TUNGETIFF, &ifr) != 0) { + qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno)); + return 0; + } + + return ifr.ifr_flags & IFF_VNET_HDR; +} + static void tap_cleanup(VLANClientState *vc) { TAPState *s = vc->opaque; @@ -1420,12 +1463,14 @@ static void tap_cleanup(VLANClientState *vc) static TAPState *net_tap_fd_init(VLANState *vlan, const char *model, const char *name, - int fd) + int fd, + int vnet_hdr) { TAPState *s; s = qemu_mallocz(sizeof(TAPState)); s->fd = fd; + s->has_vnet_hdr = vnet_hdr != 0; s->vc = qemu_new_vlan_client(vlan, NULL, model, name, NULL, tap_receive, tap_receive_iov, tap_cleanup, s); @@ -1435,7 +1480,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan, } #if defined (CONFIG_BSD) || defined (__FreeBSD_kernel__) -static int tap_open(char *ifname, int ifname_size) +static int tap_open(char *ifname, int ifname_size, int *vnet_hdr) { int fd; char *dev; @@ -1577,7 +1622,7 @@ static int tap_alloc(char *dev, size_t dev_size) return tap_fd; } -static int tap_open(char *ifname, int ifname_size) +static int tap_open(char *ifname, int ifname_size, int *vnet_hdr) { char dev[10]=""; int fd; @@ -1590,13 +1635,13 @@ static int tap_open(char *ifname, int ifname_size) return fd; } #elif defined (_AIX) -static int tap_open(char *ifname, int ifname_size) +static int tap_open(char *ifname, int ifname_size, int *vnet_hdr) { fprintf (stderr, "no tap on AIX\n"); return -1; } #else -static int tap_open(char *ifname, int ifname_size) +static int tap_open(char *ifname, int ifname_size, int *vnet_hdr) { struct ifreq ifr; int fd, ret; @@ -1608,6 +1653,17 @@ static int tap_open(char *ifname, int ifname_size) } memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + { + unsigned int features; + + if (ioctl(fd, TUNGETFEATURES, &features) == 0 && + features & IFF_VNET_HDR) { + *vnet_hdr = 1; + ifr.ifr_flags |= IFF_VNET_HDR; + } + } + if (ifname[0] != '\0') pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); else @@ -1673,14 +1729,15 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model, const char *setup_script, const char *down_script) { TAPState *s; - int fd; + int fd, vnet_hdr; char ifname[128]; if (ifname1 != NULL) pstrcpy(ifname, sizeof(ifname), ifname1); else ifname[0] = '\0'; - TFR(fd = tap_open(ifname, sizeof(ifname))); + vnet_hdr = 0; + TFR(fd = tap_open(ifname, sizeof(ifname), &vnet_hdr)); if (fd < 0) return NULL; @@ -1690,7 +1747,7 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model, launch_script(setup_script, ifname, fd)) { return NULL; } - s = net_tap_fd_init(vlan, model, name, fd); + s = net_tap_fd_init(vlan, model, name, fd, vnet_hdr); snprintf(s->vc->info_str, sizeof(s->vc->info_str), "ifname=%s,script=%s,downscript=%s", ifname, setup_script, down_script); @@ -2644,7 +2701,7 @@ static int net_init_tap(QemuOpts *opts, fcntl(fd, F_SETFL, O_NONBLOCK); - s = net_tap_fd_init(vlan, "tap", name, fd); + s = net_tap_fd_init(vlan, "tap", name, fd, tap_probe_vnet_hdr(fd)); if (!s) { close(fd); } diff --git a/tap-linux.h b/tap-linux.h index cd07ea8..8e75348 100644 --- a/tap-linux.h +++ b/tap-linux.h @@ -16,14 +16,28 @@ #ifndef QEMU_TAP_H #define QEMU_TAP_H +#include #include /* Ioctl defines */ #define TUNSETIFF _IOW('T', 202, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) #define TUNSETSNDBUF _IOW('T', 212, int) /* TUNSETIFF ifr flags */ #define IFF_TAP 0x0002 #define IFF_NO_PI 0x1000 +#define IFF_VNET_HDR 0x4000 + +struct virtio_net_hdr +{ + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; +}; #endif /* QEMU_TAP_H */