From patchwork Fri Sep 22 08:02:31 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 817376 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xz5bs0XBcz9s7C for ; Fri, 22 Sep 2017 18:04:25 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751979AbdIVICv (ORCPT ); Fri, 22 Sep 2017 04:02:51 -0400 Received: from mx1.redhat.com ([209.132.183.28]:36254 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751704AbdIVICr (ORCPT ); Fri, 22 Sep 2017 04:02:47 -0400 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 48ACD25B9A; Fri, 22 Sep 2017 08:02:47 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 48ACD25B9A Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jasowang@redhat.com Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com [10.72.12.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id C615763621; Fri, 22 Sep 2017 08:02:44 +0000 (UTC) From: Jason Wang To: mst@redhat.com, jasowang@redhat.com, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org Subject: [PATCH net-next RFC 1/5] vhost: split out ring head fetching logic Date: Fri, 22 Sep 2017 16:02:31 +0800 Message-Id: <1506067355-5771-2-git-send-email-jasowang@redhat.com> In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com> References: <1506067355-5771-1-git-send-email-jasowang@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 22 Sep 2017 08:02:47 +0000 (UTC) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This patch splits out ring head fetching logic and leave the descriptor fetching and translation logic. This makes it is possible to batch fetching the descriptor indices. Signed-off-by: Jason Wang --- drivers/vhost/vhost.c | 75 +++++++++++++++++++++++++++++++++------------------ drivers/vhost/vhost.h | 5 ++++ 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d6dbb28..f87ec75 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1984,41 +1984,23 @@ static int get_indirect(struct vhost_virtqueue *vq, return 0; } -/* This looks in the virtqueue and for the first available buffer, and converts - * it to an iovec for convenient access. Since descriptors consist of some - * number of output then some number of input descriptors, it's actually two - * iovecs, but we pack them into one and note how many of each there were. - * - * This function returns the descriptor number found, or vq->num (which is - * never a valid descriptor number) if none was found. A negative code is - * returned on error. */ -int vhost_get_vq_desc(struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num) +static unsigned int vhost_get_vq_head(struct vhost_virtqueue *vq, int *err) { - struct vring_desc desc; - unsigned int i, head, found = 0; - u16 last_avail_idx; - __virtio16 avail_idx; - __virtio16 ring_head; - int ret, access; - - /* Check it isn't doing very strange things with descriptor numbers. */ - last_avail_idx = vq->last_avail_idx; + u16 last_avail_idx = vq->last_avail_idx; + __virtio16 avail_idx, ring_head; if (vq->avail_idx == vq->last_avail_idx) { if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); - return -EFAULT; + goto err; } vq->avail_idx = vhost16_to_cpu(vq, avail_idx); if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { vq_err(vq, "Guest moved used index from %u to %u", last_avail_idx, vq->avail_idx); - return -EFAULT; + goto err; } /* If there's nothing new since last we looked, return @@ -2040,13 +2022,35 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); - return -EFAULT; + goto err; } - head = vhost16_to_cpu(vq, ring_head); + return vhost16_to_cpu(vq, ring_head); +err: + *err = -EFAULT; + return 0; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which is + * never a valid descriptor number) if none was found. A negative code is + * returned on error. */ +int __vhost_get_vq_desc(struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + __virtio16 head) +{ + struct vring_desc desc; + unsigned int i, found = 0; + int ret = 0, access; /* If their number is silly, that's an error. */ - if (unlikely(head >= vq->num)) { + if (unlikely(head > vq->num)) { vq_err(vq, "Guest says index %u > %u is available", head, vq->num); return -EINVAL; @@ -2133,6 +2137,25 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } +EXPORT_SYMBOL_GPL(__vhost_get_vq_desc); + +int vhost_get_vq_desc(struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + int ret = 0; + unsigned int head = vhost_get_vq_head(vq, &ret); + + if (ret) + return ret; + + if (head == vq->num) + return head; + + return __vhost_get_vq_desc(vq, iov, iov_size, out_num, in_num, + log, log_num, head); +} EXPORT_SYMBOL_GPL(vhost_get_vq_desc); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d59a9cc..39ff897 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -191,6 +191,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct iovec iov[], unsigned int iov_count, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num); +int __vhost_get_vq_desc(struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + __virtio16 ring_head); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); int vhost_vq_init_access(struct vhost_virtqueue *); From patchwork Fri Sep 22 08:02:32 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 817374 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xz5bB6KJlz9sNc for ; Fri, 22 Sep 2017 18:03:50 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752013AbdIVIC4 (ORCPT ); Fri, 22 Sep 2017 04:02:56 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55846 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751995AbdIVICv (ORCPT ); Fri, 22 Sep 2017 04:02:51 -0400 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 2DBA3C0546D1; Fri, 22 Sep 2017 08:02:51 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 2DBA3C0546D1 Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jasowang@redhat.com Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com [10.72.12.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id D7AA962679; Fri, 22 Sep 2017 08:02:47 +0000 (UTC) From: Jason Wang To: mst@redhat.com, jasowang@redhat.com, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org Subject: [PATCH net-next RFC 2/5] vhost: introduce helper to prefetch desc index Date: Fri, 22 Sep 2017 16:02:32 +0800 Message-Id: <1506067355-5771-3-git-send-email-jasowang@redhat.com> In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com> References: <1506067355-5771-1-git-send-email-jasowang@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.32]); Fri, 22 Sep 2017 08:02:51 +0000 (UTC) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This patch introduces vhost_prefetch_desc_indices() which could batch descriptor indices fetching and used ring updating. This intends to reduce the cache misses of indices fetching and updating and reduce cache line bounce when virtqueue is almost full. copy_to_user() was used in order to benefit from modern cpus that support fast string copy. Batched virtqueue processing will be the first user. Signed-off-by: Jason Wang --- drivers/vhost/vhost.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.h | 3 +++ 2 files changed, 58 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f87ec75..8424166d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2437,6 +2437,61 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); +int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + u16 num, bool used_update) +{ + int ret, ret2; + u16 last_avail_idx, last_used_idx, total, copied; + __virtio16 avail_idx; + struct vring_used_elem __user *used; + int i; + + if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return -EFAULT; + } + last_avail_idx = vq->last_avail_idx & (vq->num - 1); + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + total = vq->avail_idx - vq->last_avail_idx; + ret = total = min(total, num); + + for (i = 0; i < ret; i++) { + ret2 = vhost_get_avail(vq, heads[i].id, + &vq->avail->ring[last_avail_idx]); + if (unlikely(ret2)) { + vq_err(vq, "Failed to get descriptors\n"); + return -EFAULT; + } + last_avail_idx = (last_avail_idx + 1) & (vq->num - 1); + } + + if (!used_update) + return ret; + + last_used_idx = vq->last_used_idx & (vq->num - 1); + while (total) { + copied = min((u16)(vq->num - last_used_idx), total); + ret2 = vhost_copy_to_user(vq, + &vq->used->ring[last_used_idx], + &heads[ret - total], + copied * sizeof(*used)); + + if (unlikely(ret2)) { + vq_err(vq, "Failed to update used ring!\n"); + return -EFAULT; + } + + last_used_idx = 0; + total -= copied; + } + + /* Only get avail ring entries after they have been exposed by guest. */ + smp_rmb(); + return ret; +} +EXPORT_SYMBOL(vhost_prefetch_desc_indices); static int __init vhost_init(void) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 39ff897..16c2cb6 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -228,6 +228,9 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, ssize_t vhost_chr_write_iter(struct vhost_dev *dev, struct iov_iter *from); int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); +int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + u16 num, bool used_update); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ From patchwork Fri Sep 22 08:02:33 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 817375 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xz5bf2zcWz9sP1 for ; Fri, 22 Sep 2017 18:04:13 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751875AbdIVID7 (ORCPT ); Fri, 22 Sep 2017 04:03:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:57206 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751704AbdIVICy (ORCPT ); Fri, 22 Sep 2017 04:02:54 -0400 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 405DD1F57C; Fri, 22 Sep 2017 08:02:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 405DD1F57C Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jasowang@redhat.com Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com [10.72.12.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id BDA926266E; Fri, 22 Sep 2017 08:02:51 +0000 (UTC) From: Jason Wang To: mst@redhat.com, jasowang@redhat.com, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org Subject: [PATCH net-next RFC 3/5] vhost: introduce vhost_add_used_idx() Date: Fri, 22 Sep 2017 16:02:33 +0800 Message-Id: <1506067355-5771-4-git-send-email-jasowang@redhat.com> In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com> References: <1506067355-5771-1-git-send-email-jasowang@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Fri, 22 Sep 2017 08:02:54 +0000 (UTC) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This patch introduces a helper which just increase the used idx. This will be used in pair with vhost_prefetch_desc_indices() by batching code. Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi --- drivers/vhost/vhost.c | 33 +++++++++++++++++++++++++++++++++ drivers/vhost/vhost.h | 1 + 2 files changed, 34 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8424166d..6532cda 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2178,6 +2178,39 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) } EXPORT_SYMBOL_GPL(vhost_add_used); +int vhost_add_used_idx(struct vhost_virtqueue *vq, int n) +{ + u16 old, new; + + old = vq->last_used_idx; + new = (vq->last_used_idx += n); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. + */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; + + /* Make sure buffer is written before we update index. */ + smp_wmb(); + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), + &vq->used->idx)) { + vq_err(vq, "Failed to increment used idx"); + return -EFAULT; + } + if (unlikely(vq->log_used)) { + /* Log used index update. */ + log_write(vq->log_base, + vq->log_addr + offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + return 0; +} +EXPORT_SYMBOL_GPL(vhost_add_used_idx); + static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 16c2cb6..5dd6c05 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -199,6 +199,7 @@ int __vhost_get_vq_desc(struct vhost_virtqueue *vq, void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); int vhost_vq_init_access(struct vhost_virtqueue *); +int vhost_add_used_idx(struct vhost_virtqueue *vq, int n); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); From patchwork Fri Sep 22 08:02:34 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 817373 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xz5Zw5wyCz9sNc for ; Fri, 22 Sep 2017 18:03:36 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752052AbdIVIDA (ORCPT ); Fri, 22 Sep 2017 04:03:00 -0400 Received: from mx1.redhat.com ([209.132.183.28]:36662 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751995AbdIVIC5 (ORCPT ); Fri, 22 Sep 2017 04:02:57 -0400 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 71C8B61462; Fri, 22 Sep 2017 08:02:57 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 71C8B61462 Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jasowang@redhat.com Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com [10.72.12.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id 0E5E16266E; Fri, 22 Sep 2017 08:02:54 +0000 (UTC) From: Jason Wang To: mst@redhat.com, jasowang@redhat.com, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org Subject: [PATCH net-next RFC 4/5] vhost_net: rename VHOST_RX_BATCH to VHOST_NET_BATCH Date: Fri, 22 Sep 2017 16:02:34 +0800 Message-Id: <1506067355-5771-5-git-send-email-jasowang@redhat.com> In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com> References: <1506067355-5771-1-git-send-email-jasowang@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 22 Sep 2017 08:02:57 +0000 (UTC) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Signed-off-by: Jason Wang --- drivers/vhost/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 58585ec..c89640e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -87,7 +87,7 @@ struct vhost_net_ubuf_ref { struct vhost_virtqueue *vq; }; -#define VHOST_RX_BATCH 64 +#define VHOST_NET_BATCH 64 struct vhost_net_buf { struct sk_buff **queue; int tail; @@ -159,7 +159,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) rxq->head = 0; rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, - VHOST_RX_BATCH); + VHOST_NET_BATCH); return rxq->tail; } @@ -912,7 +912,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) return -ENOMEM; } - queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *), + queue = kmalloc_array(VHOST_NET_BATCH, sizeof(struct sk_buff *), GFP_KERNEL); if (!queue) { kfree(vqs); From patchwork Fri Sep 22 08:02:35 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 817372 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xz5Zb3hncz9sNc for ; Fri, 22 Sep 2017 18:03:19 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752076AbdIVIDG (ORCPT ); Fri, 22 Sep 2017 04:03:06 -0400 Received: from mx1.redhat.com ([209.132.183.28]:35108 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751995AbdIVIDC (ORCPT ); Fri, 22 Sep 2017 04:03:02 -0400 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id AA2BB7E45B; Fri, 22 Sep 2017 08:03:02 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com AA2BB7E45B Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jasowang@redhat.com Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com [10.72.12.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id 148C062675; Fri, 22 Sep 2017 08:02:57 +0000 (UTC) From: Jason Wang To: mst@redhat.com, jasowang@redhat.com, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org Subject: [PATCH net-next RFC 5/5] vhost_net: basic tx virtqueue batched processing Date: Fri, 22 Sep 2017 16:02:35 +0800 Message-Id: <1506067355-5771-6-git-send-email-jasowang@redhat.com> In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com> References: <1506067355-5771-1-git-send-email-jasowang@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Fri, 22 Sep 2017 08:03:02 +0000 (UTC) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org This patch implements basic batched processing of tx virtqueue by prefetching desc indices and updating used ring in a batch. For non-zerocopy case, vq->heads were used for storing the prefetched indices and updating used ring. It is also a requirement for doing more batching on top. For zerocopy case and for simplicity, batched processing were simply disabled by only fetching and processing one descriptor at a time, this could be optimized in the future. XDP_DROP (without touching skb) on tun (with Moongen in guest) with zercopy disabled: Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz: Before: 3.20Mpps After: 3.90Mpps (+22%) No differences were seen with zerocopy enabled. Signed-off-by: Jason Wang --- drivers/vhost/net.c | 215 ++++++++++++++++++++++++++++---------------------- drivers/vhost/vhost.c | 2 +- 2 files changed, 121 insertions(+), 96 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c89640e..c439892 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -408,27 +408,25 @@ static int vhost_net_enable_vq(struct vhost_net *n, return vhost_poll_start(poll, sock->file); } -static int vhost_net_tx_get_vq_desc(struct vhost_net *net, - struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num) +static bool vhost_net_tx_avail(struct vhost_net *net, + struct vhost_virtqueue *vq) { unsigned long uninitialized_var(endtime); - int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); - if (r == vq->num && vq->busyloop_timeout) { - preempt_disable(); - endtime = busy_clock() + vq->busyloop_timeout; - while (vhost_can_busy_poll(vq->dev, endtime) && - vhost_vq_avail_empty(vq->dev, vq)) - cpu_relax(); - preempt_enable(); - r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); - } + if (!vq->busyloop_timeout) + return false; - return r; + if (!vhost_vq_avail_empty(vq->dev, vq)) + return true; + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + vhost_vq_avail_empty(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + + return !vhost_vq_avail_empty(vq->dev, vq); } static bool vhost_exceeds_maxpend(struct vhost_net *net) @@ -446,8 +444,9 @@ static void handle_tx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; + struct vring_used_elem used, *heads = vq->heads; unsigned out, in; - int head; + int avails, head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, @@ -461,6 +460,7 @@ static void handle_tx(struct vhost_net *net) struct socket *sock; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; + int i, batched = VHOST_NET_BATCH; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -475,6 +475,12 @@ static void handle_tx(struct vhost_net *net) hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; + /* Disable zerocopy batched fetching for simplicity */ + if (zcopy) { + heads = &used; + batched = 1; + } + for (;;) { /* Release DMAs done buffers first */ if (zcopy) @@ -486,95 +492,114 @@ static void handle_tx(struct vhost_net *net) if (unlikely(vhost_exceeds_maxpend(net))) break; - head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in); + avails = vhost_prefetch_desc_indices(vq, heads, batched, !zcopy); /* On error, stop handling until the next kick. */ - if (unlikely(head < 0)) + if (unlikely(avails < 0)) break; - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (head == vq->num) { + /* Nothing new? Busy poll for a while or wait for + * eventfd to tell us they refilled. */ + if (!avails) { + if (vhost_net_tx_avail(net, vq)) + continue; if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } - if (in) { - vq_err(vq, "Unexpected descriptor format for TX: " - "out %d, int %d\n", out, in); - break; - } - /* Skip header. TODO: support TSO. */ - len = iov_length(vq->iov, out); - iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); - iov_iter_advance(&msg.msg_iter, hdr_size); - /* Sanity check */ - if (!msg_data_left(&msg)) { - vq_err(vq, "Unexpected header len for TX: " - "%zd expected %zd\n", - len, hdr_size); - break; - } - len = msg_data_left(&msg); - - zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN - && (nvq->upend_idx + 1) % UIO_MAXIOV != - nvq->done_idx - && vhost_net_tx_select_zcopy(net); - - /* use msg_control to pass vhost zerocopy ubuf info to skb */ - if (zcopy_used) { - struct ubuf_info *ubuf; - ubuf = nvq->ubuf_info + nvq->upend_idx; - - vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); - vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; - ubuf->callback = vhost_zerocopy_callback; - ubuf->ctx = nvq->ubufs; - ubuf->desc = nvq->upend_idx; - refcount_set(&ubuf->refcnt, 1); - msg.msg_control = ubuf; - msg.msg_controllen = sizeof(ubuf); - ubufs = nvq->ubufs; - atomic_inc(&ubufs->refcount); - nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; - } else { - msg.msg_control = NULL; - ubufs = NULL; - } + for (i = 0; i < avails; i++) { + head = __vhost_get_vq_desc(vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL, + vhost16_to_cpu(vq, heads[i].id)); + if (in) { + vq_err(vq, "Unexpected descriptor format for " + "TX: out %d, int %d\n", out, in); + goto out; + } - total_len += len; - if (total_len < VHOST_NET_WEIGHT && - !vhost_vq_avail_empty(&net->dev, vq) && - likely(!vhost_exceeds_maxpend(net))) { - msg.msg_flags |= MSG_MORE; - } else { - msg.msg_flags &= ~MSG_MORE; - } + /* Skip header. TODO: support TSO. */ + len = iov_length(vq->iov, out); + iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); + iov_iter_advance(&msg.msg_iter, hdr_size); + /* Sanity check */ + if (!msg_data_left(&msg)) { + vq_err(vq, "Unexpected header len for TX: " + "%zd expected %zd\n", + len, hdr_size); + goto out; + } + len = msg_data_left(&msg); - /* TODO: Check specific error and bomb out unless ENOBUFS? */ - err = sock->ops->sendmsg(sock, &msg, len); - if (unlikely(err < 0)) { + zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN + && (nvq->upend_idx + 1) % UIO_MAXIOV != + nvq->done_idx + && vhost_net_tx_select_zcopy(net); + + /* use msg_control to pass vhost zerocopy ubuf + * info to skb + */ if (zcopy_used) { - vhost_net_ubuf_put(ubufs); - nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) - % UIO_MAXIOV; + struct ubuf_info *ubuf; + ubuf = nvq->ubuf_info + nvq->upend_idx; + + vq->heads[nvq->upend_idx].id = + cpu_to_vhost32(vq, head); + vq->heads[nvq->upend_idx].len = + VHOST_DMA_IN_PROGRESS; + ubuf->callback = vhost_zerocopy_callback; + ubuf->ctx = nvq->ubufs; + ubuf->desc = nvq->upend_idx; + refcount_set(&ubuf->refcnt, 1); + msg.msg_control = ubuf; + msg.msg_controllen = sizeof(ubuf); + ubufs = nvq->ubufs; + atomic_inc(&ubufs->refcount); + nvq->upend_idx = + (nvq->upend_idx + 1) % UIO_MAXIOV; + } else { + msg.msg_control = NULL; + ubufs = NULL; + } + + total_len += len; + if (total_len < VHOST_NET_WEIGHT && + !vhost_vq_avail_empty(&net->dev, vq) && + likely(!vhost_exceeds_maxpend(net))) { + msg.msg_flags |= MSG_MORE; + } else { + msg.msg_flags &= ~MSG_MORE; + } + + /* TODO: Check specific error and bomb out + * unless ENOBUFS? + */ + err = sock->ops->sendmsg(sock, &msg, len); + if (unlikely(err < 0)) { + if (zcopy_used) { + vhost_net_ubuf_put(ubufs); + nvq->upend_idx = + ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; + } + vhost_discard_vq_desc(vq, 1); + goto out; + } + if (err != len) + pr_debug("Truncated TX packet: " + " len %d != %zd\n", err, len); + if (!zcopy) { + vhost_add_used_idx(vq, 1); + vhost_signal(&net->dev, vq); + } else if (!zcopy_used) { + vhost_add_used_and_signal(&net->dev, + vq, head, 0); + } else + vhost_zerocopy_signal_used(net, vq); + vhost_net_tx_packet(net); + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + goto out; } - vhost_discard_vq_desc(vq, 1); - break; - } - if (err != len) - pr_debug("Truncated TX packet: " - " len %d != %zd\n", err, len); - if (!zcopy_used) - vhost_add_used_and_signal(&net->dev, vq, head, 0); - else - vhost_zerocopy_signal_used(net, vq); - vhost_net_tx_packet(net); - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { - vhost_poll_queue(&vq->poll); - break; } } out: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 6532cda..8764df5 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -392,7 +392,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV, GFP_KERNEL); vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL); - vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL); + vq->heads = kzalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL); if (!vq->indirect || !vq->log || !vq->heads) goto err_nomem; }