From patchwork Fri Sep 22 08:02:31 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 817376
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 3xz5bs0XBcz9s7C
	for <patchwork-incoming@ozlabs.org>;
	Fri, 22 Sep 2017 18:04:25 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1751979AbdIVICv (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);
	Fri, 22 Sep 2017 04:02:51 -0400
Received: from mx1.redhat.com ([209.132.183.28]:36254 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751704AbdIVICr (ORCPT <rfc822;netdev@vger.kernel.org>);
	Fri, 22 Sep 2017 04:02:47 -0400
Received: from smtp.corp.redhat.com
	(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mx1.redhat.com (Postfix) with ESMTPS id 48ACD25B9A;
	Fri, 22 Sep 2017 08:02:47 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 48ACD25B9A
Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com;
	spf=fail smtp.mailfrom=jasowang@redhat.com
Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com
	[10.72.12.19])
	by smtp.corp.redhat.com (Postfix) with ESMTP id C615763621;
	Fri, 22 Sep 2017 08:02:44 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Subject: [PATCH net-next RFC 1/5] vhost: split out ring head fetching logic
Date: Fri, 22 Sep 2017 16:02:31 +0800
Message-Id: <1506067355-5771-2-git-send-email-jasowang@redhat.com>
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
References: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.39]);
	Fri, 22 Sep 2017 08:02:47 +0000 (UTC)
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

This patch splits out ring head fetching logic and leave the
descriptor fetching and translation logic. This makes it is possible
to batch fetching the descriptor indices.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 75 +++++++++++++++++++++++++++++++++------------------
 drivers/vhost/vhost.h |  5 ++++
 2 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index d6dbb28..f87ec75 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1984,41 +1984,23 @@ static int get_indirect(struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access.  Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
- *
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found.  A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
-		      struct iovec iov[], unsigned int iov_size,
-		      unsigned int *out_num, unsigned int *in_num,
-		      struct vhost_log *log, unsigned int *log_num)
+static unsigned int vhost_get_vq_head(struct vhost_virtqueue *vq, int *err)
 {
-	struct vring_desc desc;
-	unsigned int i, head, found = 0;
-	u16 last_avail_idx;
-	__virtio16 avail_idx;
-	__virtio16 ring_head;
-	int ret, access;
-
-	/* Check it isn't doing very strange things with descriptor numbers. */
-	last_avail_idx = vq->last_avail_idx;
+	u16 last_avail_idx = vq->last_avail_idx;
+	__virtio16 avail_idx, ring_head;
 
 	if (vq->avail_idx == vq->last_avail_idx) {
 		if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
 			vq_err(vq, "Failed to access avail idx at %p\n",
 				&vq->avail->idx);
-			return -EFAULT;
+			goto err;
 		}
 		vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
 		if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
 			vq_err(vq, "Guest moved used index from %u to %u",
 				last_avail_idx, vq->avail_idx);
-			return -EFAULT;
+			goto err;
 		}
 
 		/* If there's nothing new since last we looked, return
@@ -2040,13 +2022,35 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
-		return -EFAULT;
+		goto err;
 	}
 
-	head = vhost16_to_cpu(vq, ring_head);
+	return vhost16_to_cpu(vq, ring_head);
+err:
+	*err = -EFAULT;
+	return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found.  A negative code is
+ * returned on error. */
+int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
+			struct iovec iov[], unsigned int iov_size,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			__virtio16 head)
+{
+	struct vring_desc desc;
+	unsigned int i, found = 0;
+	int ret = 0, access;
 
 	/* If their number is silly, that's an error. */
-	if (unlikely(head >= vq->num)) {
+	if (unlikely(head > vq->num)) {
 		vq_err(vq, "Guest says index %u > %u is available",
 		       head, vq->num);
 		return -EINVAL;
@@ -2133,6 +2137,25 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
 	return head;
 }
+EXPORT_SYMBOL_GPL(__vhost_get_vq_desc);
+
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+		      struct iovec iov[], unsigned int iov_size,
+		      unsigned int *out_num, unsigned int *in_num,
+		      struct vhost_log *log, unsigned int *log_num)
+{
+	int ret = 0;
+	unsigned int head = vhost_get_vq_head(vq, &ret);
+
+	if (ret)
+		return ret;
+
+	if (head == vq->num)
+		return head;
+
+	return __vhost_get_vq_desc(vq, iov, iov_size, out_num, in_num,
+				   log, log_num, head);
+}
 EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d59a9cc..39ff897 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -191,6 +191,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num);
+int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
+			struct iovec iov[], unsigned int iov_count,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			__virtio16 ring_head);
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
 int vhost_vq_init_access(struct vhost_virtqueue *);

From patchwork Fri Sep 22 08:02:32 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 817374
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 3xz5bB6KJlz9sNc
	for <patchwork-incoming@ozlabs.org>;
	Fri, 22 Sep 2017 18:03:50 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752013AbdIVIC4 (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);
	Fri, 22 Sep 2017 04:02:56 -0400
Received: from mx1.redhat.com ([209.132.183.28]:55846 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751995AbdIVICv (ORCPT <rfc822;netdev@vger.kernel.org>);
	Fri, 22 Sep 2017 04:02:51 -0400
Received: from smtp.corp.redhat.com
	(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mx1.redhat.com (Postfix) with ESMTPS id 2DBA3C0546D1;
	Fri, 22 Sep 2017 08:02:51 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 2DBA3C0546D1
Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com;
	spf=fail smtp.mailfrom=jasowang@redhat.com
Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com
	[10.72.12.19])
	by smtp.corp.redhat.com (Postfix) with ESMTP id D7AA962679;
	Fri, 22 Sep 2017 08:02:47 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Subject: [PATCH net-next RFC 2/5] vhost: introduce helper to prefetch desc
	index
Date: Fri, 22 Sep 2017 16:02:32 +0800
Message-Id: <1506067355-5771-3-git-send-email-jasowang@redhat.com>
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
References: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.32]);
	Fri, 22 Sep 2017 08:02:51 +0000 (UTC)
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

This patch introduces vhost_prefetch_desc_indices() which could batch
descriptor indices fetching and used ring updating. This intends to
reduce the cache misses of indices fetching and updating and reduce
cache line bounce when virtqueue is almost full. copy_to_user() was
used in order to benefit from modern cpus that support fast string
copy. Batched virtqueue processing will be the first user.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |  3 +++
 2 files changed, 58 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f87ec75..8424166d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2437,6 +2437,61 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
 }
 EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
 
+int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq,
+				struct vring_used_elem *heads,
+				u16 num, bool used_update)
+{
+	int ret, ret2;
+	u16 last_avail_idx, last_used_idx, total, copied;
+	__virtio16 avail_idx;
+	struct vring_used_elem __user *used;
+	int i;
+
+	if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
+		vq_err(vq, "Failed to access avail idx at %p\n",
+		       &vq->avail->idx);
+		return -EFAULT;
+	}
+	last_avail_idx = vq->last_avail_idx & (vq->num - 1);
+	vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+	total = vq->avail_idx - vq->last_avail_idx;
+	ret = total = min(total, num);
+
+	for (i = 0; i < ret; i++) {
+		ret2 = vhost_get_avail(vq, heads[i].id,
+				      &vq->avail->ring[last_avail_idx]);
+		if (unlikely(ret2)) {
+			vq_err(vq, "Failed to get descriptors\n");
+			return -EFAULT;
+		}
+		last_avail_idx = (last_avail_idx + 1) & (vq->num - 1);
+	}
+
+	if (!used_update)
+		return ret;
+
+	last_used_idx = vq->last_used_idx & (vq->num - 1);
+	while (total) {
+		copied = min((u16)(vq->num - last_used_idx), total);
+		ret2 = vhost_copy_to_user(vq,
+					  &vq->used->ring[last_used_idx],
+					  &heads[ret - total],
+					  copied * sizeof(*used));
+
+		if (unlikely(ret2)) {
+			vq_err(vq, "Failed to update used ring!\n");
+			return -EFAULT;
+		}
+
+		last_used_idx = 0;
+		total -= copied;
+	}
+
+	/* Only get avail ring entries after they have been exposed by guest. */
+	smp_rmb();
+	return ret;
+}
+EXPORT_SYMBOL(vhost_prefetch_desc_indices);
 
 static int __init vhost_init(void)
 {
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 39ff897..16c2cb6 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -228,6 +228,9 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
 ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
 			     struct iov_iter *from);
 int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
+int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq,
+				struct vring_used_elem *heads,
+				u16 num, bool used_update);
 
 #define vq_err(vq, fmt, ...) do {                                  \
 		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);       \

From patchwork Fri Sep 22 08:02:33 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 817375
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 3xz5bf2zcWz9sP1
	for <patchwork-incoming@ozlabs.org>;
	Fri, 22 Sep 2017 18:04:13 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1751875AbdIVID7 (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);
	Fri, 22 Sep 2017 04:03:59 -0400
Received: from mx1.redhat.com ([209.132.183.28]:57206 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751704AbdIVICy (ORCPT <rfc822;netdev@vger.kernel.org>);
	Fri, 22 Sep 2017 04:02:54 -0400
Received: from smtp.corp.redhat.com
	(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mx1.redhat.com (Postfix) with ESMTPS id 405DD1F57C;
	Fri, 22 Sep 2017 08:02:54 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 405DD1F57C
Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com;
	spf=fail smtp.mailfrom=jasowang@redhat.com
Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com
	[10.72.12.19])
	by smtp.corp.redhat.com (Postfix) with ESMTP id BDA926266E;
	Fri, 22 Sep 2017 08:02:51 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Subject: [PATCH net-next RFC 3/5] vhost: introduce vhost_add_used_idx()
Date: Fri, 22 Sep 2017 16:02:33 +0800
Message-Id: <1506067355-5771-4-git-send-email-jasowang@redhat.com>
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
References: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.30]);
	Fri, 22 Sep 2017 08:02:54 +0000 (UTC)
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

This patch introduces a helper which just increase the used idx. This
will be used in pair with vhost_prefetch_desc_indices() by batching
code.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 drivers/vhost/vhost.c | 33 +++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |  1 +
 2 files changed, 34 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 8424166d..6532cda 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2178,6 +2178,39 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
+int vhost_add_used_idx(struct vhost_virtqueue *vq, int n)
+{
+	u16 old, new;
+
+	old = vq->last_used_idx;
+	new = (vq->last_used_idx += n);
+	/* If the driver never bothers to signal in a very long while,
+	 * used index might wrap around. If that happens, invalidate
+	 * signalled_used index we stored. TODO: make sure driver
+	 * signals at least once in 2^16 and remove this.
+	 */
+	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+		vq->signalled_used_valid = false;
+
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
+			   &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Log used index update. */
+		log_write(vq->log_base,
+			  vq->log_addr + offsetof(struct vring_used, idx),
+			  sizeof(vq->used->idx));
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vhost_add_used_idx);
+
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 			    struct vring_used_elem *heads,
 			    unsigned count)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 16c2cb6..5dd6c05 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -199,6 +199,7 @@ int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
 int vhost_vq_init_access(struct vhost_virtqueue *);
+int vhost_add_used_idx(struct vhost_virtqueue *vq, int n);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
 int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
 		     unsigned count);

From patchwork Fri Sep 22 08:02:34 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 817373
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 3xz5Zw5wyCz9sNc
	for <patchwork-incoming@ozlabs.org>;
	Fri, 22 Sep 2017 18:03:36 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752052AbdIVIDA (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);
	Fri, 22 Sep 2017 04:03:00 -0400
Received: from mx1.redhat.com ([209.132.183.28]:36662 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751995AbdIVIC5 (ORCPT <rfc822;netdev@vger.kernel.org>);
	Fri, 22 Sep 2017 04:02:57 -0400
Received: from smtp.corp.redhat.com
	(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mx1.redhat.com (Postfix) with ESMTPS id 71C8B61462;
	Fri, 22 Sep 2017 08:02:57 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 71C8B61462
Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com;
	spf=fail smtp.mailfrom=jasowang@redhat.com
Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com
	[10.72.12.19])
	by smtp.corp.redhat.com (Postfix) with ESMTP id 0E5E16266E;
	Fri, 22 Sep 2017 08:02:54 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Subject: [PATCH net-next RFC 4/5] vhost_net: rename VHOST_RX_BATCH to
	VHOST_NET_BATCH
Date: Fri, 22 Sep 2017 16:02:34 +0800
Message-Id: <1506067355-5771-5-git-send-email-jasowang@redhat.com>
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
References: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.39]);
	Fri, 22 Sep 2017 08:02:57 +0000 (UTC)
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 58585ec..c89640e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -87,7 +87,7 @@ struct vhost_net_ubuf_ref {
 	struct vhost_virtqueue *vq;
 };
 
-#define VHOST_RX_BATCH 64
+#define VHOST_NET_BATCH 64
 struct vhost_net_buf {
 	struct sk_buff **queue;
 	int tail;
@@ -159,7 +159,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
 
 	rxq->head = 0;
 	rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
-					      VHOST_RX_BATCH);
+					      VHOST_NET_BATCH);
 	return rxq->tail;
 }
 
@@ -912,7 +912,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		return -ENOMEM;
 	}
 
-	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+	queue = kmalloc_array(VHOST_NET_BATCH, sizeof(struct sk_buff *),
 			      GFP_KERNEL);
 	if (!queue) {
 		kfree(vqs);

From patchwork Fri Sep 22 08:02:35 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jason Wang <jasowang@redhat.com>
X-Patchwork-Id: 817372
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 3xz5Zb3hncz9sNc
	for <patchwork-incoming@ozlabs.org>;
	Fri, 22 Sep 2017 18:03:19 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752076AbdIVIDG (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);
	Fri, 22 Sep 2017 04:03:06 -0400
Received: from mx1.redhat.com ([209.132.183.28]:35108 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751995AbdIVIDC (ORCPT <rfc822;netdev@vger.kernel.org>);
	Fri, 22 Sep 2017 04:03:02 -0400
Received: from smtp.corp.redhat.com
	(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])
	(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mx1.redhat.com (Postfix) with ESMTPS id AA2BB7E45B;
	Fri, 22 Sep 2017 08:03:02 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com AA2BB7E45B
Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com;
	spf=fail smtp.mailfrom=jasowang@redhat.com
Received: from jason-ThinkPad-T450s.redhat.com (ovpn-12-19.pek2.redhat.com
	[10.72.12.19])
	by smtp.corp.redhat.com (Postfix) with ESMTP id 148C062675;
	Fri, 22 Sep 2017 08:02:57 +0000 (UTC)
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Subject: [PATCH net-next RFC 5/5] vhost_net: basic tx virtqueue batched
	processing
Date: Fri, 22 Sep 2017 16:02:35 +0800
Message-Id: <1506067355-5771-6-git-send-email-jasowang@redhat.com>
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
References: <1506067355-5771-1-git-send-email-jasowang@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.27]);
	Fri, 22 Sep 2017 08:03:02 +0000 (UTC)
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

This patch implements basic batched processing of tx virtqueue by
prefetching desc indices and updating used ring in a batch. For
non-zerocopy case, vq->heads were used for storing the prefetched
indices and updating used ring. It is also a requirement for doing
more batching on top. For zerocopy case and for simplicity, batched
processing were simply disabled by only fetching and processing one
descriptor at a time, this could be optimized in the future.

XDP_DROP (without touching skb) on tun (with Moongen in guest) with
zercopy disabled:

Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz:
Before: 3.20Mpps
After:  3.90Mpps (+22%)

No differences were seen with zerocopy enabled.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c   | 215 ++++++++++++++++++++++++++++----------------------
 drivers/vhost/vhost.c |   2 +-
 2 files changed, 121 insertions(+), 96 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c89640e..c439892 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -408,27 +408,25 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
-static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-				    struct vhost_virtqueue *vq,
-				    struct iovec iov[], unsigned int iov_size,
-				    unsigned int *out_num, unsigned int *in_num)
+static bool vhost_net_tx_avail(struct vhost_net *net,
+			       struct vhost_virtqueue *vq)
 {
 	unsigned long uninitialized_var(endtime);
-	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				  out_num, in_num, NULL, NULL);
 
-	if (r == vq->num && vq->busyloop_timeout) {
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
-			cpu_relax();
-		preempt_enable();
-		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				      out_num, in_num, NULL, NULL);
-	}
+	if (!vq->busyloop_timeout)
+		return false;
 
-	return r;
+	if (!vhost_vq_avail_empty(vq->dev, vq))
+		return true;
+
+	preempt_disable();
+	endtime = busy_clock() + vq->busyloop_timeout;
+	while (vhost_can_busy_poll(vq->dev, endtime) &&
+		vhost_vq_avail_empty(vq->dev, vq))
+		cpu_relax();
+	preempt_enable();
+
+	return !vhost_vq_avail_empty(vq->dev, vq);
 }
 
 static bool vhost_exceeds_maxpend(struct vhost_net *net)
@@ -446,8 +444,9 @@ static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	struct vring_used_elem used, *heads = vq->heads;
 	unsigned out, in;
-	int head;
+	int avails, head;
 	struct msghdr msg = {
 		.msg_name = NULL,
 		.msg_namelen = 0,
@@ -461,6 +460,7 @@ static void handle_tx(struct vhost_net *net)
 	struct socket *sock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy, zcopy_used;
+	int i, batched = VHOST_NET_BATCH;
 
 	mutex_lock(&vq->mutex);
 	sock = vq->private_data;
@@ -475,6 +475,12 @@ static void handle_tx(struct vhost_net *net)
 	hdr_size = nvq->vhost_hlen;
 	zcopy = nvq->ubufs;
 
+	/* Disable zerocopy batched fetching for simplicity */
+	if (zcopy) {
+		heads = &used;
+		batched = 1;
+	}
+
 	for (;;) {
 		/* Release DMAs done buffers first */
 		if (zcopy)
@@ -486,95 +492,114 @@ static void handle_tx(struct vhost_net *net)
 		if (unlikely(vhost_exceeds_maxpend(net)))
 			break;
 
-		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
-						ARRAY_SIZE(vq->iov),
-						&out, &in);
+		avails = vhost_prefetch_desc_indices(vq, heads, batched, !zcopy);
 		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
+		if (unlikely(avails < 0))
 			break;
-		/* Nothing new?  Wait for eventfd to tell us they refilled. */
-		if (head == vq->num) {
+		/* Nothing new?  Busy poll for a while or wait for
+		 * eventfd to tell us they refilled. */
+		if (!avails) {
+			if (vhost_net_tx_avail(net, vq))
+				continue;
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
 			break;
 		}
-		if (in) {
-			vq_err(vq, "Unexpected descriptor format for TX: "
-			       "out %d, int %d\n", out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO. */
-		len = iov_length(vq->iov, out);
-		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
-		iov_iter_advance(&msg.msg_iter, hdr_size);
-		/* Sanity check */
-		if (!msg_data_left(&msg)) {
-			vq_err(vq, "Unexpected header len for TX: "
-			       "%zd expected %zd\n",
-			       len, hdr_size);
-			break;
-		}
-		len = msg_data_left(&msg);
-
-		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
-				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
-				      nvq->done_idx
-				   && vhost_net_tx_select_zcopy(net);
-
-		/* use msg_control to pass vhost zerocopy ubuf info to skb */
-		if (zcopy_used) {
-			struct ubuf_info *ubuf;
-			ubuf = nvq->ubuf_info + nvq->upend_idx;
-
-			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
-			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
-			ubuf->callback = vhost_zerocopy_callback;
-			ubuf->ctx = nvq->ubufs;
-			ubuf->desc = nvq->upend_idx;
-			refcount_set(&ubuf->refcnt, 1);
-			msg.msg_control = ubuf;
-			msg.msg_controllen = sizeof(ubuf);
-			ubufs = nvq->ubufs;
-			atomic_inc(&ubufs->refcount);
-			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
-		} else {
-			msg.msg_control = NULL;
-			ubufs = NULL;
-		}
+		for (i = 0; i < avails; i++) {
+			head = __vhost_get_vq_desc(vq, vq->iov,
+						   ARRAY_SIZE(vq->iov),
+						   &out, &in, NULL, NULL,
+					       vhost16_to_cpu(vq, heads[i].id));
+			if (in) {
+				vq_err(vq, "Unexpected descriptor format for "
+					   "TX: out %d, int %d\n", out, in);
+				goto out;
+			}
 
-		total_len += len;
-		if (total_len < VHOST_NET_WEIGHT &&
-		    !vhost_vq_avail_empty(&net->dev, vq) &&
-		    likely(!vhost_exceeds_maxpend(net))) {
-			msg.msg_flags |= MSG_MORE;
-		} else {
-			msg.msg_flags &= ~MSG_MORE;
-		}
+			/* Skip header. TODO: support TSO. */
+			len = iov_length(vq->iov, out);
+			iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
+			iov_iter_advance(&msg.msg_iter, hdr_size);
+			/* Sanity check */
+			if (!msg_data_left(&msg)) {
+				vq_err(vq, "Unexpected header len for TX: "
+					"%zd expected %zd\n",
+					len, hdr_size);
+				goto out;
+			}
+			len = msg_data_left(&msg);
 
-		/* TODO: Check specific error and bomb out unless ENOBUFS? */
-		err = sock->ops->sendmsg(sock, &msg, len);
-		if (unlikely(err < 0)) {
+			zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
+				     && (nvq->upend_idx + 1) % UIO_MAXIOV !=
+					nvq->done_idx
+				     && vhost_net_tx_select_zcopy(net);
+
+			/* use msg_control to pass vhost zerocopy ubuf
+			 * info to skb
+			 */
 			if (zcopy_used) {
-				vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
+				struct ubuf_info *ubuf;
+				ubuf = nvq->ubuf_info + nvq->upend_idx;
+
+				vq->heads[nvq->upend_idx].id =
+					cpu_to_vhost32(vq, head);
+				vq->heads[nvq->upend_idx].len =
+					VHOST_DMA_IN_PROGRESS;
+				ubuf->callback = vhost_zerocopy_callback;
+				ubuf->ctx = nvq->ubufs;
+				ubuf->desc = nvq->upend_idx;
+				refcount_set(&ubuf->refcnt, 1);
+				msg.msg_control = ubuf;
+				msg.msg_controllen = sizeof(ubuf);
+				ubufs = nvq->ubufs;
+				atomic_inc(&ubufs->refcount);
+				nvq->upend_idx =
+					(nvq->upend_idx + 1) % UIO_MAXIOV;
+			} else {
+				msg.msg_control = NULL;
+				ubufs = NULL;
+			}
+
+			total_len += len;
+			if (total_len < VHOST_NET_WEIGHT &&
+				!vhost_vq_avail_empty(&net->dev, vq) &&
+				likely(!vhost_exceeds_maxpend(net))) {
+				msg.msg_flags |= MSG_MORE;
+			} else {
+				msg.msg_flags &= ~MSG_MORE;
+			}
+
+			/* TODO: Check specific error and bomb out
+			 * unless ENOBUFS?
+			 */
+			err = sock->ops->sendmsg(sock, &msg, len);
+			if (unlikely(err < 0)) {
+				if (zcopy_used) {
+					vhost_net_ubuf_put(ubufs);
+					nvq->upend_idx =
+				   ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV;
+				}
+				vhost_discard_vq_desc(vq, 1);
+				goto out;
+			}
+			if (err != len)
+				pr_debug("Truncated TX packet: "
+					" len %d != %zd\n", err, len);
+			if (!zcopy) {
+				vhost_add_used_idx(vq, 1);
+				vhost_signal(&net->dev, vq);
+			} else if (!zcopy_used) {
+				vhost_add_used_and_signal(&net->dev,
+							  vq, head, 0);
+			} else
+				vhost_zerocopy_signal_used(net, vq);
+			vhost_net_tx_packet(net);
+			if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+				vhost_poll_queue(&vq->poll);
+				goto out;
 			}
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		if (err != len)
-			pr_debug("Truncated TX packet: "
-				 " len %d != %zd\n", err, len);
-		if (!zcopy_used)
-			vhost_add_used_and_signal(&net->dev, vq, head, 0);
-		else
-			vhost_zerocopy_signal_used(net, vq);
-		vhost_net_tx_packet(net);
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
 		}
 	}
 out:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6532cda..8764df5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -392,7 +392,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 		vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV,
 				       GFP_KERNEL);
 		vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL);
-		vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
+		vq->heads = kzalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
 		if (!vq->indirect || !vq->log || !vq->heads)
 			goto err_nomem;
 	}