Patchwork [1/2,kvm/virtio] : make virtio support NUMA attr

login
register
mail settings
Submitter Liu Ping Fan
Date May 17, 2012, 9:20 a.m.
Message ID <1337246456-30909-4-git-send-email-kernelfans@gmail.com>
Download mbox | patch
Permalink /patch/159868/
State New
Headers show

Comments

Liu Ping Fan - May 17, 2012, 9:20 a.m.
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

For each NUMA node reported by vhost, we allocate a pair of I/O vqs,
assign them MSI-X IRQs, and set each IRQ's affinity to the set of vcpus
in the same node.
We also allocate vqs aligned to PAGE_SIZE, so that they are allocated by
the host when page faults happen on different nodes.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 drivers/virtio/virtio.c       |    2 +-
 drivers/virtio/virtio_pci.c   |   35 +++++++++++++++++++++++++++++++++--
 drivers/virtio/virtio_ring.c  |    9 ++++++---
 include/linux/virtio.h        |    9 +++++++++
 include/linux/virtio_config.h |    1 +
 include/linux/virtio_pci.h    |    9 +++++++++
 6 files changed, 59 insertions(+), 6 deletions(-)

Patch

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 984c501..79e873f 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -136,7 +136,7 @@  static int virtio_dev_probe(struct device *_d)
 			set_bit(i, dev->features);
 
 	dev->config->finalize_features(dev);
-
+	dev->config->get_numa_map(dev);
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d41..5bb8a97 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -129,6 +129,24 @@  static void vp_finalize_features(struct virtio_device *vdev)
 	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
 }
 
+/*
+ * Read the device's NUMA bitmap into vdev->allow_map and store the number
+ * of set bits in vdev->node_cnt.
+ */
+static void vp_get_numa_map(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	unsigned long bit;
+	int cnt = 0;
+
+	/* We only support 32 numa bits. */
+	vdev->allow_map = ioread32(vp_dev->ioaddr+VIRTIO_PCI_NUMA_MAP);
+	/* Visit each set bit exactly once; the count must start at zero. */
+	for_each_set_bit(bit, &vdev->allow_map, 32)
+		cnt++;
+	vdev->node_cnt = cnt;
+}
+
 /* virtio config->get() implementation */
 static void vp_get(struct virtio_device *vdev, unsigned offset,
 		   void *buf, unsigned len)
@@ -516,6 +534,8 @@  static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	u16 msix_vec;
 	int i, err, nvectors, allocated_vectors;
+	int irq, next, prev = 0;
+	struct cpumask *mask;
 
 	if (!use_msix) {
 		/* Old style: one normal interrupt for change and all vqs. */
@@ -562,14 +582,24 @@  static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			 sizeof *vp_dev->msix_names,
 			 "%s-%s",
 			 dev_name(&vp_dev->vdev.dev), names[i]);
-		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
-				  vring_interrupt, 0,
+		irq = vp_dev->msix_entries[msix_vec].vector;
+		err = request_irq(irq, vring_interrupt, 0,
 				  vp_dev->msix_names[msix_vec],
 				  vqs[i]);
 		if (err) {
 			vp_del_vq(vqs[i]);
 			goto error_find;
 		}
+		if (i == vdev->node_cnt)
+			prev = 0;
+		/* fix me the @size */
+		next = find_next_bit(vdev->allow_map, 64, prev);
+		prev = next;
+		if (next < 64) {
+			mask = vnode_to_vcpumask(next);
+			mask = cpumask_and(mask, cpu_online_mask, mask);
+			irq_set_affinity(irq, mask);
+		}
 	}
 	return 0;
 
@@ -619,6 +649,7 @@  static struct virtio_config_ops virtio_pci_config_ops = {
 	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
+	.get_numa_map = vp_get_numa_map,
 	.bus_name	= vp_bus_name,
 };
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3..5baa949 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -626,15 +626,18 @@  struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      const char *name)
 {
 	struct vring_virtqueue *vq;
-	unsigned int i;
+	unsigned int i, size, max;
 
 	/* We assume num is a power of 2. */
 	if (num & (num - 1)) {
 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 		return NULL;
 	}
-
-	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+	size = PAGE_ALIGN (sizeof(*vq) + sizeof(void *)*num);
+	/* Allocate on PAGE boundary, so host can locate them at proper
+	 * node
+	 */
+	vq = kmalloc(size, GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8efd28a..ec992c9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,12 @@ 
 #include <linux/mod_devicetable.h>
 #include <linux/gfp.h>
 
+struct virtio_node {	/* a node id plus a pair of virtqueues */
+	int node_id;		/* presumably the NUMA node index -- confirm */
+	struct virtqueue *rvq;	/* presumably the receive vq -- confirm */
+	struct virtqueue *svq;	/* presumably the send vq -- confirm */
+};
+
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
  * @list: the chain of virtqueues for this device
@@ -22,6 +28,7 @@  struct virtqueue {
 	void (*callback)(struct virtqueue *vq);
 	const char *name;
 	struct virtio_device *vdev;
+	struct virtio_node *node;
 	void *priv;
 };
 
@@ -66,6 +73,8 @@  struct virtio_device {
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
 	struct list_head vqs;
+	int node_cnt;
+	unsigned long allow_map;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
 	void *priv;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7323a33..5e2fd77 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -124,6 +124,7 @@  struct virtio_config_ops {
 	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
+	void (*get_numa_map)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
 };
 
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index ea66f3f..1426717 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -78,9 +78,18 @@ 
 /* Vector value used to disable MSI for queue */
 #define VIRTIO_MSI_NO_VECTOR            0xffff
 
+#ifdef VIRTIO_NUMA
+/* 32bits to show allowed numa */
+#define VIRTIO_PCI_NUMA_MAP         24
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)		28
+#else
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
 #define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
+#endif
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0