From patchwork Wed Aug 4 16:15:59 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Krishna Kumar X-Patchwork-Id: 60870 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3562EB6F10 for ; Thu, 5 Aug 2010 02:16:17 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756437Ab0HDQQI (ORCPT ); Wed, 4 Aug 2010 12:16:08 -0400 Received: from e28smtp05.in.ibm.com ([122.248.162.5]:49649 "EHLO e28smtp05.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755659Ab0HDQQF (ORCPT ); Wed, 4 Aug 2010 12:16:05 -0400 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by e28smtp05.in.ibm.com (8.14.4/8.13.1) with ESMTP id o74GG1Vs012882 for ; Wed, 4 Aug 2010 21:46:01 +0530 Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id o74GG1as3293426 for ; Wed, 4 Aug 2010 21:46:01 +0530 Received: from d28av03.in.ibm.com (loopback [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id o74GG0gF009014 for ; Thu, 5 Aug 2010 02:16:01 +1000 Received: from krkumar2.in.ibm.com ([9.124.220.122]) by d28av03.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id o74GFxiC009002; Thu, 5 Aug 2010 02:15:59 +1000 From: Krishna Kumar To: davem@davemloft.net, arnd@arndb.de Cc: mst@redhat.com, netdev@vger.kernel.org, xiaosuo@gmail.com, bhutchings@solarflare.com, Krishna Kumar , therbert@google.com Date: Wed, 04 Aug 2010 21:45:59 +0530 Message-Id: <20100804161559.3814.52060.sendpatchset@krkumar2.in.ibm.com> In-Reply-To: <20100804161552.3814.56839.sendpatchset@krkumar2.in.ibm.com> References: <20100804161552.3814.56839.sendpatchset@krkumar2.in.ibm.com> Subject: [PATCH v5 2/2] macvtap: Implement multiqueue for macvtap driver Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Krishna Kumar Implement multiqueue facility for macvtap driver. The idea is that a macvtap device can be opened multiple times and the fd's can be used to register eg, as backend for vhost. Signed-off-by: Krishna Kumar --- drivers/net/macvtap.c | 99 ++++++++++++++++++++++++++++------- include/linux/if_macvlan.h | 9 ++- 2 files changed, 90 insertions(+), 18 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff -ruNp org/include/linux/if_macvlan.h new/include/linux/if_macvlan.h --- org/include/linux/if_macvlan.h 2010-08-04 20:45:19.000000000 +0530 +++ new/include/linux/if_macvlan.h 2010-08-04 20:45:19.000000000 +0530 @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, diff -ruNp org/drivers/net/macvtap.c new/drivers/net/macvtap.c --- org/drivers/net/macvtap.c 2010-08-04 20:45:19.000000000 +0530 +++ new/drivers/net/macvtap.c 2010-08-04 21:20:49.000000000 +0530 @@ -84,26 +84,45 @@ static const struct proto_ops macvtap_so static DEFINE_SPINLOCK(macvtap_lock); /* - * Choose the next free queue, for now there is only one + * get_slot: return a [unused/occupied] slot in vlan->taps[]: + * - if 'q' is NULL, return the first empty slot; + * - otherwise, return the slot this pointer occupies. */ +static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) +{ + int i; + + for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { + if (rcu_dereference(vlan->taps[i]) == q) + return i; + } + + /* Should never happen */ + BUG_ON(1); +} + static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); + int index; int err = -EBUSY; spin_lock(&macvtap_lock); - if (rcu_dereference(vlan->tap)) + if (vlan->numvtaps == MAX_MACVTAP_QUEUES) goto out; err = 0; + index = get_slot(vlan, NULL); rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->tap, q); + rcu_assign_pointer(vlan->taps[index], q); sock_hold(&q->sk); q->file = file; file->private_data = q; + vlan->numvtaps++; + out: spin_unlock(&macvtap_lock); return err; @@ -124,9 +143,12 @@ static void macvtap_put_queue(struct mac spin_lock(&macvtap_lock); vlan = rcu_dereference(q->vlan); if (vlan) { - rcu_assign_pointer(vlan->tap, NULL); + int index = get_slot(vlan, q); + + rcu_assign_pointer(vlan->taps[index], NULL); rcu_assign_pointer(q->vlan, NULL); sock_put(&q->sk); + --vlan->numvtaps; } spin_unlock(&macvtap_lock); @@ -136,39 +158,82 @@ static void macvtap_put_queue(struct mac } /* - * Since we only support one queue, just dereference the pointer. + * Select a queue based on the rxq of the device on which this packet + * arrived. If the incoming device is not mq, calculate a flow hash + * to select a queue. If all fails, find the first available queue. + * Cache vlan->numvtaps since it can become zero during the execution + * of this function. */ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, struct sk_buff *skb) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *tap = NULL; + int numvtaps = vlan->numvtaps; + __u32 rxq; + + if (!numvtaps) + goto out; + + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq >= numvtaps)) + rxq -= numvtaps; + + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + goto out; + } + + /* Check if we can use flow to select a queue */ + rxq = skb_get_rxhash(skb); + if (rxq) { + tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + if (tap) + goto out; + } - return rcu_dereference(vlan->tap); + /* Everything failed - find first available queue */ + for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + break; + } + +out: + return tap; } /* * The net_device is going away, give up the reference - * that it holds on the queue (all the queues one day) - * and safely set the pointer from the queues to NULL. + * that it holds on all queues and safely set the pointer + * from the queues to NULL. */ static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q; + struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; + int i, j = 0; + /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - q = rcu_dereference(vlan->tap); - if (!q) { - spin_unlock(&macvtap_lock); - return; + for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { + q = rcu_dereference(vlan->taps[i]); + if (q) { + qlist[j++] = q; + rcu_assign_pointer(vlan->taps[i], NULL); + rcu_assign_pointer(q->vlan, NULL); + vlan->numvtaps--; + } } - - rcu_assign_pointer(vlan->tap, NULL); - rcu_assign_pointer(q->vlan, NULL); + BUG_ON(vlan->numvtaps != 0); spin_unlock(&macvtap_lock); synchronize_rcu(); - sock_put(&q->sk); + + for (--j; j >= 0; j--) + sock_put(&qlist[j]->sk); } /*