[6/8] netfront: multi-page ring support

Message ID 1360944010-15336-7-git-send-email-wei.liu2@citrix.com
State Not Applicable, archived
Delegated to: David Miller

Commit Message

Wei Liu Feb. 15, 2013, 4 p.m. UTC
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
 1 file changed, 174 insertions(+), 72 deletions(-)

Comments

Annie.li Feb. 26, 2013, 6:52 a.m. UTC | #1
On 2013-2-16 0:00, Wei Liu wrote:
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>   drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
>   1 file changed, 174 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 8bd75a1..de73a71 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -67,9 +67,19 @@ struct netfront_cb {
>
>   #define GRANT_INVALID_REF	0
>
> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
> +
> +
> +#define NET_TX_RING_SIZE(_nr_pages)			\
> +	__CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
> +#define NET_RX_RING_SIZE(_nr_pages)			\
> +	__CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
> +
> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
> +
> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)

Not using multi-page ring here?
In xennet_create_dev, gnttab_alloc_grant_references allocates 
TX_MAX_TARGET grant references for tx. In 
xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants 
are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally 
different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although 
skb_entry_is_link helps to avoid releasing invalid grants, lots of null 
loops seem unnecessary. I think TX_MAX_TARGET should be changed into some 
variable connected with np->tx_ring_pages. Or you intended to use one 
page ring here?
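
To make the mismatch concrete, here is a sketch of the two call sites
involved (paraphrased from the driver, not a verbatim excerpt):

	/* xennet_create_dev(): grant refs sized for a one-page ring */
	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
					  &np->gref_tx_head) < 0) {
		/* error handling elided */
	}

	/* xennet_release_tx_bufs(): loop sized for the negotiated ring */
	for (i = 0; i < NET_TX_RING_SIZE(np->tx_ring_pages); i++) {
		/* with tx_ring_pages > 1, most entries are freelist
		 * links, so these iterations do nothing useful */
		if (skb_entry_is_link(&np->tx_skbs[i]))
			continue;
		/* ... release skb and grant ... */
	}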

>
>   struct netfront_stats {
>   	u64			rx_packets;
> @@ -80,6 +90,11 @@ struct netfront_stats {
>   };
>
>   struct netfront_info {
> +	/* Statistics */
> +	struct netfront_stats __percpu *stats;
> +
> +	unsigned long rx_gso_checksum_fixup;
> +
>   	struct list_head list;
>   	struct net_device *netdev;
>
> @@ -90,7 +105,9 @@ struct netfront_info {
>
>   	spinlock_t   tx_lock;
>   	struct xen_netif_tx_front_ring tx;
> -	int tx_ring_ref;
> +	int tx_ring_ref[XENNET_MAX_RING_PAGES];
> +	unsigned int tx_ring_page_order;
> +	unsigned int tx_ring_pages;
>
>   	/*
>   	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
> @@ -104,36 +121,33 @@ struct netfront_info {
>   	union skb_entry {
>   		struct sk_buff *skb;
>   		unsigned long link;
> -	} tx_skbs[NET_TX_RING_SIZE];
> +	} tx_skbs[XENNET_MAX_TX_RING_SIZE];
>   	grant_ref_t gref_tx_head;
> -	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
> +	grant_ref_t grant_tx_ref[XENNET_MAX_TX_RING_SIZE];
>   	unsigned tx_skb_freelist;
>
>   	spinlock_t   rx_lock ____cacheline_aligned_in_smp;
>   	struct xen_netif_rx_front_ring rx;
> -	int rx_ring_ref;
> +	int rx_ring_ref[XENNET_MAX_RING_PAGES];
> +	unsigned int rx_ring_page_order;
> +	unsigned int rx_ring_pages;
>
>   	/* Receive-ring batched refills. */
>   #define RX_MIN_TARGET 8
>   #define RX_DFL_MIN_TARGET 64
> -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
> +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE(1), 256)

Not using multi-page ring here?
(See comments of tx side above)

Thanks
Annie

>   	unsigned rx_min_target, rx_max_target, rx_target;
>   	struct sk_buff_head rx_batch;
>
>   	struct timer_list rx_refill_timer;
>
> -	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
> +	struct sk_buff *rx_skbs[XENNET_MAX_RX_RING_SIZE];
>   	grant_ref_t gref_rx_head;
> -	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
> -
> -	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
> -	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
> -	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
> -
> -	/* Statistics */
> -	struct netfront_stats __percpu *stats;
> +	grant_ref_t grant_rx_ref[XENNET_MAX_RX_RING_SIZE];
>
> -	unsigned long rx_gso_checksum_fixup;
> +	unsigned long rx_pfn_array[XENNET_MAX_RX_RING_SIZE];
> +	struct multicall_entry rx_mcl[XENNET_MAX_RX_RING_SIZE+1];
> +	struct mmu_update rx_mmu[XENNET_MAX_RX_RING_SIZE];
>   };
>
>   struct netfront_rx_info {
> @@ -171,15 +185,15 @@ static unsigned short get_id_from_freelist(unsigned *head,
>   	return id;
>   }
>
> -static int xennet_rxidx(RING_IDX idx)
> +static int xennet_rxidx(RING_IDX idx, struct netfront_info *info)
>   {
> -	return idx & (NET_RX_RING_SIZE - 1);
> +	return idx & (NET_RX_RING_SIZE(info->rx_ring_pages) - 1);
>   }
>
>   static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
>   					 RING_IDX ri)
>   {
> -	int i = xennet_rxidx(ri);
> +	int i = xennet_rxidx(ri, np);
>   	struct sk_buff *skb = np->rx_skbs[i];
>   	np->rx_skbs[i] = NULL;
>   	return skb;
> @@ -188,7 +202,7 @@ static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
>   static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
>   					    RING_IDX ri)
>   {
> -	int i = xennet_rxidx(ri);
> +	int i = xennet_rxidx(ri, np);
>   	grant_ref_t ref = np->grant_rx_ref[i];
>   	np->grant_rx_ref[i] = GRANT_INVALID_REF;
>   	return ref;
> @@ -301,7 +315,7 @@ no_skb:
>
>   		skb->dev = dev;
>
> -		id = xennet_rxidx(req_prod + i);
> +		id = xennet_rxidx(req_prod + i, np);
>
>   		BUG_ON(np->rx_skbs[id]);
>   		np->rx_skbs[id] = skb;
> @@ -653,7 +667,7 @@ static int xennet_close(struct net_device *dev)
>   static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
>   				grant_ref_t ref)
>   {
> -	int new = xennet_rxidx(np->rx.req_prod_pvt);
> +	int new = xennet_rxidx(np->rx.req_prod_pvt, np);
>
>   	BUG_ON(np->rx_skbs[new]);
>   	np->rx_skbs[new] = skb;
> @@ -1109,7 +1123,7 @@ static void xennet_release_tx_bufs(struct netfront_info *np)
>   	struct sk_buff *skb;
>   	int i;
>
> -	for (i = 0; i < NET_TX_RING_SIZE; i++) {
> +	for (i = 0; i < NET_TX_RING_SIZE(np->tx_ring_pages); i++) {
>   		/* Skip over entries which are actually freelist references */
>   		if (skb_entry_is_link(&np->tx_skbs[i]))
>   			continue;
> @@ -1143,7 +1157,7 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
>
>   	spin_lock_bh(&np->rx_lock);
>
> -	for (id = 0; id < NET_RX_RING_SIZE; id++) {
> +	for (id = 0; id < NET_RX_RING_SIZE(np->rx_ring_pages); id++) {
>   		ref = np->grant_rx_ref[id];
>   		if (ref == GRANT_INVALID_REF) {
>   			unused++;
> @@ -1324,13 +1338,13 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
>
>   	/* Initialise tx_skbs as a free chain containing every entry. */
>   	np->tx_skb_freelist = 0;
> -	for (i = 0; i < NET_TX_RING_SIZE; i++) {
> +	for (i = 0; i < XENNET_MAX_TX_RING_SIZE; i++) {
>   		skb_entry_set_link(&np->tx_skbs[i], i+1);
>   		np->grant_tx_ref[i] = GRANT_INVALID_REF;
>   	}
>
>   	/* Clear out rx_skbs */
> -	for (i = 0; i < NET_RX_RING_SIZE; i++) {
> +	for (i = 0; i < XENNET_MAX_RX_RING_SIZE; i++) {
>   		np->rx_skbs[i] = NULL;
>   		np->grant_rx_ref[i] = GRANT_INVALID_REF;
>   	}
> @@ -1428,13 +1442,6 @@ static int netfront_probe(struct xenbus_device *dev,
>   	return err;
>   }
>
> -static void xennet_end_access(int ref, void *page)
> -{
> -	/* This frees the page as a side-effect */
> -	if (ref != GRANT_INVALID_REF)
> -		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
> -}
> -
>   static void xennet_disconnect_backend(struct netfront_info *info)
>   {
>   	/* Stop old i/f to prevent errors whilst we rebuild the state. */
> @@ -1448,12 +1455,12 @@ static void xennet_disconnect_backend(struct netfront_info *info)
>   		unbind_from_irqhandler(info->netdev->irq, info->netdev);
>   	info->evtchn = info->netdev->irq = 0;
>
> -	/* End access and free the pages */
> -	xennet_end_access(info->tx_ring_ref, info->tx.sring);
> -	xennet_end_access(info->rx_ring_ref, info->rx.sring);
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
> +	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
> +
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
> +	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
>
> -	info->tx_ring_ref = GRANT_INVALID_REF;
> -	info->rx_ring_ref = GRANT_INVALID_REF;
>   	info->tx.sring = NULL;
>   	info->rx.sring = NULL;
>   }
> @@ -1501,11 +1508,14 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>   	struct xen_netif_tx_sring *txs;
>   	struct xen_netif_rx_sring *rxs;
>   	int err;
> -	int grefs[1];
>   	struct net_device *netdev = info->netdev;
> +	unsigned int max_tx_ring_page_order, max_rx_ring_page_order;
> +	int i;
>
> -	info->tx_ring_ref = GRANT_INVALID_REF;
> -	info->rx_ring_ref = GRANT_INVALID_REF;
> +	for (i = 0; i < XENNET_MAX_RING_PAGES; i++) {
> +		info->tx_ring_ref[i] = GRANT_INVALID_REF;
> +		info->rx_ring_ref[i] = GRANT_INVALID_REF;
> +	}
>   	info->rx.sring = NULL;
>   	info->tx.sring = NULL;
>   	netdev->irq = 0;
> @@ -1516,50 +1526,100 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>   		goto fail;
>   	}
>
> -	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
> +	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> +			   "max-tx-ring-page-order", "%u",
> +			   &max_tx_ring_page_order);
> +	if (err < 0) {
> +		info->tx_ring_page_order = 0;
> +		dev_info(&dev->dev, "single tx ring\n");
> +	} else {
> +		if (max_tx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
> +			dev_info(&dev->dev,
> +				 "backend ring page order %d too large, clamp to %d\n",
> +				 max_tx_ring_page_order,
> +				 XENNET_MAX_RING_PAGE_ORDER);
> +			max_tx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
> +		}
> +		info->tx_ring_page_order = max_tx_ring_page_order;
> +		dev_info(&dev->dev, "multi-page tx ring, order = %d\n",
> +			 info->tx_ring_page_order);
> +	}
> +	info->tx_ring_pages = (1U << info->tx_ring_page_order);
> +
> +	txs = (struct xen_netif_tx_sring *)
> +		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
> +				 info->tx_ring_page_order);
>   	if (!txs) {
>   		err = -ENOMEM;
>   		xenbus_dev_fatal(dev, err, "allocating tx ring page");
>   		goto fail;
>   	}
>   	SHARED_RING_INIT(txs);
> -	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
> +	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE * info->tx_ring_pages);
> +
> +	err = xenbus_grant_ring(dev, txs, info->tx_ring_pages,
> +				info->tx_ring_ref);
> +	if (err < 0)
> +		goto grant_tx_ring_fail;
>
> -	err = xenbus_grant_ring(dev, txs, 1, grefs);
> +	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> +			   "max-rx-ring-page-order", "%u",
> +			   &max_rx_ring_page_order);
>   	if (err < 0) {
> -		free_page((unsigned long)txs);
> -		goto fail;
> +		info->rx_ring_page_order = 0;
> +		dev_info(&dev->dev, "single rx ring\n");
> +	} else {
> +		if (max_rx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
> +			dev_info(&dev->dev,
> +				 "backend ring page order %d too large, clamp to %d\n",
> +				 max_rx_ring_page_order,
> +				 XENNET_MAX_RING_PAGE_ORDER);
> +			max_rx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
> +		}
> +		info->rx_ring_page_order = max_rx_ring_page_order;
> +		dev_info(&dev->dev, "multi-page rx ring, order = %d\n",
> +			 info->rx_ring_page_order);
>   	}
> +	info->rx_ring_pages = (1U << info->rx_ring_page_order);
>
> -	info->tx_ring_ref = grefs[0];
> -	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
> +	rxs = (struct xen_netif_rx_sring *)
> +		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
> +				 info->rx_ring_page_order);
>   	if (!rxs) {
>   		err = -ENOMEM;
>   		xenbus_dev_fatal(dev, err, "allocating rx ring page");
> -		goto fail;
> +		goto alloc_rx_ring_fail;
>   	}
>   	SHARED_RING_INIT(rxs);
> -	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
> +	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE * info->rx_ring_pages);
>
> -	err = xenbus_grant_ring(dev, rxs, 1, grefs);
> -	if (err < 0) {
> -		free_page((unsigned long)rxs);
> -		goto fail;
> -	}
> -	info->rx_ring_ref = grefs[0];
> +	err = xenbus_grant_ring(dev, rxs, info->rx_ring_pages,
> +				info->rx_ring_ref);
> +	if (err < 0)
> +		goto grant_rx_ring_fail;
>
>   	err = xenbus_alloc_evtchn(dev, &info->evtchn);
>   	if (err)
> -		goto fail;
> +		goto alloc_evtchn_fail;
>
>   	err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
>   					0, netdev->name, netdev);
>   	if (err<  0)
> -		goto fail;
> +		goto bind_fail;
>   	netdev->irq = err;
>   	return 0;
>
> - fail:
> +bind_fail:
> +	xenbus_free_evtchn(dev, info->evtchn);
> +alloc_evtchn_fail:
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
> +grant_rx_ring_fail:
> +	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
> +alloc_rx_ring_fail:
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
> +grant_tx_ring_fail:
> +	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
> +fail:
>   	return err;
>   }
>
> @@ -1570,6 +1630,7 @@ static int talk_to_netback(struct xenbus_device *dev,
>   	const char *message;
>   	struct xenbus_transaction xbt;
>   	int err;
> +	int i;
>
>   	/* Create shared ring, alloc event channel. */
>   	err = setup_netfront(dev, info);
> @@ -1583,18 +1644,58 @@ again:
>   		goto destroy_ring;
>   	}
>
> -	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
> -			    info->tx_ring_ref);
> -	if (err) {
> -		message = "writing tx ring-ref";
> -		goto abort_transaction;
> +	if (info->tx_ring_page_order == 0) {
> +		err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
> +				    info->tx_ring_ref[0]);
> +		if (err) {
> +			message = "writing tx ring-ref";
> +			goto abort_transaction;
> +		}
> +	} else {
> +		err = xenbus_printf(xbt, dev->nodename, "tx-ring-order", "%u",
> +				    info->tx_ring_page_order);
> +		if (err) {
> +			message = "writing tx-ring-order";
> +			goto abort_transaction;
> +		}
> +		for (i = 0; i < info->tx_ring_pages; i++) {
> +			char name[sizeof("tx-ring-ref")+3];
> +			snprintf(name, sizeof(name), "tx-ring-ref%u", i);
> +			err = xenbus_printf(xbt, dev->nodename, name, "%u",
> +					    info->tx_ring_ref[i]);
> +			if (err) {
> +				message = "writing tx ring-ref";
> +				goto abort_transaction;
> +			}
> +		}
>   	}
> -	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
> -			    info->rx_ring_ref);
> -	if (err) {
> -		message = "writing rx ring-ref";
> -		goto abort_transaction;
> +
> +	if (info->rx_ring_page_order == 0) {
> +		err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
> +				    info->rx_ring_ref[0]);
> +		if (err) {
> +			message = "writing rx ring-ref";
> +			goto abort_transaction;
> +		}
> +	} else {
> +		err = xenbus_printf(xbt, dev->nodename, "rx-ring-order", "%u",
> +				    info->rx_ring_page_order);
> +		if (err) {
> +			message = "writing rx-ring-order";
> +			goto abort_transaction;
> +		}
> +		for (i = 0; i < info->rx_ring_pages; i++) {
> +			char name[sizeof("rx-ring-ref")+3];
> +			snprintf(name, sizeof(name), "rx-ring-ref%u", i);
> +			err = xenbus_printf(xbt, dev->nodename, name, "%u",
> +					    info->rx_ring_ref[i]);
> +			if (err) {
> +				message = "writing rx ring-ref";
> +				goto abort_transaction;
> +			}
> +		}
>   	}
> +
>   	err = xenbus_printf(xbt, dev->nodename,
>   			    "event-channel", "%u", info->evtchn);
>   	if (err) {
> @@ -1681,7 +1782,8 @@ static int xennet_connect(struct net_device *dev)
>   	xennet_release_tx_bufs(np);
>
>   	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
> -	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
> +	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE(np->rx_ring_pages);
> +	     i++) {
>   		skb_frag_t *frag;
>   		const struct page *page;
>   		if (!np->rx_skbs[i])
Wei Liu Feb. 26, 2013, 12:35 p.m. UTC | #2
On Tue, 2013-02-26 at 06:52 +0000, ANNIE LI wrote:
> 
> On 2013-2-16 0:00, Wei Liu wrote:
> > Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> > ---
> >   drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
> >   1 file changed, 174 insertions(+), 72 deletions(-)
> >
> > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > index 8bd75a1..de73a71 100644
> > --- a/drivers/net/xen-netfront.c
> > +++ b/drivers/net/xen-netfront.c
> > @@ -67,9 +67,19 @@ struct netfront_cb {
> >
> >   #define GRANT_INVALID_REF   0
> >
> > -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> > -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> > -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
> > +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> > +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
> > +
> > +
> > +#define NET_TX_RING_SIZE(_nr_pages)                  \
> > +     __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
> > +#define NET_RX_RING_SIZE(_nr_pages)                  \
> > +     __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
> > +
> > +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
> > +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
> > +
> > +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
> 
> Not using multi-page ring here?
> In xennet_create_dev, gnttab_alloc_grant_references allocates
> TX_MAX_TARGET grant references for tx. In
> xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants
> are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally
> different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although
> skb_entry_is_link helps to avoid releasing invalid grants, lots of null
> loops seem unnecessary. I think TX_MAX_TARGET should be changed into some
> variable connected with np->tx_ring_pages. Or you intended to use one
> page ring here?
> 

Looking back at my history, this limitation was introduced because if we
have a multi-page backend and a single-page frontend, the backend skb
processing could overlap.

I agree with you that this limit should be variable, but as we still use
the M:N model, the safe option is to cap this limit to 1 page.
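
A minimal sketch of the variable form (hypothetical, not something this
patch implements; tx_max_target would be a new per-device field
replacing the compile-time TX_MAX_TARGET):

	/* size the target from the negotiated ring, still capped at 256 */
	np->tx_max_target = min_t(int,
				  NET_TX_RING_SIZE(np->tx_ring_pages), 256);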

Another option is to check validity of skbs before processing them. I
will look into that as well.

The same reason applies to the RX ring as well.


Wei.

Annie.li Feb. 27, 2013, 7:39 a.m. UTC | #3
On 2013-2-26 20:35, Wei Liu wrote:
> On Tue, 2013-02-26 at 06:52 +0000, ANNIE LI wrote:
>> On 2013-2-16 0:00, Wei Liu wrote:
>>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>>> ---
>>>    drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
>>>    1 file changed, 174 insertions(+), 72 deletions(-)
>>>
>>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>>> index 8bd75a1..de73a71 100644
>>> --- a/drivers/net/xen-netfront.c
>>> +++ b/drivers/net/xen-netfront.c
>>> @@ -67,9 +67,19 @@ struct netfront_cb {
>>>
>>>    #define GRANT_INVALID_REF   0
>>>
>>> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
>>> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
>>> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
>>> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
>>> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
>>> +
>>> +
>>> +#define NET_TX_RING_SIZE(_nr_pages)                  \
>>> +     __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
>>> +#define NET_RX_RING_SIZE(_nr_pages)                  \
>>> +     __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
>>> +
>>> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
>>> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
>>> +
>>> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
>> Not using multi-page ring here?
>> In xennet_create_dev, gnttab_alloc_grant_references allocates
>> TX_MAX_TARGET grant references for tx. In
>> xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants
>> are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally
>> different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although
>> skb_entry_is_link helps to avoid releasing invalid grants, lots of null
>> loops seem unnecessary. I think TX_MAX_TARGET should be changed into some
>> variable connected with np->tx_ring_pages. Or you intended to use one
>> page ring here?
>>
> Looking back at my history, this limitation was introduced because if we
> have a multi-page backend and a single-page frontend, the backend skb
> processing could overlap.

I did not see the overlap you mentioned here in netback. Although 
netback supports multi-page, netback->vif still uses a single page if the 
frontend only supports a single page. Netfront and netback negotiate this 
through xenstore in your 5/8 patch. The requests and responses should not 
have any overlap between netback and netfront. Am I missing something?
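
For reference, this negotiation boils down to the backend advertising
max-{tx,rx}-ring-page-order and the frontend writing back keys such as
the following (a sketch of the frontend's xenstore node for a negotiated
order of 1; the grant reference and event channel values are made up):

	tx-ring-order = "1"
	tx-ring-ref0 = "8"
	tx-ring-ref1 = "9"
	rx-ring-order = "1"
	rx-ring-ref0 = "10"
	rx-ring-ref1 = "11"
	event-channel = "15"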

>
> I agree with you that this limit should be variable, but as we still use
> the M:N model, the safe option is to cap this limit to 1 page.

Yes, the M:N model is still used here. But the shared ring should be the 
same for netback->vif and netfront.

Thanks
Annie

>
> Another option is to check validity of skbs before processing them. I
> will look into that as well.
>
> The same reason applies to the RX ring as well.
>
>
> Wei.
>
Wei Liu Feb. 27, 2013, 3:49 p.m. UTC | #4
On Wed, 2013-02-27 at 07:39 +0000, ANNIE LI wrote:
> 
> On 2013-2-26 20:35, Wei Liu wrote:
> > On Tue, 2013-02-26 at 06:52 +0000, ANNIE LI wrote:
> >> On 2013-2-16 0:00, Wei Liu wrote:
> >>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> >>> ---
> >>>    drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
> >>>    1 file changed, 174 insertions(+), 72 deletions(-)
> >>>
> >>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> >>> index 8bd75a1..de73a71 100644
> >>> --- a/drivers/net/xen-netfront.c
> >>> +++ b/drivers/net/xen-netfront.c
> >>> @@ -67,9 +67,19 @@ struct netfront_cb {
> >>>
> >>>    #define GRANT_INVALID_REF   0
> >>>
> >>> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> >>> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> >>> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
> >>> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> >>> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
> >>> +
> >>> +
> >>> +#define NET_TX_RING_SIZE(_nr_pages)                  \
> >>> +     __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
> >>> +#define NET_RX_RING_SIZE(_nr_pages)                  \
> >>> +     __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
> >>> +
> >>> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
> >>> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
> >>> +
> >>> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
> >> Not using multi-page ring here?
> >> In xennet_create_dev, gnttab_alloc_grant_references allocates
> >> TX_MAX_TARGET grant references for tx. In
> >> xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants
> >> are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally
> >> different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although
> >> skb_entry_is_link helps to avoid releasing invalid grants, lots of null
> >> loops seem unnecessary. I think TX_MAX_TARGET should be changed into some
> >> variable connected with np->tx_ring_pages. Or you intended to use one
> >> page ring here?
> >>
> > Looking back at my history, this limitation was introduced because if we
> > have a multi-page backend and a single-page frontend, the backend skb
> > processing could overlap.
> 
> I did not see the overlap you mentioned here in netback. Although 
> netback supports multi-page, netback->vif still uses a single page if the
> frontend only supports a single page. Netfront and netback negotiate this
> through xenstore in your 5/8 patch. The requests and responses should not
> have any overlap between netback and netfront. Am I missing something?
> 

I tried to dig up the mail archive just now and realized that the bug
report was in a private mail exchange with Konrad.

I don't really remember the details now since it is more than a year
old, but you can find a trace in Konrad's tree, CS 5b4c3dd5b255. All I
can remember is that this bug was triggered by mixed old/new
frontend/backend.

I think this cap can be removed if we make all buffers in netfront
dynamically allocated.


Wei.

Annie.li Feb. 28, 2013, 5:19 a.m. UTC | #5
On 2013-2-27 23:49, Wei Liu wrote:
> On Wed, 2013-02-27 at 07:39 +0000, ANNIE LI wrote:
>> On 2013-2-26 20:35, Wei Liu wrote:
>>> On Tue, 2013-02-26 at 06:52 +0000, ANNIE LI wrote:
>>>> On 2013-2-16 0:00, Wei Liu wrote:
> >>>>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>>>>> ---
>>>>>     drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
>>>>>     1 file changed, 174 insertions(+), 72 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>>>>> index 8bd75a1..de73a71 100644
>>>>> --- a/drivers/net/xen-netfront.c
>>>>> +++ b/drivers/net/xen-netfront.c
>>>>> @@ -67,9 +67,19 @@ struct netfront_cb {
>>>>>
>>>>>     #define GRANT_INVALID_REF   0
>>>>>
>>>>> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
>>>>> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
>>>>> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
>>>>> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> >>>>> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
>>>>> +
>>>>> +
>>>>> +#define NET_TX_RING_SIZE(_nr_pages)                  \
>>>>> +     __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
>>>>> +#define NET_RX_RING_SIZE(_nr_pages)                  \
>>>>> +     __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
>>>>> +
>>>>> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
>>>>> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
>>>>> +
>>>>> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
>>>> Not using multi-page ring here?
>>>> In xennet_create_dev, gnttab_alloc_grant_references allocates
> >>>> TX_MAX_TARGET grant references for tx. In
> >>>> xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants
> >>>> are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally
> >>>> different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although
> >>>> skb_entry_is_link helps to avoid releasing invalid grants, lots of null
> >>>> loops seem unnecessary. I think TX_MAX_TARGET should be changed into some
> >>>> variable connected with np->tx_ring_pages. Or you intended to use one
>>>> page ring here?
>>>>
> >>> Looking back at my history, this limitation was introduced because if we
> >>> have a multi-page backend and a single-page frontend, the backend skb
>>> processing could overlap.
>> I did not see the overlap you mentioned here in netback. Although
>> netback supports multi-page, netback->vif still uses single page if the
>> frontend only supports single page. Netfront and netback negotiate this
>> through xenstore in your 5/8 patch. The requests and response should not
>> have any overlap between netback and netfront. Am I missing something?
>>
> I tried to dig up the mail archive just now and realized that the bug
> report was in a private mail exchange with Konrad.
>
> I don't really remember the details now since it is more than a year
> old, but you can find a trace in Konrad's tree, CS 5b4c3dd5b255. All I
> can remember is that this bug was triggered by mixed old/new
> frontend/backend.

I checked the code in Konrad's tree and think the overlap issue you 
mentioned exists with the original netback (without multi-ring) and a 
newer netfront. The original netback does not support multi-ring, and 
your newer netfront before this bug fix used "#define TX_MAX_TARGET 
XENNET_MAX_TX_RING_SIZE" directly. So that would cause overlap when 
netfront allocates rx skbs.
"#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)" limits the 
netfront to a single-page ring; it fixed the overlap issue, but is not 
enough.
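
The index arithmetic shows why aliasing can happen (a sketch; the slot
counts assume 4 KiB pages, and the 4-page frontend against a 1-page
backend is a hypothetical worst case):

	/* frontend masks ring indexes with its own, larger ring size */
	front_slot = idx & (NET_RX_RING_SIZE(4) - 1);	/* 0..1023 */
	back_slot  = idx & (NET_RX_RING_SIZE(1) - 1);	/* 0..255  */
	/* front slots 0, 256, 512 and 768 all alias back slot 0 */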

>
> I think this cap can be removed if we make all buffers in netfront
> dynamically allocated.

Yes, making TX_MAX_TARGET dynamic would fix this issue.

Thanks
Annie
>
>
> Wei.
>
Wei Liu Feb. 28, 2013, 11:02 a.m. UTC | #6
On Thu, Feb 28, 2013 at 05:19:43AM +0000, ANNIE LI wrote:
> 
> 
> On 2013-2-27 23:49, Wei Liu wrote:
> > On Wed, 2013-02-27 at 07:39 +0000, ANNIE LI wrote:
> >> On 2013-2-26 20:35, Wei Liu wrote:
> >>> On Tue, 2013-02-26 at 06:52 +0000, ANNIE LI wrote:
> >>>> On 2013-2-16 0:00, Wei Liu wrote:
> >>>>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> >>>>> ---
> >>>>>     drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
> >>>>>     1 file changed, 174 insertions(+), 72 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> >>>>> index 8bd75a1..de73a71 100644
> >>>>> --- a/drivers/net/xen-netfront.c
> >>>>> +++ b/drivers/net/xen-netfront.c
> >>>>> @@ -67,9 +67,19 @@ struct netfront_cb {
> >>>>>
> >>>>>     #define GRANT_INVALID_REF   0
> >>>>>
> >>>>> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> >>>>> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> >>>>> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
> >>>>> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> >>>>> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
> >>>>> +
> >>>>> +
> >>>>> +#define NET_TX_RING_SIZE(_nr_pages)                  \
> >>>>> +     __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
> >>>>> +#define NET_RX_RING_SIZE(_nr_pages)                  \
> >>>>> +     __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
> >>>>> +
> >>>>> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
> >>>>> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
> >>>>> +
> >>>>> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
> >>>> Not using multi-page ring here?
> >>>> In xennet_create_dev, gnttab_alloc_grant_references allocates
> >>>> TX_MAX_TARGET grant references for tx. In
> >>>> xennet_release_tx_bufs, NET_TX_RING_SIZE(np->tx_ring_pages) grants
> >>>> are processed. And NET_TX_RING_SIZE(np->tx_ring_pages) is totally
> >>>> different from TX_MAX_TARGET if np->tx_ring_pages is not 1. Although
> >>>> skb_entry_is_link helps to avoid releasing invalid grants, lots of null
> >>>> loops seem unnecessary. I think TX_MAX_TARGET should be changed into some
> >>>> variable connected with np->tx_ring_pages. Or you intended to use one
> >>>> page ring here?
> >>>>
> >>> Looking back at my history, this limitation was introduced because if we
> >>> have a multi-page backend and a single-page frontend, the backend skb
> >>> processing could overlap.
> >> I did not see the overlap you mentioned here in netback. Although
> >> netback supports multi-page, netback->vif still uses single page if the
> >> frontend only supports single page. Netfront and netback negotiate this
> >> through xenstore in your 5/8 patch. The requests and response should not
> >> have any overlap between netback and netfront. Am I missing something?
> >>
> > I tried to dig up the mail archive just now and realized that the bug
> > report was in a private mail exchange with Konrad.
> >
> > I don't really remember the details now since it is more than a year
> > old, but you can find a trace in Konrad's tree, CS 5b4c3dd5b255. All I
> > can remember is that this bug was triggered by mixed old/new
> > frontend/backend.
> 
> I checked the code in Konrad's tree and think the overlap issue you
> mentioned exists with the original netback (without multi-ring) and a
> newer netfront. The original netback does not support multi-ring, and
> your newer netfront before this bug fix used "#define TX_MAX_TARGET
> XENNET_MAX_TX_RING_SIZE" directly. So that would cause overlap when
> netfront allocates rx skbs.
> "#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)" limits the
> netfront to a single-page ring; it fixed the overlap issue, but is not enough.
> 

Yes. I just saw a bug report from the Xen-user list yesterday for the same
issue in the original netback (1-page ring), so the overlap issue is not
introduced by the multi-page ring implementation. If your team also sees
that issue, do you have a patch to fix it?


Wei.
Annie.li Feb. 28, 2013, 12:55 p.m. UTC | #7
On 2013-2-28 19:02, Wei Liu wrote:
> On Thu, Feb 28, 2013 at 05:19:43AM +0000, ANNIE LI wrote:
>> I checked the code in Konrad's tree and am thinking this overlap issue
>> you mentioned existing in original netback(without multi-ring) and newer
>> netfront. Original netback does not support multi-ring, and your newer
>> netfront before this bug fix used "#define TX_MAX_TARGET
>> XENNET_MAX_TX_RING_SIZE" directly. So that would cause overlap when
>> netfront allocating rx skbs.
>> "#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)" limits the
>> netfront to single ring, it fixed the overlap issue, but not enough.
>>
> Yes. I just saw a bug report from the Xen-user list yesterday for the same
> issue in the original netback (1-page ring), so the overlap issue is not
> introduced by the multi-page ring implementation. If your team also sees
> that issue, do you have a patch to fix it?

No. We thought your patch fixed it, and I did not check it further at 
that time.
Are you sure they are the same? What is the thread title in Xen-user?
The overlap issue here exists in netfront when netfront allocates skbs 
greedily. In Konrad's tree merged with your patch, netfront with 
"#define TX_MAX_TARGET XENNET_MAX_TX_RING_SIZE" should hit this overlap 
issue when it runs with a single-ring netback.
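
The greedy path in question is the rx refill loop, which tops the ring
up to rx_target buffers (paraphrasing xennet_alloc_rx_buffers(); with
rx_target derived from a multi-page RX_MAX_TARGET, req_prod can run
further ahead than a one-page backend ring can hold):

	for (i = skb_queue_len(&np->rx_batch); i < np->rx_target; i++) {
		skb = __netdev_alloc_skb(dev,
					 RX_COPY_THRESHOLD + NET_IP_ALIGN,
					 GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb))
			goto no_skb;
		/* ... attach a page, queue for the batched ring push ... */
		__skb_queue_tail(&np->rx_batch, skb);
	}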

Thanks
Annie
Konrad Rzeszutek Wilk March 4, 2013, 9:16 p.m. UTC | #8
On Fri, Feb 15, 2013 at 04:00:07PM +0000, Wei Liu wrote:

Please:
 1) Explain the new PV protocol (you could just do a copy-n-paste
    from what you had in the backend).
 2) Also submit a patch to the Xen hypervisor tree for the new XenBus
    extension.
 3) Explain in which scenarios this benefits the user.
 4) Also provide a Documentation/ABI/stable/sysfs-bus-xen-frontend entry
    to explain the new parameter.
 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>  drivers/net/xen-netfront.c |  246 +++++++++++++++++++++++++++++++-------------
>  1 file changed, 174 insertions(+), 72 deletions(-)
> 
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 8bd75a1..de73a71 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -67,9 +67,19 @@ struct netfront_cb {
>  
>  #define GRANT_INVALID_REF	0
>  
> -#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> -#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
> +#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
> +#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
> +
> +
> +#define NET_TX_RING_SIZE(_nr_pages)			\
> +	__CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
> +#define NET_RX_RING_SIZE(_nr_pages)			\
> +	__CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
> +
> +#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
> +#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
> +
> +#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
>  
>  struct netfront_stats {
>  	u64			rx_packets;
> @@ -80,6 +90,11 @@ struct netfront_stats {
>  };
>  
>  struct netfront_info {
> +	/* Statistics */
> +	struct netfront_stats __percpu *stats;
> +
> +	unsigned long rx_gso_checksum_fixup;
> +
>  	struct list_head list;
>  	struct net_device *netdev;
>  
> @@ -90,7 +105,9 @@ struct netfront_info {
>  
>  	spinlock_t   tx_lock;
>  	struct xen_netif_tx_front_ring tx;
> -	int tx_ring_ref;
> +	int tx_ring_ref[XENNET_MAX_RING_PAGES];
> +	unsigned int tx_ring_page_order;
> +	unsigned int tx_ring_pages;
>  
>  	/*
>  	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
> @@ -104,36 +121,33 @@ struct netfront_info {
>  	union skb_entry {
>  		struct sk_buff *skb;
>  		unsigned long link;
> -	} tx_skbs[NET_TX_RING_SIZE];
> +	} tx_skbs[XENNET_MAX_TX_RING_SIZE];
>  	grant_ref_t gref_tx_head;
> -	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
> +	grant_ref_t grant_tx_ref[XENNET_MAX_TX_RING_SIZE];
>  	unsigned tx_skb_freelist;
>  
>  	spinlock_t   rx_lock ____cacheline_aligned_in_smp;
>  	struct xen_netif_rx_front_ring rx;
> -	int rx_ring_ref;
> +	int rx_ring_ref[XENNET_MAX_RING_PAGES];
> +	unsigned int rx_ring_page_order;
> +	unsigned int rx_ring_pages;
>  
>  	/* Receive-ring batched refills. */
>  #define RX_MIN_TARGET 8
>  #define RX_DFL_MIN_TARGET 64
> -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
> +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE(1), 256)
>  	unsigned rx_min_target, rx_max_target, rx_target;
>  	struct sk_buff_head rx_batch;
>  
>  	struct timer_list rx_refill_timer;
>  
> -	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
> +	struct sk_buff *rx_skbs[XENNET_MAX_RX_RING_SIZE];
>  	grant_ref_t gref_rx_head;
> -	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
> -
> -	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
> -	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
> -	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
> -
> -	/* Statistics */
> -	struct netfront_stats __percpu *stats;
> +	grant_ref_t grant_rx_ref[XENNET_MAX_RX_RING_SIZE];
>  
> -	unsigned long rx_gso_checksum_fixup;
> +	unsigned long rx_pfn_array[XENNET_MAX_RX_RING_SIZE];
> +	struct multicall_entry rx_mcl[XENNET_MAX_RX_RING_SIZE+1];
> +	struct mmu_update rx_mmu[XENNET_MAX_RX_RING_SIZE];
>  };
>  
>  struct netfront_rx_info {
> @@ -171,15 +185,15 @@ static unsigned short get_id_from_freelist(unsigned *head,
>  	return id;
>  }
>  
> -static int xennet_rxidx(RING_IDX idx)
> +static int xennet_rxidx(RING_IDX idx, struct netfront_info *info)
>  {
> -	return idx & (NET_RX_RING_SIZE - 1);
> +	return idx & (NET_RX_RING_SIZE(info->rx_ring_pages) - 1);
>  }
>  
>  static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
>  					 RING_IDX ri)
>  {
> -	int i = xennet_rxidx(ri);
> +	int i = xennet_rxidx(ri, np);
>  	struct sk_buff *skb = np->rx_skbs[i];
>  	np->rx_skbs[i] = NULL;
>  	return skb;
> @@ -188,7 +202,7 @@ static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
>  static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
>  					    RING_IDX ri)
>  {
> -	int i = xennet_rxidx(ri);
> +	int i = xennet_rxidx(ri, np);
>  	grant_ref_t ref = np->grant_rx_ref[i];
>  	np->grant_rx_ref[i] = GRANT_INVALID_REF;
>  	return ref;
> @@ -301,7 +315,7 @@ no_skb:
>  
>  		skb->dev = dev;
>  
> -		id = xennet_rxidx(req_prod + i);
> +		id = xennet_rxidx(req_prod + i, np);
>  
>  		BUG_ON(np->rx_skbs[id]);
>  		np->rx_skbs[id] = skb;
> @@ -653,7 +667,7 @@ static int xennet_close(struct net_device *dev)
>  static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
>  				grant_ref_t ref)
>  {
> -	int new = xennet_rxidx(np->rx.req_prod_pvt);
> +	int new = xennet_rxidx(np->rx.req_prod_pvt, np);
>  
>  	BUG_ON(np->rx_skbs[new]);
>  	np->rx_skbs[new] = skb;
> @@ -1109,7 +1123,7 @@ static void xennet_release_tx_bufs(struct netfront_info *np)
>  	struct sk_buff *skb;
>  	int i;
>  
> -	for (i = 0; i < NET_TX_RING_SIZE; i++) {
> +	for (i = 0; i < NET_TX_RING_SIZE(np->tx_ring_pages); i++) {
>  		/* Skip over entries which are actually freelist references */
>  		if (skb_entry_is_link(&np->tx_skbs[i]))
>  			continue;
> @@ -1143,7 +1157,7 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
>  
>  	spin_lock_bh(&np->rx_lock);
>  
> -	for (id = 0; id < NET_RX_RING_SIZE; id++) {
> +	for (id = 0; id < NET_RX_RING_SIZE(np->rx_ring_pages); id++) {
>  		ref = np->grant_rx_ref[id];
>  		if (ref == GRANT_INVALID_REF) {
>  			unused++;
> @@ -1324,13 +1338,13 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
>  
>  	/* Initialise tx_skbs as a free chain containing every entry. */
>  	np->tx_skb_freelist = 0;
> -	for (i = 0; i < NET_TX_RING_SIZE; i++) {
> +	for (i = 0; i < XENNET_MAX_TX_RING_SIZE; i++) {
>  		skb_entry_set_link(&np->tx_skbs[i], i+1);
>  		np->grant_tx_ref[i] = GRANT_INVALID_REF;
>  	}
>  
>  	/* Clear out rx_skbs */
> -	for (i = 0; i < NET_RX_RING_SIZE; i++) {
> +	for (i = 0; i < XENNET_MAX_RX_RING_SIZE; i++) {
>  		np->rx_skbs[i] = NULL;
>  		np->grant_rx_ref[i] = GRANT_INVALID_REF;
>  	}
> @@ -1428,13 +1442,6 @@ static int netfront_probe(struct xenbus_device *dev,
>  	return err;
>  }
>  
> -static void xennet_end_access(int ref, void *page)
> -{
> -	/* This frees the page as a side-effect */
> -	if (ref != GRANT_INVALID_REF)
> -		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
> -}
> -
>  static void xennet_disconnect_backend(struct netfront_info *info)
>  {
>  	/* Stop old i/f to prevent errors whilst we rebuild the state. */
> @@ -1448,12 +1455,12 @@ static void xennet_disconnect_backend(struct netfront_info *info)
>  		unbind_from_irqhandler(info->netdev->irq, info->netdev);
>  	info->evtchn = info->netdev->irq = 0;
>  
> -	/* End access and free the pages */
> -	xennet_end_access(info->tx_ring_ref, info->tx.sring);
> -	xennet_end_access(info->rx_ring_ref, info->rx.sring);
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
> +	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
> +
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
> +	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
>  
> -	info->tx_ring_ref = GRANT_INVALID_REF;
> -	info->rx_ring_ref = GRANT_INVALID_REF;
>  	info->tx.sring = NULL;
>  	info->rx.sring = NULL;
>  }
> @@ -1501,11 +1508,14 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>  	struct xen_netif_tx_sring *txs;
>  	struct xen_netif_rx_sring *rxs;
>  	int err;
> -	int grefs[1];
>  	struct net_device *netdev = info->netdev;
> +	unsigned int max_tx_ring_page_order, max_rx_ring_page_order;
> +	int i;
>  
> -	info->tx_ring_ref = GRANT_INVALID_REF;
> -	info->rx_ring_ref = GRANT_INVALID_REF;
> +	for (i = 0; i < XENNET_MAX_RING_PAGES; i++) {
> +		info->tx_ring_ref[i] = GRANT_INVALID_REF;
> +		info->rx_ring_ref[i] = GRANT_INVALID_REF;
> +	}
>  	info->rx.sring = NULL;
>  	info->tx.sring = NULL;
>  	netdev->irq = 0;
> @@ -1516,50 +1526,100 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>  		goto fail;
>  	}
>  
> -	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
> +	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> +			   "max-tx-ring-page-order", "%u",
> +			   &max_tx_ring_page_order);
> +	if (err < 0) {
> +		info->tx_ring_page_order = 0;
> +		dev_info(&dev->dev, "single tx ring\n");
> +	} else {
> +		if (max_tx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
> +			dev_info(&dev->dev,
> +				 "backend ring page order %d too large, clamp to %d\n",
> +				 max_tx_ring_page_order,
> +				 XENNET_MAX_RING_PAGE_ORDER);
> +			max_tx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
> +		}
> +		info->tx_ring_page_order = max_tx_ring_page_order;
> +		dev_info(&dev->dev, "multi-page tx ring, order = %d\n",
> +			 info->tx_ring_page_order);
> +	}
> +	info->tx_ring_pages = (1U << info->tx_ring_page_order);
> +
> +	txs = (struct xen_netif_tx_sring *)
> +		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
> +				 info->tx_ring_page_order);
>  	if (!txs) {
>  		err = -ENOMEM;
>  		xenbus_dev_fatal(dev, err, "allocating tx ring page");
>  		goto fail;
>  	}
>  	SHARED_RING_INIT(txs);
> -	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
> +	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE * info->tx_ring_pages);
> +
> +	err = xenbus_grant_ring(dev, txs, info->tx_ring_pages,
> +				info->tx_ring_ref);
> +	if (err < 0)
> +		goto grant_tx_ring_fail;
>  
> -	err = xenbus_grant_ring(dev, txs, 1, grefs);
> +	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> +			   "max-rx-ring-page-order", "%u",
> +			   &max_rx_ring_page_order);
>  	if (err < 0) {
> -		free_page((unsigned long)txs);
> -		goto fail;
> +		info->rx_ring_page_order = 0;
> +		dev_info(&dev->dev, "single rx ring\n");
> +	} else {
> +		if (max_rx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
> +			dev_info(&dev->dev,
> +				 "backend ring page order %d too large, clamp to %d\n",
> +				 max_rx_ring_page_order,
> +				 XENNET_MAX_RING_PAGE_ORDER);
> +			max_rx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
> +		}
> +		info->rx_ring_page_order = max_rx_ring_page_order;
> +		dev_info(&dev->dev, "multi-page rx ring, order = %d\n",
> +			 info->rx_ring_page_order);
>  	}
> +	info->rx_ring_pages = (1U << info->rx_ring_page_order);
>  
> -	info->tx_ring_ref = grefs[0];
> -	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
> +	rxs = (struct xen_netif_rx_sring *)
> +		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
> +				 info->rx_ring_page_order);
>  	if (!rxs) {
>  		err = -ENOMEM;
>  		xenbus_dev_fatal(dev, err, "allocating rx ring page");
> -		goto fail;
> +		goto alloc_rx_ring_fail;
>  	}
>  	SHARED_RING_INIT(rxs);
> -	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
> +	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE * info->rx_ring_pages);
>  
> -	err = xenbus_grant_ring(dev, rxs, 1, grefs);
> -	if (err < 0) {
> -		free_page((unsigned long)rxs);
> -		goto fail;
> -	}
> -	info->rx_ring_ref = grefs[0];
> +	err = xenbus_grant_ring(dev, rxs, info->rx_ring_pages,
> +				info->rx_ring_ref);
> +	if (err < 0)
> +		goto grant_rx_ring_fail;
>  
>  	err = xenbus_alloc_evtchn(dev, &info->evtchn);
>  	if (err)
> -		goto fail;
> +		goto alloc_evtchn_fail;
>  
>  	err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
>  					0, netdev->name, netdev);
>  	if (err < 0)
> -		goto fail;
> +		goto bind_fail;
>  	netdev->irq = err;
>  	return 0;
>  
> - fail:
> +bind_fail:
> +	xenbus_free_evtchn(dev, info->evtchn);
> +alloc_evtchn_fail:
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
> +grant_rx_ring_fail:
> +	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
> +alloc_rx_ring_fail:
> +	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
> +grant_tx_ring_fail:
> +	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
> +fail:
>  	return err;
>  }
>  
> @@ -1570,6 +1630,7 @@ static int talk_to_netback(struct xenbus_device *dev,
>  	const char *message;
>  	struct xenbus_transaction xbt;
>  	int err;
> +	int i;
>  
>  	/* Create shared ring, alloc event channel. */
>  	err = setup_netfront(dev, info);
> @@ -1583,18 +1644,58 @@ again:
>  		goto destroy_ring;
>  	}
>  
> -	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
> -			    info->tx_ring_ref);
> -	if (err) {
> -		message = "writing tx ring-ref";
> -		goto abort_transaction;
> +	if (info->tx_ring_page_order == 0) {
> +		err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
> +				    info->tx_ring_ref[0]);
> +		if (err) {
> +			message = "writing tx ring-ref";
> +			goto abort_transaction;
> +		}
> +	} else {
> +		err = xenbus_printf(xbt, dev->nodename, "tx-ring-order", "%u",
> +				    info->tx_ring_page_order);
> +		if (err) {
> +			message = "writing tx-ring-order";
> +			goto abort_transaction;
> +		}
> +		for (i = 0; i < info->tx_ring_pages; i++) {
> +			char name[sizeof("tx-ring-ref")+3];
> +			snprintf(name, sizeof(name), "tx-ring-ref%u", i);
> +			err = xenbus_printf(xbt, dev->nodename, name, "%u",
> +					    info->tx_ring_ref[i]);
> +			if (err) {
> +				message = "writing tx ring-ref";
> +				goto abort_transaction;
> +			}
> +		}
>  	}
> -	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
> -			    info->rx_ring_ref);
> -	if (err) {
> -		message = "writing rx ring-ref";
> -		goto abort_transaction;
> +
> +	if (info->rx_ring_page_order == 0) {
> +		err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
> +				    info->rx_ring_ref[0]);
> +		if (err) {
> +			message = "writing rx ring-ref";
> +			goto abort_transaction;
> +		}
> +	} else {
> +		err = xenbus_printf(xbt, dev->nodename, "rx-ring-order", "%u",
> +				    info->rx_ring_page_order);
> +		if (err) {
> +			message = "writing rx-ring-order";
> +			goto abort_transaction;
> +		}
> +		for (i = 0; i < info->rx_ring_pages; i++) {
> +			char name[sizeof("rx-ring-ref")+3];
> +			snprintf(name, sizeof(name), "rx-ring-ref%u", i);
> +			err = xenbus_printf(xbt, dev->nodename, name, "%u",
> +					    info->rx_ring_ref[i]);
> +			if (err) {
> +				message = "writing rx ring-ref";
> +				goto abort_transaction;
> +			}
> +		}
>  	}
> +
>  	err = xenbus_printf(xbt, dev->nodename,
>  			    "event-channel", "%u", info->evtchn);
>  	if (err) {
> @@ -1681,7 +1782,8 @@ static int xennet_connect(struct net_device *dev)
>  	xennet_release_tx_bufs(np);
>  
>  	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
> -	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
> +	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE(np->rx_ring_pages);
> +	     i++) {
>  		skb_frag_t *frag;
>  		const struct page *page;
>  		if (!np->rx_skbs[i])
> -- 
> 1.7.10.4
> 
Patch

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8bd75a1..de73a71 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -67,9 +67,19 @@  struct netfront_cb {
 
 #define GRANT_INVALID_REF	0
 
-#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
-#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
-#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
+#define XENNET_MAX_RING_PAGE_ORDER XENBUS_MAX_RING_PAGE_ORDER
+#define XENNET_MAX_RING_PAGES      (1U << XENNET_MAX_RING_PAGE_ORDER)
+
+
+#define NET_TX_RING_SIZE(_nr_pages)			\
+	__CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE * (_nr_pages))
+#define NET_RX_RING_SIZE(_nr_pages)			\
+	__CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE * (_nr_pages))
+
+#define XENNET_MAX_TX_RING_SIZE NET_TX_RING_SIZE(XENNET_MAX_RING_PAGES)
+#define XENNET_MAX_RX_RING_SIZE NET_RX_RING_SIZE(XENNET_MAX_RING_PAGES)
+
+#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE(1), 256)
 
 struct netfront_stats {
 	u64			rx_packets;
@@ -80,6 +90,11 @@  struct netfront_stats {
 };
 
 struct netfront_info {
+	/* Statistics */
+	struct netfront_stats __percpu *stats;
+
+	unsigned long rx_gso_checksum_fixup;
+
 	struct list_head list;
 	struct net_device *netdev;
 
@@ -90,7 +105,9 @@  struct netfront_info {
 
 	spinlock_t   tx_lock;
 	struct xen_netif_tx_front_ring tx;
-	int tx_ring_ref;
+	int tx_ring_ref[XENNET_MAX_RING_PAGES];
+	unsigned int tx_ring_page_order;
+	unsigned int tx_ring_pages;
 
 	/*
 	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
@@ -104,36 +121,33 @@  struct netfront_info {
 	union skb_entry {
 		struct sk_buff *skb;
 		unsigned long link;
-	} tx_skbs[NET_TX_RING_SIZE];
+	} tx_skbs[XENNET_MAX_TX_RING_SIZE];
 	grant_ref_t gref_tx_head;
-	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
+	grant_ref_t grant_tx_ref[XENNET_MAX_TX_RING_SIZE];
 	unsigned tx_skb_freelist;
 
 	spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 	struct xen_netif_rx_front_ring rx;
-	int rx_ring_ref;
+	int rx_ring_ref[XENNET_MAX_RING_PAGES];
+	unsigned int rx_ring_page_order;
+	unsigned int rx_ring_pages;
 
 	/* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
 #define RX_DFL_MIN_TARGET 64
-#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE(1), 256)
 	unsigned rx_min_target, rx_max_target, rx_target;
 	struct sk_buff_head rx_batch;
 
 	struct timer_list rx_refill_timer;
 
-	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
+	struct sk_buff *rx_skbs[XENNET_MAX_RX_RING_SIZE];
 	grant_ref_t gref_rx_head;
-	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
-
-	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
-	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
-	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-
-	/* Statistics */
-	struct netfront_stats __percpu *stats;
+	grant_ref_t grant_rx_ref[XENNET_MAX_RX_RING_SIZE];
 
-	unsigned long rx_gso_checksum_fixup;
+	unsigned long rx_pfn_array[XENNET_MAX_RX_RING_SIZE];
+	struct multicall_entry rx_mcl[XENNET_MAX_RX_RING_SIZE+1];
+	struct mmu_update rx_mmu[XENNET_MAX_RX_RING_SIZE];
 };
 
 struct netfront_rx_info {
@@ -171,15 +185,15 @@  static unsigned short get_id_from_freelist(unsigned *head,
 	return id;
 }
 
-static int xennet_rxidx(RING_IDX idx)
+static int xennet_rxidx(RING_IDX idx, struct netfront_info *info)
 {
-	return idx & (NET_RX_RING_SIZE - 1);
+	return idx & (NET_RX_RING_SIZE(info->rx_ring_pages) - 1);
 }
 
 static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
 					 RING_IDX ri)
 {
-	int i = xennet_rxidx(ri);
+	int i = xennet_rxidx(ri, np);
 	struct sk_buff *skb = np->rx_skbs[i];
 	np->rx_skbs[i] = NULL;
 	return skb;
@@ -188,7 +202,7 @@  static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
 static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
 					    RING_IDX ri)
 {
-	int i = xennet_rxidx(ri);
+	int i = xennet_rxidx(ri, np);
 	grant_ref_t ref = np->grant_rx_ref[i];
 	np->grant_rx_ref[i] = GRANT_INVALID_REF;
 	return ref;
@@ -301,7 +315,7 @@  no_skb:
 
 		skb->dev = dev;
 
-		id = xennet_rxidx(req_prod + i);
+		id = xennet_rxidx(req_prod + i, np);
 
 		BUG_ON(np->rx_skbs[id]);
 		np->rx_skbs[id] = skb;
@@ -653,7 +667,7 @@  static int xennet_close(struct net_device *dev)
 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
 				grant_ref_t ref)
 {
-	int new = xennet_rxidx(np->rx.req_prod_pvt);
+	int new = xennet_rxidx(np->rx.req_prod_pvt, np);
 
 	BUG_ON(np->rx_skbs[new]);
 	np->rx_skbs[new] = skb;
@@ -1109,7 +1123,7 @@  static void xennet_release_tx_bufs(struct netfront_info *np)
 	struct sk_buff *skb;
 	int i;
 
-	for (i = 0; i < NET_TX_RING_SIZE; i++) {
+	for (i = 0; i < NET_TX_RING_SIZE(np->tx_ring_pages); i++) {
 		/* Skip over entries which are actually freelist references */
 		if (skb_entry_is_link(&np->tx_skbs[i]))
 			continue;
@@ -1143,7 +1157,7 @@  static void xennet_release_rx_bufs(struct netfront_info *np)
 
 	spin_lock_bh(&np->rx_lock);
 
-	for (id = 0; id < NET_RX_RING_SIZE; id++) {
+	for (id = 0; id < NET_RX_RING_SIZE(np->rx_ring_pages); id++) {
 		ref = np->grant_rx_ref[id];
 		if (ref == GRANT_INVALID_REF) {
 			unused++;
@@ -1324,13 +1338,13 @@  static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 
 	/* Initialise tx_skbs as a free chain containing every entry. */
 	np->tx_skb_freelist = 0;
-	for (i = 0; i < NET_TX_RING_SIZE; i++) {
+	for (i = 0; i < XENNET_MAX_TX_RING_SIZE; i++) {
 		skb_entry_set_link(&np->tx_skbs[i], i+1);
 		np->grant_tx_ref[i] = GRANT_INVALID_REF;
 	}
 
 	/* Clear out rx_skbs */
-	for (i = 0; i < NET_RX_RING_SIZE; i++) {
+	for (i = 0; i < XENNET_MAX_RX_RING_SIZE; i++) {
 		np->rx_skbs[i] = NULL;
 		np->grant_rx_ref[i] = GRANT_INVALID_REF;
 	}
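The first loop above threads every tx slot into a free chain through the skb/link union; a minimal sketch of how such an index freelist is consumed and replenished (helper names hypothetical; the real driver packs the link into the sk_buff pointer slot, see get_id_from_freelist() earlier in the diff):

/* Sketch: O(1) slot allocator over an array, where each free
 * entry stores the index of the next free entry. */
static unsigned short id_alloc(unsigned int *head, unsigned long *links)
{
	unsigned short id = (unsigned short)*head;
	*head = links[id];	/* head advances to our successor */
	return id;
}

static void id_free(unsigned int *head, unsigned long *links, unsigned short id)
{
	links[id] = *head;	/* old head becomes our successor */
	*head = id;
}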
@@ -1428,13 +1442,6 @@  static int netfront_probe(struct xenbus_device *dev,
 	return err;
 }
 
-static void xennet_end_access(int ref, void *page)
-{
-	/* This frees the page as a side-effect */
-	if (ref != GRANT_INVALID_REF)
-		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
-}
-
 static void xennet_disconnect_backend(struct netfront_info *info)
 {
 	/* Stop old i/f to prevent errors whilst we rebuild the state. */
@@ -1448,12 +1455,12 @@  static void xennet_disconnect_backend(struct netfront_info *info)
 		unbind_from_irqhandler(info->netdev->irq, info->netdev);
 	info->evtchn = info->netdev->irq = 0;
 
-	/* End access and free the pages */
-	xennet_end_access(info->tx_ring_ref, info->tx.sring);
-	xennet_end_access(info->rx_ring_ref, info->rx.sring);
+	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
+	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
+
+	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
+	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
 
-	info->tx_ring_ref = GRANT_INVALID_REF;
-	info->rx_ring_ref = GRANT_INVALID_REF;
 	info->tx.sring = NULL;
 	info->rx.sring = NULL;
 }
@@ -1501,11 +1508,14 @@  static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
 	struct xen_netif_tx_sring *txs;
 	struct xen_netif_rx_sring *rxs;
 	int err;
-	int grefs[1];
 	struct net_device *netdev = info->netdev;
+	unsigned int max_tx_ring_page_order, max_rx_ring_page_order;
+	int i;
 
-	info->tx_ring_ref = GRANT_INVALID_REF;
-	info->rx_ring_ref = GRANT_INVALID_REF;
+	for (i = 0; i < XENNET_MAX_RING_PAGES; i++) {
+		info->tx_ring_ref[i] = GRANT_INVALID_REF;
+		info->rx_ring_ref[i] = GRANT_INVALID_REF;
+	}
 	info->rx.sring = NULL;
 	info->tx.sring = NULL;
 	netdev->irq = 0;
@@ -1516,50 +1526,100 @@  static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
 		goto fail;
 	}
 
-	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "max-tx-ring-page-order", "%u",
+			   &max_tx_ring_page_order);
+	if (err < 0) {
+		info->tx_ring_page_order = 0;
+		dev_info(&dev->dev, "single tx ring\n");
+	} else {
+		if (max_tx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
+			dev_info(&dev->dev,
+				 "backend ring page order %u too large, clamping to %u\n",
+				 max_tx_ring_page_order,
+				 XENNET_MAX_RING_PAGE_ORDER);
+			max_tx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
+		}
+		info->tx_ring_page_order = max_tx_ring_page_order;
+		dev_info(&dev->dev, "multi-page tx ring, order = %u\n",
+			 info->tx_ring_page_order);
+	}
+	info->tx_ring_pages = (1U << info->tx_ring_page_order);
+
+	txs = (struct xen_netif_tx_sring *)
+		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
+				 info->tx_ring_page_order);
 	if (!txs) {
 		err = -ENOMEM;
 		xenbus_dev_fatal(dev, err, "allocating tx ring page");
 		goto fail;
 	}
 	SHARED_RING_INIT(txs);
-	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE * info->tx_ring_pages);
+
+	err = xenbus_grant_ring(dev, txs, info->tx_ring_pages,
+				info->tx_ring_ref);
+	if (err < 0)
+		goto grant_tx_ring_fail;
 
-	err = xenbus_grant_ring(dev, txs, 1, grefs);
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "max-rx-ring-page-order", "%u",
+			   &max_rx_ring_page_order);
 	if (err < 0) {
-		free_page((unsigned long)txs);
-		goto fail;
+		info->rx_ring_page_order = 0;
+		dev_info(&dev->dev, "single rx ring\n");
+	} else {
+		if (max_rx_ring_page_order > XENNET_MAX_RING_PAGE_ORDER) {
+			dev_info(&dev->dev,
+				 "backend ring page order %u too large, clamping to %u\n",
+				 max_rx_ring_page_order,
+				 XENNET_MAX_RING_PAGE_ORDER);
+			max_rx_ring_page_order = XENNET_MAX_RING_PAGE_ORDER;
+		}
+		info->rx_ring_page_order = max_rx_ring_page_order;
+		dev_info(&dev->dev, "multi-page rx ring, order = %u\n",
+			 info->rx_ring_page_order);
 	}
+	info->rx_ring_pages = (1U << info->rx_ring_page_order);
 
-	info->tx_ring_ref = grefs[0];
-	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+	rxs = (struct xen_netif_rx_sring *)
+		__get_free_pages(__GFP_ZERO | GFP_NOIO | __GFP_HIGH,
+				 info->rx_ring_page_order);
 	if (!rxs) {
 		err = -ENOMEM;
 		xenbus_dev_fatal(dev, err, "allocating rx ring page");
-		goto fail;
+		goto alloc_rx_ring_fail;
 	}
 	SHARED_RING_INIT(rxs);
-	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE * info->rx_ring_pages);
 
-	err = xenbus_grant_ring(dev, rxs, 1, grefs);
-	if (err < 0) {
-		free_page((unsigned long)rxs);
-		goto fail;
-	}
-	info->rx_ring_ref = grefs[0];
+	err = xenbus_grant_ring(dev, rxs, info->rx_ring_pages,
+				info->rx_ring_ref);
+	if (err < 0)
+		goto grant_rx_ring_fail;
 
 	err = xenbus_alloc_evtchn(dev, &info->evtchn);
 	if (err)
-		goto fail;
+		goto alloc_evtchn_fail;
 
 	err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
 					0, netdev->name, netdev);
 	if (err < 0)
-		goto fail;
+		goto bind_fail;
 	netdev->irq = err;
 	return 0;
 
- fail:
+bind_fail:
+	xenbus_free_evtchn(dev, info->evtchn);
+alloc_evtchn_fail:
+	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->rx.sring);
+grant_rx_ring_fail:
+	free_pages((unsigned long)info->rx.sring, info->rx_ring_page_order);
+alloc_rx_ring_fail:
+	xenbus_unmap_ring_vfree(info->xbdev, (void *)info->tx.sring);
+grant_tx_ring_fail:
+	free_pages((unsigned long)info->tx.sring, info->tx_ring_page_order);
+fail:
 	return err;
 }
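For context, the xenbus_scanf() negotiation in setup_netfront() assumes the backend has already advertised its limits in its own xenstore directory (which the frontend reads via otherend). A hypothetical backend-side counterpart might look like this; the key names match the patch, but the function itself is illustrative only:

/* Sketch: a backend advertising multi-page ring support before
 * the frontend connects. */
static int advertise_ring_orders(struct xenbus_device *dev,
				 unsigned int order)
{
	int err;

	err = xenbus_printf(XBT_NIL, dev->nodename,
			    "max-tx-ring-page-order", "%u", order);
	if (err)
		return err;

	return xenbus_printf(XBT_NIL, dev->nodename,
			     "max-rx-ring-page-order", "%u", order);
}

A frontend that finds neither key falls back to order 0, which keeps the single-page layout.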
 
@@ -1570,6 +1630,7 @@  static int talk_to_netback(struct xenbus_device *dev,
 	const char *message;
 	struct xenbus_transaction xbt;
 	int err;
+	int i;
 
 	/* Create shared ring, alloc event channel. */
 	err = setup_netfront(dev, info);
@@ -1583,18 +1644,58 @@  again:
 		goto destroy_ring;
 	}
 
-	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
-			    info->tx_ring_ref);
-	if (err) {
-		message = "writing tx ring-ref";
-		goto abort_transaction;
+	if (info->tx_ring_page_order == 0) {
+		err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
+				    info->tx_ring_ref[0]);
+		if (err) {
+			message = "writing tx ring-ref";
+			goto abort_transaction;
+		}
+	} else {
+		err = xenbus_printf(xbt, dev->nodename, "tx-ring-order", "%u",
+				    info->tx_ring_page_order);
+		if (err) {
+			message = "writing tx-ring-order";
+			goto abort_transaction;
+		}
+		for (i = 0; i < info->tx_ring_pages; i++) {
+			char name[sizeof("tx-ring-ref")+3];
+			snprintf(name, sizeof(name), "tx-ring-ref%u", i);
+			err = xenbus_printf(xbt, dev->nodename, name, "%u",
+					    info->tx_ring_ref[i]);
+			if (err) {
+				message = "writing tx ring-ref";
+				goto abort_transaction;
+			}
+		}
 	}
-	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
-			    info->rx_ring_ref);
-	if (err) {
-		message = "writing rx ring-ref";
-		goto abort_transaction;
+
+	if (info->rx_ring_page_order == 0) {
+		err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
+				    info->rx_ring_ref[0]);
+		if (err) {
+			message = "writing rx ring-ref";
+			goto abort_transaction;
+		}
+	} else {
+		err = xenbus_printf(xbt, dev->nodename, "rx-ring-order", "%u",
+				    info->rx_ring_page_order);
+		if (err) {
+			message = "writing rx-ring-order";
+			goto abort_transaction;
+		}
+		for (i = 0; i < info->rx_ring_pages; i++) {
+			char name[sizeof("rx-ring-ref")+3];
+			snprintf(name, sizeof(name), "rx-ring-ref%u", i);
+			err = xenbus_printf(xbt, dev->nodename, name, "%u",
+					    info->rx_ring_ref[i]);
+			if (err) {
+				message = "writing rx ring-ref";
+				goto abort_transaction;
+			}
+		}
 	}
+
 	err = xenbus_printf(xbt, dev->nodename,
 			    "event-channel", "%u", info->evtchn);
 	if (err) {
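For concreteness, with a negotiated tx order of 1 and a single-page rx ring, the transaction above publishes keys like these under the frontend's nodename (grant reference and event channel values illustrative):

	tx-ring-order = "1"
	tx-ring-ref0  = "563"
	tx-ring-ref1  = "564"
	rx-ring-ref   = "565"
	event-channel = "17"

The order-0 paths keep the pre-existing tx-ring-ref / rx-ring-ref keys, so an old backend that never advertises a max ring order still finds exactly what it expects.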
@@ -1681,7 +1782,8 @@  static int xennet_connect(struct net_device *dev)
 	xennet_release_tx_bufs(np);
 
 	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
-	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE(np->rx_ring_pages);
+	     i++) {
 		skb_frag_t *frag;
 		const struct page *page;
 		if (!np->rx_skbs[i])