diff mbox

[1/2,net-next] net: fec: add napi support to improve proformance

Message ID 1358914330-3768-1-git-send-email-Frank.Li@freescale.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Frank Li Jan. 23, 2013, 4:12 a.m. UTC
Add napi support

Before this patch

 iperf -s -i 1
 ------------------------------------------------------------
 Server listening on TCP port 5001
 TCP window size: 85.3 KByte (default)
 ------------------------------------------------------------
 [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
 [ ID] Interval       Transfer     Bandwidth
 [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
 [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
 [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
 [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
 [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
 [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
 [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec

After this patch
 [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
 [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
 [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
 [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
 [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
 [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec

Signed-off-by: Frank Li <Frank.Li@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
---
 drivers/net/ethernet/freescale/fec.c |   68 ++++++++++++++++++++++++++++++---
 drivers/net/ethernet/freescale/fec.h |    4 ++
 2 files changed, 65 insertions(+), 7 deletions(-)

Comments

Eric Dumazet Jan. 23, 2013, 6:21 a.m. UTC | #1
On Wed, 2013-01-23 at 12:12 +0800, Frank Li wrote:
> Add napi support
> 
> Before this patch
> 
>  iperf -s -i 1
>  ------------------------------------------------------------
>  Server listening on TCP port 5001
>  TCP window size: 85.3 KByte (default)
>  ------------------------------------------------------------
>  [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
>  [ ID] Interval       Transfer     Bandwidth
>  [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
>  [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
>  [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
>  [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
>  [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
>  [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
>  [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec
> 
> After this patch
>  [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
>  [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
>  [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
>  [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
>  [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
>  [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec

Strange, as you still call netif_rx()

NAPI should call netif_receive_skb() instead



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P Jan. 23, 2013, 6:37 a.m. UTC | #2
On Wed, Jan 23, 2013 at 12:12:10PM +0800, Frank Li wrote:

[...]

> diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
> index f52ba33..8fa420c 100644
> --- a/drivers/net/ethernet/freescale/fec.c
> +++ b/drivers/net/ethernet/freescale/fec.c
> @@ -67,6 +67,8 @@
>  #endif
>  
>  #define DRIVER_NAME	"fec"
> +#define FEC_NAPI_WEIGHT	64
> +#define INT32_MAX	0x7FFFFFFF

This seems awfully big as an upper-limit to your ISR processing.  See below
for more comments.

>  
>  /* Pause frame feild and FIFO threshold */
>  #define FEC_ENET_FCE	(1 << 5)
> @@ -565,6 +567,20 @@ fec_timeout(struct net_device *ndev)
>  }
>  
>  static void
> +fec_enet_rx_int_is_enabled(struct net_device *ndev, bool enabled)
> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	uint    int_events;
> +
> +	int_events = readl(fep->hwp + FEC_IMASK);
> +	if (enabled)
> +		int_events |= FEC_ENET_RXF;
> +	else
> +		int_events &= ~FEC_ENET_RXF;
> +	writel(int_events, fep->hwp + FEC_IMASK);
> +}
> +
> +static void
>  fec_enet_tx(struct net_device *ndev)
>  {
>  	struct	fec_enet_private *fep;
> @@ -656,8 +672,8 @@ fec_enet_tx(struct net_device *ndev)
>   * not been given to the system, we just set the empty indicator,
>   * effectively tossing the packet.
>   */
> -static void
> -fec_enet_rx(struct net_device *ndev)
> +static int
> +fec_enet_rx(struct net_device *ndev, int budget)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	const struct platform_device_id *id_entry =
> @@ -667,13 +683,12 @@ fec_enet_rx(struct net_device *ndev)
>  	struct	sk_buff	*skb;
>  	ushort	pkt_len;
>  	__u8 *data;
> +	int	pkt_received = 0;
>  
>  #ifdef CONFIG_M532x
>  	flush_cache_all();
>  #endif
>  
> -	spin_lock(&fep->hw_lock);
> -
>  	/* First, grab all of the stats for the incoming packet.
>  	 * These get messed up if we get called due to a busy condition.
>  	 */
> @@ -681,6 +696,10 @@ fec_enet_rx(struct net_device *ndev)
>  
>  	while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
>  
> +		if (pkt_received >= budget)
> +			break;
> +		pkt_received++;
> +

This seems wrong given how it's called from the ISR when NAPI isn't being
used.  If you have continuous packets coming in, you won't exit your ISR
and release your spin_lock until you hit INT32_MAX.

>  		/* Since we have allocated space to hold a complete frame,
>  		 * the last indicator should be set.
>  		 */
> @@ -796,7 +815,7 @@ rx_processing_done:
>  	}
>  	fep->cur_rx = bdp;
>  
> -	spin_unlock(&fep->hw_lock);
> +	return pkt_received;
>  }
>  
>  static irqreturn_t
> @@ -805,6 +824,7 @@ fec_enet_interrupt(int irq, void *dev_id)
>  	struct net_device *ndev = dev_id;
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	uint int_events;
> +	ulong flags;
>  	irqreturn_t ret = IRQ_NONE;
>  
>  	do {
> @@ -813,7 +833,18 @@ fec_enet_interrupt(int irq, void *dev_id)
>  
>  		if (int_events & FEC_ENET_RXF) {
>  			ret = IRQ_HANDLED;
> -			fec_enet_rx(ndev);
> +			spin_lock_irqsave(&fep->hw_lock, flags);
> +
> +			if (fep->use_napi) {
> +				/* Disable the RX interrupt */
> +				if (napi_schedule_prep(&fep->napi)) {
> +					fec_enet_rx_int_is_enabled(ndev, false);
> +					__napi_schedule(&fep->napi);
> +				}
> +			} else
> +				fec_enet_rx(ndev, INT32_MAX);

As mentioned above, you may want to check this a bit closer since this
can run for a very, very long time if you have constant packets flowing
when not using NAPI.

Also, the code formatting here should also have braces around the else
clause since the if clause has them.

> +
> +			spin_unlock_irqrestore(&fep->hw_lock, flags);
>  		}
>  
>  		/* Transmit OK, or non-fatal error. Update the buffer
> @@ -834,7 +865,16 @@ fec_enet_interrupt(int irq, void *dev_id)
>  	return ret;
>  }
>  
> -
> +static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *ndev = napi->dev;
> +	int pkgs = fec_enet_rx(ndev, budget);
> +	if (pkgs < budget) {
> +		napi_complete(napi);
> +		fec_enet_rx_int_is_enabled(ndev, true);
> +	}
> +	return pkgs;
> +}
>  
>  /* ------------------------------------------------------------------------- */
>  static void fec_get_mac(struct net_device *ndev)
> @@ -1392,6 +1432,9 @@ fec_enet_open(struct net_device *ndev)
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	int ret;
>  
> +	if (fep->use_napi)
> +		napi_enable(&fep->napi);
> +
>  	/* I should reset the ring buffers here, but I don't yet know
>  	 * a simple way to do that.
>  	 */
> @@ -1604,6 +1647,11 @@ static int fec_enet_init(struct net_device *ndev)
>  	ndev->netdev_ops = &fec_netdev_ops;
>  	ndev->ethtool_ops = &fec_enet_ethtool_ops;
>  
> +	if (fep->use_napi) {
> +		fec_enet_rx_int_is_enabled(ndev, false);
> +		netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, fep->napi_weight);

Make sure not to exceed 80 characters here.

> +	}
> +
>  	/* Initialize the receive buffer descriptors. */
>  	bdp = fep->rx_bd_base;
>  	for (i = 0; i < RX_RING_SIZE; i++) {
> @@ -1698,6 +1746,7 @@ fec_probe(struct platform_device *pdev)
>  	static int dev_id;
>  	struct pinctrl *pinctrl;
>  	struct regulator *reg_phy;
> +	struct device_node *np = pdev->dev.of_node;
>  
>  	of_id = of_match_device(fec_dt_ids, &pdev->dev);
>  	if (of_id)
> @@ -1811,6 +1860,11 @@ fec_probe(struct platform_device *pdev)
>  		}
>  	}
>  
> +	fep->use_napi = !of_property_read_bool(np, "disable_napi");
> +
> +	if (of_property_read_u32(np, "napi_weight", &fep->napi_weight))
> +		fep->napi_weight = FEC_NAPI_WEIGHT; /*using default value*/
> +
>  	fec_reset_phy(pdev);
>  
>  	ret = fec_enet_init(ndev);
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index 2ebedaf..31fcdd0 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -249,6 +249,10 @@ struct fec_enet_private {
>  	int	bufdesc_ex;
>  	int	pause_flag;
>  
> +	struct	napi_struct napi;
> +	int	napi_weight;
> +	bool	use_napi;
> +
>  	struct ptp_clock *ptp_clock;
>  	struct ptp_clock_info ptp_caps;
>  	unsigned long last_overflow_check;
> -- 
> 1.7.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sascha Hauer Jan. 23, 2013, 7:26 a.m. UTC | #3
On Wed, Jan 23, 2013 at 12:12:10PM +0800, Frank Li wrote:
> Add napi support
> 
> Before this patch
> 
>  iperf -s -i 1
>  ------------------------------------------------------------
>  Server listening on TCP port 5001
>  TCP window size: 85.3 KByte (default)
>  ------------------------------------------------------------
>  [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
>  [ ID] Interval       Transfer     Bandwidth
>  [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
>  [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
>  [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
>  [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
>  [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
>  [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
>  [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec
> 
> After this patch
>  [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
>  [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
>  [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
>  [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
>  [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
>  [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec
> 
> Signed-off-by: Frank Li <Frank.Li@freescale.com>
> Signed-off-by: Fugang Duan <B38611@freescale.com>
> ---
>  drivers/net/ethernet/freescale/fec.c |   68 ++++++++++++++++++++++++++++++---
>  drivers/net/ethernet/freescale/fec.h |    4 ++
>  2 files changed, 65 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
> index f52ba33..8fa420c 100644
> --- a/drivers/net/ethernet/freescale/fec.c
> +++ b/drivers/net/ethernet/freescale/fec.c
> @@ -67,6 +67,8 @@
>  #endif
>  
>  #define DRIVER_NAME	"fec"
> +#define FEC_NAPI_WEIGHT	64
> +#define INT32_MAX	0x7FFFFFFF
>  
>  /* Pause frame feild and FIFO threshold */
>  #define FEC_ENET_FCE	(1 << 5)
> @@ -565,6 +567,20 @@ fec_timeout(struct net_device *ndev)
>  }
>  
>  static void
> +fec_enet_rx_int_is_enabled(struct net_device *ndev, bool enabled)
> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	uint    int_events;
> +
> +	int_events = readl(fep->hwp + FEC_IMASK);
> +	if (enabled)
> +		int_events |= FEC_ENET_RXF;
> +	else
> +		int_events &= ~FEC_ENET_RXF;
> +	writel(int_events, fep->hwp + FEC_IMASK);
> +}
> +
> +static void
>  fec_enet_tx(struct net_device *ndev)
>  {
>  	struct	fec_enet_private *fep;
> @@ -656,8 +672,8 @@ fec_enet_tx(struct net_device *ndev)
>   * not been given to the system, we just set the empty indicator,
>   * effectively tossing the packet.
>   */
> -static void
> -fec_enet_rx(struct net_device *ndev)
> +static int
> +fec_enet_rx(struct net_device *ndev, int budget)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	const struct platform_device_id *id_entry =
> @@ -667,13 +683,12 @@ fec_enet_rx(struct net_device *ndev)
>  	struct	sk_buff	*skb;
>  	ushort	pkt_len;
>  	__u8 *data;
> +	int	pkt_received = 0;
>  
>  #ifdef CONFIG_M532x
>  	flush_cache_all();
>  #endif
>  
> -	spin_lock(&fep->hw_lock);
> -
>  	/* First, grab all of the stats for the incoming packet.
>  	 * These get messed up if we get called due to a busy condition.
>  	 */
> @@ -681,6 +696,10 @@ fec_enet_rx(struct net_device *ndev)
>  
>  	while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
>  
> +		if (pkt_received >= budget)
> +			break;
> +		pkt_received++;
> +
>  		/* Since we have allocated space to hold a complete frame,
>  		 * the last indicator should be set.
>  		 */
> @@ -796,7 +815,7 @@ rx_processing_done:
>  	}
>  	fep->cur_rx = bdp;
>  
> -	spin_unlock(&fep->hw_lock);
> +	return pkt_received;
>  }
>  
>  static irqreturn_t
> @@ -805,6 +824,7 @@ fec_enet_interrupt(int irq, void *dev_id)
>  	struct net_device *ndev = dev_id;
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	uint int_events;
> +	ulong flags;
>  	irqreturn_t ret = IRQ_NONE;
>  
>  	do {
> @@ -813,7 +833,18 @@ fec_enet_interrupt(int irq, void *dev_id)
>  
>  		if (int_events & FEC_ENET_RXF) {
>  			ret = IRQ_HANDLED;
> -			fec_enet_rx(ndev);
> +			spin_lock_irqsave(&fep->hw_lock, flags);
> +
> +			if (fep->use_napi) {
> +				/* Disable the RX interrupt */
> +				if (napi_schedule_prep(&fep->napi)) {
> +					fec_enet_rx_int_is_enabled(ndev, false);
> +					__napi_schedule(&fep->napi);
> +				}
> +			} else
> +				fec_enet_rx(ndev, INT32_MAX);
> +
> +			spin_unlock_irqrestore(&fep->hw_lock, flags);
>  		}
>  
>  		/* Transmit OK, or non-fatal error. Update the buffer
> @@ -834,7 +865,16 @@ fec_enet_interrupt(int irq, void *dev_id)
>  	return ret;
>  }
>  
> -
> +static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *ndev = napi->dev;
> +	int pkgs = fec_enet_rx(ndev, budget);
> +	if (pkgs < budget) {
> +		napi_complete(napi);
> +		fec_enet_rx_int_is_enabled(ndev, true);
> +	}
> +	return pkgs;
> +}
>  
>  /* ------------------------------------------------------------------------- */
>  static void fec_get_mac(struct net_device *ndev)
> @@ -1392,6 +1432,9 @@ fec_enet_open(struct net_device *ndev)
>  	struct fec_enet_private *fep = netdev_priv(ndev);
>  	int ret;
>  
> +	if (fep->use_napi)
> +		napi_enable(&fep->napi);
> +
>  	/* I should reset the ring buffers here, but I don't yet know
>  	 * a simple way to do that.
>  	 */
> @@ -1604,6 +1647,11 @@ static int fec_enet_init(struct net_device *ndev)
>  	ndev->netdev_ops = &fec_netdev_ops;
>  	ndev->ethtool_ops = &fec_enet_ethtool_ops;
>  
> +	if (fep->use_napi) {
> +		fec_enet_rx_int_is_enabled(ndev, false);
> +		netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, fep->napi_weight);
> +	}
> +
>  	/* Initialize the receive buffer descriptors. */
>  	bdp = fep->rx_bd_base;
>  	for (i = 0; i < RX_RING_SIZE; i++) {
> @@ -1698,6 +1746,7 @@ fec_probe(struct platform_device *pdev)
>  	static int dev_id;
>  	struct pinctrl *pinctrl;
>  	struct regulator *reg_phy;
> +	struct device_node *np = pdev->dev.of_node;
>  
>  	of_id = of_match_device(fec_dt_ids, &pdev->dev);
>  	if (of_id)
> @@ -1811,6 +1860,11 @@ fec_probe(struct platform_device *pdev)
>  		}
>  	}
>  
> +	fep->use_napi = !of_property_read_bool(np, "disable_napi");
> +
> +	if (of_property_read_u32(np, "napi_weight", &fep->napi_weight))
> +		fep->napi_weight = FEC_NAPI_WEIGHT; /*using default value*/

This comment seems rather unnecessary. Also it should be

	/* space left and right /*

devicetree bindings should use '-' instead of '_'. The binding
Documentation should be in the patch adding the bindings. Also
make sure the devicetree parsing is outside of the path for platform
based probing.

With this particular binding I'm unsure it should exist anyway since
it's configuration rather than hardware description. The devicetree
normally is for hardware description only.

Sascha
Zhi Li Jan. 23, 2013, 7:37 a.m. UTC | #4
2013/1/23 Eric Dumazet <eric.dumazet@gmail.com>:
> On Wed, 2013-01-23 at 12:12 +0800, Frank Li wrote:
>> Add napi support
>>
>> Before this patch
>>
>>  iperf -s -i 1
>>  ------------------------------------------------------------
>>  Server listening on TCP port 5001
>>  TCP window size: 85.3 KByte (default)
>>  ------------------------------------------------------------
>>  [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
>>  [ ID] Interval       Transfer     Bandwidth
>>  [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
>>  [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
>>  [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
>>  [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
>>  [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
>>  [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
>>  [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec
>>
>> After this patch
>>  [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
>>  [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
>>  [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
>>  [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
>>  [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
>>  [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec
>
> Strange, as you still call netif_rx()
>
> NAPI should call netif_receive_skb() instead
>

Thank you point out.
After re-test, I found performance is almost no change if use netif_receive_skb.
I am not sure if it is my NAPI implement problem.

napi_gro_received is better than netif_receive_skb, but worse than netif_rx.

From performance point view,

netif_rx                    --- fastest
napi_gro_received   --- middle, near to netif_rx
netif_receive_skb    --- slowest, almost the same as original no-napi version.

Do you have any idea about this phenomena?

best regards
Frank Li

>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhi Li Jan. 23, 2013, 7:44 a.m. UTC | #5
> This comment seems rather unnecessary. Also it should be
>
>         /* space left and right /*
>
> devicetree bindings should use '-' instead of '_'. The binding
> Documentation should be in the patch adding the bindings. Also
> make sure the devicetree parsing is outside of the path for platform
> based probing.
>
> With this particular binding I'm unsure it should exist anyway since
> it's configuration rather than hardware description. The devicetree
> normally is for hardware description only.
>

What's your opinion?
Using module parameter, device-tree to config it or the other method?

best regards
Frank Li
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Jan. 23, 2013, 7:52 a.m. UTC | #6
NAPI should not be configurable, all device drivers should be NAPI
only.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sascha Hauer Jan. 23, 2013, 7:53 a.m. UTC | #7
On Wed, Jan 23, 2013 at 03:44:53PM +0800, Frank Li wrote:
> > This comment seems rather unnecessary. Also it should be
> >
> >         /* space left and right /*
> >
> > devicetree bindings should use '-' instead of '_'. The binding
> > Documentation should be in the patch adding the bindings. Also
> > make sure the devicetree parsing is outside of the path for platform
> > based probing.
> >
> > With this particular binding I'm unsure it should exist anyway since
> > it's configuration rather than hardware description. The devicetree
> > normally is for hardware description only.
> >
> 
> What's your opinion?
> Using module parameter, device-tree to config it or the other method?

Why do you want to make it configurable anyway? So far no other driver
seems to need this. My suggestion is to make it unconfigurable.

For adding properties like this to the devicetree remember that they
should be OS agnostic. napi is a implementation detail relevant only
for Linux. And even Linux may decide to drop napi support in favour for
something better in the future.

Sascha
Zhi Li Jan. 23, 2013, 7:56 a.m. UTC | #8
2013/1/23 Waskiewicz Jr, Peter P <peter.p.waskiewicz.jr@intel.com>:
> On Wed, Jan 23, 2013 at 12:12:10PM +0800, Frank Li wrote:
>
> [...]
>
>> diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
>> index f52ba33..8fa420c 100644
>> --- a/drivers/net/ethernet/freescale/fec.c
>> +++ b/drivers/net/ethernet/freescale/fec.c
>> @@ -67,6 +67,8 @@
>>  #endif
>>
>>  #define DRIVER_NAME  "fec"
>> +#define FEC_NAPI_WEIGHT      64
>> +#define INT32_MAX    0x7FFFFFFF
>
> This seems awfully big as an upper-limit to your ISR processing.  See below
> for more comments.
>
>>
>>  /* Pause frame feild and FIFO threshold */
>>  #define FEC_ENET_FCE (1 << 5)
>> @@ -565,6 +567,20 @@ fec_timeout(struct net_device *ndev)
>>  }
>>
>>  static void
>> +fec_enet_rx_int_is_enabled(struct net_device *ndev, bool enabled)
>> +{
>> +     struct fec_enet_private *fep = netdev_priv(ndev);
>> +     uint    int_events;
>> +
>> +     int_events = readl(fep->hwp + FEC_IMASK);
>> +     if (enabled)
>> +             int_events |= FEC_ENET_RXF;
>> +     else
>> +             int_events &= ~FEC_ENET_RXF;
>> +     writel(int_events, fep->hwp + FEC_IMASK);
>> +}
>> +
>> +static void
>>  fec_enet_tx(struct net_device *ndev)
>>  {
>>       struct  fec_enet_private *fep;
>> @@ -656,8 +672,8 @@ fec_enet_tx(struct net_device *ndev)
>>   * not been given to the system, we just set the empty indicator,
>>   * effectively tossing the packet.
>>   */
>> -static void
>> -fec_enet_rx(struct net_device *ndev)
>> +static int
>> +fec_enet_rx(struct net_device *ndev, int budget)
>>  {
>>       struct fec_enet_private *fep = netdev_priv(ndev);
>>       const struct platform_device_id *id_entry =
>> @@ -667,13 +683,12 @@ fec_enet_rx(struct net_device *ndev)
>>       struct  sk_buff *skb;
>>       ushort  pkt_len;
>>       __u8 *data;
>> +     int     pkt_received = 0;
>>
>>  #ifdef CONFIG_M532x
>>       flush_cache_all();
>>  #endif
>>
>> -     spin_lock(&fep->hw_lock);
>> -
>>       /* First, grab all of the stats for the incoming packet.
>>        * These get messed up if we get called due to a busy condition.
>>        */
>> @@ -681,6 +696,10 @@ fec_enet_rx(struct net_device *ndev)
>>
>>       while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
>>
>> +             if (pkt_received >= budget)
>> +                     break;
>> +             pkt_received++;
>> +
>
> This seems wrong given how it's called from the ISR when NAPI isn't being
> used.  If you have continuous packets coming in, you won't exit your ISR
> and release your spin_lock until you hit INT32_MAX.

I just the keep original execute path if napi disabled.
Original code will progress all packages in queue.
FEC is 1G network, must receive speed around 500Mbps.
CPU speed is much faster than received package.
and RX queue only 8 entry, I think it is impossible to run loop more than 10.

why I set to INT32_MAX is keep the original execute path when napi disable.
INT32_MAX is impossible value to exit loop for this case.

>
>>               /* Since we have allocated space to hold a complete frame,
>>                * the last indicator should be set.
>>                */
>> @@ -796,7 +815,7 @@ rx_processing_done:
>>       }
>>       fep->cur_rx = bdp;
>>
>> -     spin_unlock(&fep->hw_lock);
>> +     return pkt_received;
>>  }
>>
>>  static irqreturn_t
>> @@ -805,6 +824,7 @@ fec_enet_interrupt(int irq, void *dev_id)
>>       struct net_device *ndev = dev_id;
>>       struct fec_enet_private *fep = netdev_priv(ndev);
>>       uint int_events;
>> +     ulong flags;
>>       irqreturn_t ret = IRQ_NONE;
>>
>>       do {
>> @@ -813,7 +833,18 @@ fec_enet_interrupt(int irq, void *dev_id)
>>
>>               if (int_events & FEC_ENET_RXF) {
>>                       ret = IRQ_HANDLED;
>> -                     fec_enet_rx(ndev);
>> +                     spin_lock_irqsave(&fep->hw_lock, flags);
>> +
>> +                     if (fep->use_napi) {
>> +                             /* Disable the RX interrupt */
>> +                             if (napi_schedule_prep(&fep->napi)) {
>> +                                     fec_enet_rx_int_is_enabled(ndev, false);
>> +                                     __napi_schedule(&fep->napi);
>> +                             }
>> +                     } else
>> +                             fec_enet_rx(ndev, INT32_MAX);
>
> As mentioned above, you may want to check this a bit closer since this
> can run for a very, very long time if you have constant packets flowing
> when not using NAPI.
>
> Also, the code formatting here should also have braces around the else
> clause since the if clause has them.

Okay, I will update in v2 patch.

>
>> +
>> +                     spin_unlock_irqrestore(&fep->hw_lock, flags);
>>               }
>>
>>               /* Transmit OK, or non-fatal error. Update the buffer
>> @@ -834,7 +865,16 @@ fec_enet_interrupt(int irq, void *dev_id)
>>       return ret;
>>  }
>>
>> -
>> +static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
>> +{
>> +     struct net_device *ndev = napi->dev;
>> +     int pkgs = fec_enet_rx(ndev, budget);
>> +     if (pkgs < budget) {
>> +             napi_complete(napi);
>> +             fec_enet_rx_int_is_enabled(ndev, true);
>> +     }
>> +     return pkgs;
>> +}
>>
>>  /* ------------------------------------------------------------------------- */
>>  static void fec_get_mac(struct net_device *ndev)
>> @@ -1392,6 +1432,9 @@ fec_enet_open(struct net_device *ndev)
>>       struct fec_enet_private *fep = netdev_priv(ndev);
>>       int ret;
>>
>> +     if (fep->use_napi)
>> +             napi_enable(&fep->napi);
>> +
>>       /* I should reset the ring buffers here, but I don't yet know
>>        * a simple way to do that.
>>        */
>> @@ -1604,6 +1647,11 @@ static int fec_enet_init(struct net_device *ndev)
>>       ndev->netdev_ops = &fec_netdev_ops;
>>       ndev->ethtool_ops = &fec_enet_ethtool_ops;
>>
>> +     if (fep->use_napi) {
>> +             fec_enet_rx_int_is_enabled(ndev, false);
>> +             netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, fep->napi_weight);
>
> Make sure not to exceed 80 characters here.
>

Okay, I will update in v2 patch.

>> +     }
>> +
>>       /* Initialize the receive buffer descriptors. */
>>       bdp = fep->rx_bd_base;
>>       for (i = 0; i < RX_RING_SIZE; i++) {
>> @@ -1698,6 +1746,7 @@ fec_probe(struct platform_device *pdev)
>>       static int dev_id;
>>       struct pinctrl *pinctrl;
>>       struct regulator *reg_phy;
>> +     struct device_node *np = pdev->dev.of_node;
>>
>>       of_id = of_match_device(fec_dt_ids, &pdev->dev);
>>       if (of_id)
>> @@ -1811,6 +1860,11 @@ fec_probe(struct platform_device *pdev)
>>               }
>>       }
>>
>> +     fep->use_napi = !of_property_read_bool(np, "disable_napi");
>> +
>> +     if (of_property_read_u32(np, "napi_weight", &fep->napi_weight))
>> +             fep->napi_weight = FEC_NAPI_WEIGHT; /*using default value*/
>> +
>>       fec_reset_phy(pdev);
>>
>>       ret = fec_enet_init(ndev);
>> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
>> index 2ebedaf..31fcdd0 100644
>> --- a/drivers/net/ethernet/freescale/fec.h
>> +++ b/drivers/net/ethernet/freescale/fec.h
>> @@ -249,6 +249,10 @@ struct fec_enet_private {
>>       int     bufdesc_ex;
>>       int     pause_flag;
>>
>> +     struct  napi_struct napi;
>> +     int     napi_weight;
>> +     bool    use_napi;
>> +
>>       struct ptp_clock *ptp_clock;
>>       struct ptp_clock_info ptp_caps;
>>       unsigned long last_overflow_check;
>> --
>> 1.7.1
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhi Li Jan. 23, 2013, 8 a.m. UTC | #9
> Why do you want to make it configurable anyway? So far no other driver
> seems to need this. My suggestion is to make it unconfigurable.
>
> For adding properties like this to the devicetree remember that they
> should be OS agnostic. napi is a implementation detail relevant only
> for Linux. And even Linux may decide to drop napi support in favour for
> something better in the future.
>

Okay, I will remove such config in v2 version.

best regards
Frank Li
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Jan. 23, 2013, 1:49 p.m. UTC | #10
On Wed, 2013-01-23 at 15:37 +0800, Frank Li wrote:
> 2013/1/23 Eric Dumazet <eric.dumazet@gmail.com>:
> > On Wed, 2013-01-23 at 12:12 +0800, Frank Li wrote:
> >> Add napi support
> >>
> >> Before this patch
> >>
> >>  iperf -s -i 1
> >>  ------------------------------------------------------------
> >>  Server listening on TCP port 5001
> >>  TCP window size: 85.3 KByte (default)
> >>  ------------------------------------------------------------
> >>  [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
> >>  [ ID] Interval       Transfer     Bandwidth
> >>  [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
> >>  [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
> >>  [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
> >>  [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
> >>  [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
> >>  [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
> >>  [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec
> >>
> >> After this patch
> >>  [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
> >>  [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
> >>  [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
> >>  [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
> >>  [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
> >>  [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec
> >
> > Strange, as you still call netif_rx()
> >
> > NAPI should call netif_receive_skb() instead
> >
> 
> Thank you point out.
> After re-test, I found performance is almost no change if use netif_receive_skb.
> I am not sure if it is my NAPI implement problem.
> 
> napi_gro_received is better than netif_receive_skb, but worse than netif_rx.
> 
> From performance point view,
> 
> netif_rx                    --- fastest
> napi_gro_received   --- middle, near to netif_rx
> netif_receive_skb    --- slowest, almost the same as original no-napi version.
> 
> Do you have any idea about this phenomena?

No idea, you'll have to find out using perf tool if available.

Is your machine SMP, and the application running on another cpu than the
softirq handler for your device ?

A NAPI driver must call netif_receive_skb(), especially if
the RX path does a full copy of the frame : Its hot in cpu cache and
should be processed at once.

Escaping to netif_rx() is only adding an extra softirq and risk of data
being evicted from cpu caches.

Here your performance increase only comes from hw_lock being not anymore
locked in RX path.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhi Li Jan. 24, 2013, 2:26 a.m. UTC | #11
2013/1/23 Eric Dumazet <eric.dumazet@gmail.com>:
> On Wed, 2013-01-23 at 15:37 +0800, Frank Li wrote:
>> 2013/1/23 Eric Dumazet <eric.dumazet@gmail.com>:
>> > On Wed, 2013-01-23 at 12:12 +0800, Frank Li wrote:
>> >> Add napi support
>> >>
>> >> Before this patch
>> >>
>> >>  iperf -s -i 1
>> >>  ------------------------------------------------------------
>> >>  Server listening on TCP port 5001
>> >>  TCP window size: 85.3 KByte (default)
>> >>  ------------------------------------------------------------
>> >>  [  4] local 10.192.242.153 port 5001 connected with 10.192.242.138 port 50004
>> >>  [ ID] Interval       Transfer     Bandwidth
>> >>  [  4]  0.0- 1.0 sec  41.2 MBytes   345 Mbits/sec
>> >>  [  4]  1.0- 2.0 sec  43.7 MBytes   367 Mbits/sec
>> >>  [  4]  2.0- 3.0 sec  42.8 MBytes   359 Mbits/sec
>> >>  [  4]  3.0- 4.0 sec  43.7 MBytes   367 Mbits/sec
>> >>  [  4]  4.0- 5.0 sec  42.7 MBytes   359 Mbits/sec
>> >>  [  4]  5.0- 6.0 sec  43.8 MBytes   367 Mbits/sec
>> >>  [  4]  6.0- 7.0 sec  43.0 MBytes   361 Mbits/sec
>> >>
>> >> After this patch
>> >>  [  4]  2.0- 3.0 sec  51.6 MBytes   433 Mbits/sec
>> >>  [  4]  3.0- 4.0 sec  51.8 MBytes   435 Mbits/sec
>> >>  [  4]  4.0- 5.0 sec  52.2 MBytes   438 Mbits/sec
>> >>  [  4]  5.0- 6.0 sec  52.1 MBytes   437 Mbits/sec
>> >>  [  4]  6.0- 7.0 sec  52.1 MBytes   437 Mbits/sec
>> >>  [  4]  7.0- 8.0 sec  52.3 MBytes   439 Mbits/sec
>> >
>> > Strange, as you still call netif_rx()
>> >
>> > NAPI should call netif_receive_skb() instead
>> >
>>
>> Thank you point out.
>> After re-test, I found performance is almost no change if use netif_receive_skb.
>> I am not sure if it is my NAPI implement problem.
>>
>> napi_gro_received is better than netif_receive_skb, but worse than netif_rx.
>>
>> From performance point view,
>>
>> netif_rx                    --- fastest
>> napi_gro_received   --- middle, near to netif_rx
>> netif_receive_skb    --- slowest, almost the same as original no-napi version.
>>
>> Do you have any idea about this phenomena?
>
> No idea, you'll have to find out using perf tool if available.
>
> Is your machine SMP, and the application running on another cpu than the
> softirq handler for your device ?

Yes, we support SMP.  Possibly run on another cpu. I will check.

>
> A NAPI driver must call netif_receive_skb(), especially if
> the RX path does a full copy of the frame : Its hot in cpu cache and
> should be processed at once.

How about napi_gro_receive? is it correct function? I found many driver use it.

>
> Escaping to netif_rx() is only adding an extra softirq and risk of data
> being evicted from cpu caches.
>
> Here your performance increase only comes from hw_lock being not anymore
> locked in RX path.
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P Jan. 24, 2013, 6:39 a.m. UTC | #12
On Thu, Jan 24, 2013 at 10:26:00AM +0800, Frank Li wrote:
> > A NAPI driver must call netif_receive_skb(), especially if
> > the RX path does a full copy of the frame : Its hot in cpu cache and
> > should be processed at once.
> 
> How about napi_gro_receive? is it correct function? I found many driver use it.

If you want to coalesce your Rx frames to reduce overhead, then yes.  It's
a good performance improvement overall.

-PJ
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index f52ba33..8fa420c 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -67,6 +67,8 @@ 
 #endif
 
 #define DRIVER_NAME	"fec"
+#define FEC_NAPI_WEIGHT	64
+#define INT32_MAX	0x7FFFFFFF
 
 /* Pause frame feild and FIFO threshold */
 #define FEC_ENET_FCE	(1 << 5)
@@ -565,6 +567,20 @@  fec_timeout(struct net_device *ndev)
 }
 
 static void
+fec_enet_rx_int_is_enabled(struct net_device *ndev, bool enabled)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	uint    int_events;
+
+	int_events = readl(fep->hwp + FEC_IMASK);
+	if (enabled)
+		int_events |= FEC_ENET_RXF;
+	else
+		int_events &= ~FEC_ENET_RXF;
+	writel(int_events, fep->hwp + FEC_IMASK);
+}
+
+static void
 fec_enet_tx(struct net_device *ndev)
 {
 	struct	fec_enet_private *fep;
@@ -656,8 +672,8 @@  fec_enet_tx(struct net_device *ndev)
  * not been given to the system, we just set the empty indicator,
  * effectively tossing the packet.
  */
-static void
-fec_enet_rx(struct net_device *ndev)
+static int
+fec_enet_rx(struct net_device *ndev, int budget)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	const struct platform_device_id *id_entry =
@@ -667,13 +683,12 @@  fec_enet_rx(struct net_device *ndev)
 	struct	sk_buff	*skb;
 	ushort	pkt_len;
 	__u8 *data;
+	int	pkt_received = 0;
 
 #ifdef CONFIG_M532x
 	flush_cache_all();
 #endif
 
-	spin_lock(&fep->hw_lock);
-
 	/* First, grab all of the stats for the incoming packet.
 	 * These get messed up if we get called due to a busy condition.
 	 */
@@ -681,6 +696,10 @@  fec_enet_rx(struct net_device *ndev)
 
 	while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
 
+		if (pkt_received >= budget)
+			break;
+		pkt_received++;
+
 		/* Since we have allocated space to hold a complete frame,
 		 * the last indicator should be set.
 		 */
@@ -796,7 +815,7 @@  rx_processing_done:
 	}
 	fep->cur_rx = bdp;
 
-	spin_unlock(&fep->hw_lock);
+	return pkt_received;
 }
 
 static irqreturn_t
@@ -805,6 +824,7 @@  fec_enet_interrupt(int irq, void *dev_id)
 	struct net_device *ndev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	uint int_events;
+	ulong flags;
 	irqreturn_t ret = IRQ_NONE;
 
 	do {
@@ -813,7 +833,18 @@  fec_enet_interrupt(int irq, void *dev_id)
 
 		if (int_events & FEC_ENET_RXF) {
 			ret = IRQ_HANDLED;
-			fec_enet_rx(ndev);
+			spin_lock_irqsave(&fep->hw_lock, flags);
+
+			if (fep->use_napi) {
+				/* Disable the RX interrupt */
+				if (napi_schedule_prep(&fep->napi)) {
+					fec_enet_rx_int_is_enabled(ndev, false);
+					__napi_schedule(&fep->napi);
+				}
+			} else
+				fec_enet_rx(ndev, INT32_MAX);
+
+			spin_unlock_irqrestore(&fep->hw_lock, flags);
 		}
 
 		/* Transmit OK, or non-fatal error. Update the buffer
@@ -834,7 +865,16 @@  fec_enet_interrupt(int irq, void *dev_id)
 	return ret;
 }
 
-
+static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
+{
+	struct net_device *ndev = napi->dev;
+	int pkgs = fec_enet_rx(ndev, budget);
+	if (pkgs < budget) {
+		napi_complete(napi);
+		fec_enet_rx_int_is_enabled(ndev, true);
+	}
+	return pkgs;
+}
 
 /* ------------------------------------------------------------------------- */
 static void fec_get_mac(struct net_device *ndev)
@@ -1392,6 +1432,9 @@  fec_enet_open(struct net_device *ndev)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int ret;
 
+	if (fep->use_napi)
+		napi_enable(&fep->napi);
+
 	/* I should reset the ring buffers here, but I don't yet know
 	 * a simple way to do that.
 	 */
@@ -1604,6 +1647,11 @@  static int fec_enet_init(struct net_device *ndev)
 	ndev->netdev_ops = &fec_netdev_ops;
 	ndev->ethtool_ops = &fec_enet_ethtool_ops;
 
+	if (fep->use_napi) {
+		fec_enet_rx_int_is_enabled(ndev, false);
+		netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, fep->napi_weight);
+	}
+
 	/* Initialize the receive buffer descriptors. */
 	bdp = fep->rx_bd_base;
 	for (i = 0; i < RX_RING_SIZE; i++) {
@@ -1698,6 +1746,7 @@  fec_probe(struct platform_device *pdev)
 	static int dev_id;
 	struct pinctrl *pinctrl;
 	struct regulator *reg_phy;
+	struct device_node *np = pdev->dev.of_node;
 
 	of_id = of_match_device(fec_dt_ids, &pdev->dev);
 	if (of_id)
@@ -1811,6 +1860,11 @@  fec_probe(struct platform_device *pdev)
 		}
 	}
 
+	fep->use_napi = !of_property_read_bool(np, "disable_napi");
+
+	if (of_property_read_u32(np, "napi_weight", &fep->napi_weight))
+		fep->napi_weight = FEC_NAPI_WEIGHT; /*using default value*/
+
 	fec_reset_phy(pdev);
 
 	ret = fec_enet_init(ndev);
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 2ebedaf..31fcdd0 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -249,6 +249,10 @@  struct fec_enet_private {
 	int	bufdesc_ex;
 	int	pause_flag;
 
+	struct	napi_struct napi;
+	int	napi_weight;
+	bool	use_napi;
+
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_caps;
 	unsigned long last_overflow_check;