Message ID | 1405504302-21344-1-git-send-email-sonic.adi@gmail.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Wed, 2014-07-16 at 17:51 +0800, Sonic Zhang wrote: > From: Sonic Zhang <sonic.zhang@analog.com> > > Ethernet RX DMA buffers are polled in NAPI work queue other than received > directly in DMA RX interrupt handler. > > Signed-off-by: Sonic Zhang <sonic.zhang@analog.com> > > --- > v2-changes: > - avoid test NAPI_STATE_NPSVC bit in net device driver > > v3-changes: > - use tabs while indenting the code > > v4-changes: > - unconditionally complete the NAPI poll and re-enable the MAC_RX IRQ > > v5-changes: > - should match open parenthesis > > Signed-off-by: Sonic Zhang <sonic.zhang@analog.com> > --- > drivers/net/ethernet/adi/bfin_mac.c | 80 +++++++++++++++++++++++-------------- > drivers/net/ethernet/adi/bfin_mac.h | 1 + > 2 files changed, 51 insertions(+), 30 deletions(-) > > diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c > index 7ae74d4..6dde102 100644 > --- a/drivers/net/ethernet/adi/bfin_mac.c > +++ b/drivers/net/ethernet/adi/bfin_mac.c > @@ -1218,11 +1218,14 @@ out: > #define RX_ERROR_MASK (RX_LONG | RX_ALIGN | RX_CRC | RX_LEN | \ > RX_FRAG | RX_ADDR | RX_DMAO | RX_PHY | RX_LATE | RX_RANGE) > > -static void bfin_mac_rx(struct net_device *dev) > +static void bfin_mac_rx(struct napi_struct *napi, int budget) budget seems unused in this function. 
> { > + struct bfin_mac_local *lp = container_of(napi, > + struct bfin_mac_local, > + napi); > + struct net_device *dev = lp->ndev; > struct sk_buff *skb, *new_skb; > unsigned short len; > - struct bfin_mac_local *lp __maybe_unused = netdev_priv(dev); > #if defined(BFIN_MAC_CSUM_OFFLOAD) > unsigned int i; > unsigned char fcs[ETH_FCS_LEN + 1]; > @@ -1256,7 +1259,7 @@ static void bfin_mac_rx(struct net_device *dev) > current_rx_ptr->skb = new_skb; > current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2; > > - len = (unsigned short)((current_rx_ptr->status.status_word) & RX_FRLEN); > + len = (unsigned short)(current_rx_ptr->status.status_word & RX_FRLEN); > /* Deduce Ethernet FCS length from Ethernet payload length */ > len -= ETH_FCS_LEN; > skb_put(skb, len); > @@ -1294,7 +1297,8 @@ static void bfin_mac_rx(struct net_device *dev) > } > #endif > > - netif_rx(skb); > + napi_gro_receive(&lp->napi, skb); > + > dev->stats.rx_packets++; > dev->stats.rx_bytes += len; > out: > @@ -1302,41 +1306,51 @@ out: > current_rx_ptr = current_rx_ptr->next; > } > > +static int bfin_mac_poll(struct napi_struct *napi, int budget) > +{ > + int i = 0; > + unsigned long flags; > + > + while (current_rx_ptr->status.status_word != 0 && i < budget) { > + bfin_mac_rx(napi, budget); > + i++; > + } > + > + if (i < budget) { > + napi_gro_flush(napi, false); > + local_irq_save(flags); > + __napi_complete(napi); > + local_irq_restore(flags); > + enable_irq(IRQ_MAC_RX); I have no idea why you open-code napi_complete(); Why cant you just copy/paste what other drivers do here ? if (i < budget) { napi_complete(napi); enable_irq(IRQ_MAC_RX); } ... > > @@ -1689,6 +1704,8 @@ static int bfin_mac_probe(struct platform_device *pdev) > lp->tx_reclaim_timer.data = (unsigned long)lp; > lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; > > + netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); Have you checked kernel log ? 
You should have hit : if (weight > NAPI_POLL_WEIGHT) pr_err_once("netif_napi_add() called with weight %d on device %s\n", weight, dev->name); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Eric Dumazet <eric.dumazet@gmail.com> Date: Wed, 16 Jul 2014 15:17:29 +0200 > On Wed, 2014-07-16 at 17:51 +0800, Sonic Zhang wrote: >> From: Sonic Zhang <sonic.zhang@analog.com> >> + if (i < budget) { >> + napi_gro_flush(napi, false); >> + local_irq_save(flags); >> + __napi_complete(napi); >> + local_irq_restore(flags); >> + enable_irq(IRQ_MAC_RX); > > I have no idea why you open-code napi_complete(); > > Why cant you just copy/paste what other drivers do here ? > > if (i < budget) { > napi_complete(napi); > enable_irq(IRQ_MAC_RX); > } Generally speaking, I am very frustrated at how this bfin_mac NAPI conversion tries to do things in special ways. Please mimic what other drivers do rather than trying to do things in a special way unique to your driver. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Eric, On Wed, Jul 16, 2014 at 9:17 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > On Wed, 2014-07-16 at 17:51 +0800, Sonic Zhang wrote: >> From: Sonic Zhang <sonic.zhang@analog.com> >> >> Ethernet RX DMA buffers are polled in NAPI work queue other than received >> directly in DMA RX interrupt handler. >> >> Signed-off-by: Sonic Zhang <sonic.zhang@analog.com> >> >> --- >> v2-changes: >> - avoid test NAPI_STATE_NPSVC bit in net device driver >> >> v3-changes: >> - use tabs while indenting the code >> >> v4-changes: >> - unconditionally complete the NAPI poll and re-enable the MAC_RX IRQ >> >> v5-changes: >> - should match open parenthesis >> >> Signed-off-by: Sonic Zhang <sonic.zhang@analog.com> >> --- >> drivers/net/ethernet/adi/bfin_mac.c | 80 +++++++++++++++++++++++-------------- >> drivers/net/ethernet/adi/bfin_mac.h | 1 + >> 2 files changed, 51 insertions(+), 30 deletions(-) >> >> diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c >> index 7ae74d4..6dde102 100644 >> --- a/drivers/net/ethernet/adi/bfin_mac.c >> +++ b/drivers/net/ethernet/adi/bfin_mac.c >> @@ -1218,11 +1218,14 @@ out: >> #define RX_ERROR_MASK (RX_LONG | RX_ALIGN | RX_CRC | RX_LEN | \ >> RX_FRAG | RX_ADDR | RX_DMAO | RX_PHY | RX_LATE | RX_RANGE) >> >> -static void bfin_mac_rx(struct net_device *dev) >> +static void bfin_mac_rx(struct napi_struct *napi, int budget) > > > budget seems unused in this function. 
> >> { >> + struct bfin_mac_local *lp = container_of(napi, >> + struct bfin_mac_local, >> + napi); >> + struct net_device *dev = lp->ndev; >> struct sk_buff *skb, *new_skb; >> unsigned short len; >> - struct bfin_mac_local *lp __maybe_unused = netdev_priv(dev); >> #if defined(BFIN_MAC_CSUM_OFFLOAD) >> unsigned int i; >> unsigned char fcs[ETH_FCS_LEN + 1]; >> @@ -1256,7 +1259,7 @@ static void bfin_mac_rx(struct net_device *dev) >> current_rx_ptr->skb = new_skb; >> current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2; >> >> - len = (unsigned short)((current_rx_ptr->status.status_word) & RX_FRLEN); >> + len = (unsigned short)(current_rx_ptr->status.status_word & RX_FRLEN); >> /* Deduce Ethernet FCS length from Ethernet payload length */ >> len -= ETH_FCS_LEN; >> skb_put(skb, len); >> @@ -1294,7 +1297,8 @@ static void bfin_mac_rx(struct net_device *dev) >> } >> #endif >> >> - netif_rx(skb); >> + napi_gro_receive(&lp->napi, skb); >> + >> dev->stats.rx_packets++; >> dev->stats.rx_bytes += len; >> out: >> @@ -1302,41 +1306,51 @@ out: >> current_rx_ptr = current_rx_ptr->next; >> } >> >> +static int bfin_mac_poll(struct napi_struct *napi, int budget) >> +{ >> + int i = 0; >> + unsigned long flags; >> + >> + while (current_rx_ptr->status.status_word != 0 && i < budget) { >> + bfin_mac_rx(napi, budget); >> + i++; >> + } >> + >> + if (i < budget) { >> + napi_gro_flush(napi, false); >> + local_irq_save(flags); >> + __napi_complete(napi); >> + local_irq_restore(flags); >> + enable_irq(IRQ_MAC_RX); > > I have no idea why you open-code napi_complete(); > In the case of netpoll_poll_dev() is invoked repeatedly from an exception handler, the NAPI poll() callback is always called with bit NAPI_STATE_NPSVC set in poll_one_napi(). But, when bit NAPI_STATE_NPSVC is set, napi_complete() doesn't deque and clear the NAPI_STATE_SCHED. The MAC_RX interrupt is never disabled in net poll loop when bit NAPI_STATE_SCHED is set. 
So, the call to enable_irq() in the next NAPI poll() callback triggers the warning "Unbalanced enable for IRQ %d\n". The other Ethernet drivers don't call enable_irq() after napi_complete(). They disable the interrupt in their device controller. But, bfin mac has no such capability and has to disable and re-enable the MAC_RX interrupt at the system interrupt controller via disable_irq_nosync()/enable_irq(). > Why cant you just copy/paste what other drivers do here ? > > if (i < budget) { > napi_complete(napi); > enable_irq(IRQ_MAC_RX); > } > I checked other drivers in drivers/net/ethernet/ In realtek/8139cp.c ------------------------------------------------ napi_gro_flush(napi, false); spin_lock_irqsave(&cp->lock, flags); __napi_complete(napi); cpw16_f(IntrMask, cp_intr_mask); spin_unlock_irqrestore(&cp->lock, flags); ------------------------------------------------ The same as my code. In intel/e1000/e1000_main.c ------------------------------------------------ napi_complete(napi); if (!test_bit(__E1000_DOWN, &adapter->flags)) e1000_irq_enable(adapter); static void e1000_irq_enable(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; ew32(IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH(); } ------------------------------------------------- e1000_irq_enable(), rather than the system enable_irq(), is called after napi_complete(). e1000_irq_enable() writes the register in the e1000 ethernet controller directly. Even if it is invoked more times than e1000_irq_disable() is in the net_poll(), no unbalanced IRQ enabling warning is printed by the kernel. > ... > >> >> @@ -1689,6 +1704,8 @@ static int bfin_mac_probe(struct platform_device *pdev) >> lp->tx_reclaim_timer.data = (unsigned long)lp; >> lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; >> >> + netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); > > > Have you checked kernel log ? 
> > You should have hit : > > if (weight > NAPI_POLL_WEIGHT) > pr_err_once("netif_napi_add() called with weight %d on device %s\n", > weight, dev->name); > > The default value of CONFIG_BFIN_RX_DESC_NUM is 20 while NAPI_POLL_WEIGHT is 64. I don't see this check being hit. I can set the range of CONFIG_BFIN_RX_DESC_NUM to be 20 ~ 64 in Kconfig as well. Regards, Sonic -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi David, On Thu, Jul 17, 2014 at 5:03 AM, David Miller <davem@davemloft.net> wrote: > From: Eric Dumazet <eric.dumazet@gmail.com> > Date: Wed, 16 Jul 2014 15:17:29 +0200 > >> On Wed, 2014-07-16 at 17:51 +0800, Sonic Zhang wrote: >>> From: Sonic Zhang <sonic.zhang@analog.com> >>> + if (i < budget) { >>> + napi_gro_flush(napi, false); >>> + local_irq_save(flags); >>> + __napi_complete(napi); >>> + local_irq_restore(flags); >>> + enable_irq(IRQ_MAC_RX); >> >> I have no idea why you open-code napi_complete(); >> >> Why cant you just copy/paste what other drivers do here ? >> >> if (i < budget) { >> napi_complete(napi); >> enable_irq(IRQ_MAC_RX); >> } > > Generally speaking, I am very frustrated at how this bfin_mac NAPI > conversion tries to do things in special ways. > > Please mimick what other drivers do rather than trying to do things in > a special way unique to your driver. Driver realtek/8139cp.c does things in the same way as the bfin_mac NAPI code. The Bfin EMAC device has no capability to disable the MAC_RX interrupt in its EMAC register directly. The bfin_mac driver has to do it in its system interrupt controller via the kernel API enable_irq()/disable_irq_nosync(). In the net_poll loop, if napi_complete() is called, the EMAC IRQ is enabled more times than it is disabled. This is not a problem for drivers which don't call enable_irq()/disable_irq_nosync(). But, the IRQ enabling and disabling operations have to be balanced in the bfin_mac driver via __napi_complete() to avoid the kernel warning. Regards, Sonic Zhang -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sonic Zhang <sonic.adi@gmail.com> : > On Thu, Jul 17, 2014 at 5:03 AM, David Miller <davem@davemloft.net> wrote: [...] > > Please mimick what other drivers do rather than trying to do things in > > a special way unique to your driver. > > Driver realtek/8139cp.c does things in the same way as the bfin_mac > NAPI code. It wasn't your lucky day. > In net_poll loop, if napi_complete() is called, the EMAC IRQ is > enabled more times than the it is disabled. This is not a problem to > drivers which don't call enable_irq()/disable_irq_nosync(). But, the > IRQ enabling and disabling operations have to be balanced in the > bfin_mac driver via __napi_complete() to avoid the kernel warning. Thanks for the explanation. In order to use plain napi_complete, may I suggest keeping a mask of the IRQs disabled by bfin_mac_interrupt in the device private area, then calling enable_irq on the relevant one - if any - in bfin_mac_poll? It should keep things balanced.
Hi Francois, On Fri, Jul 18, 2014 at 6:29 AM, Francois Romieu <romieu@fr.zoreil.com> wrote: > Sonic Zhang <sonic.adi@gmail.com> : >> On Thu, Jul 17, 2014 at 5:03 AM, David Miller <davem@davemloft.net> wrote: > [...] >> > Please mimick what other drivers do rather than trying to do things in >> > a special way unique to your driver. >> >> Driver realtek/8139cp.c does things in the same way as the bfin_mac >> NAPI code. > > It wasn't your lucky day. > >> In net_poll loop, if napi_complete() is called, the EMAC IRQ is >> enabled more times than the it is disabled. This is not a problem to >> drivers which don't call enable_irq()/disable_irq_nosync(). But, the >> IRQ enabling and disabling operations have to be balanced in the >> bfin_mac driver via __napi_complete() to avoid the kernel warning. > > Thanks for the explanation. > > In order to use plain napi_complete, may I suggest keeping a mask of > bfin_mac_interrupt disabled irq in the device private area then > enable_irq relevant one - if any - in bfin_mac_poll ? > > It should keep things balanced. This is fine with me. Thanks, Sonic Zhang -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 7ae74d4..6dde102 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1218,11 +1218,14 @@ out: #define RX_ERROR_MASK (RX_LONG | RX_ALIGN | RX_CRC | RX_LEN | \ RX_FRAG | RX_ADDR | RX_DMAO | RX_PHY | RX_LATE | RX_RANGE) -static void bfin_mac_rx(struct net_device *dev) +static void bfin_mac_rx(struct napi_struct *napi, int budget) { + struct bfin_mac_local *lp = container_of(napi, + struct bfin_mac_local, + napi); + struct net_device *dev = lp->ndev; struct sk_buff *skb, *new_skb; unsigned short len; - struct bfin_mac_local *lp __maybe_unused = netdev_priv(dev); #if defined(BFIN_MAC_CSUM_OFFLOAD) unsigned int i; unsigned char fcs[ETH_FCS_LEN + 1]; @@ -1256,7 +1259,7 @@ static void bfin_mac_rx(struct net_device *dev) current_rx_ptr->skb = new_skb; current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2; - len = (unsigned short)((current_rx_ptr->status.status_word) & RX_FRLEN); + len = (unsigned short)(current_rx_ptr->status.status_word & RX_FRLEN); /* Deduce Ethernet FCS length from Ethernet payload length */ len -= ETH_FCS_LEN; skb_put(skb, len); @@ -1294,7 +1297,8 @@ static void bfin_mac_rx(struct net_device *dev) } #endif - netif_rx(skb); + napi_gro_receive(&lp->napi, skb); + dev->stats.rx_packets++; dev->stats.rx_bytes += len; out: @@ -1302,41 +1306,51 @@ out: current_rx_ptr = current_rx_ptr->next; } +static int bfin_mac_poll(struct napi_struct *napi, int budget) +{ + int i = 0; + unsigned long flags; + + while (current_rx_ptr->status.status_word != 0 && i < budget) { + bfin_mac_rx(napi, budget); + i++; + } + + if (i < budget) { + napi_gro_flush(napi, false); + local_irq_save(flags); + __napi_complete(napi); + local_irq_restore(flags); + enable_irq(IRQ_MAC_RX); + } + + return i; +} + /* interrupt routine to handle rx and error signal */ static irqreturn_t bfin_mac_interrupt(int irq, void *dev_id) { - struct net_device 
*dev = dev_id; - int number = 0; - -get_one_packet: - if (current_rx_ptr->status.status_word == 0) { - /* no more new packet received */ - if (number == 0) { - if (current_rx_ptr->next->status.status_word != 0) { - current_rx_ptr = current_rx_ptr->next; - goto real_rx; - } - } - bfin_write_DMA1_IRQ_STATUS(bfin_read_DMA1_IRQ_STATUS() | - DMA_DONE | DMA_ERR); - return IRQ_HANDLED; + struct bfin_mac_local *lp = netdev_priv(dev_id); + u32 status; + + status = bfin_read_DMA1_IRQ_STATUS(); + + bfin_write_DMA1_IRQ_STATUS(status | DMA_DONE | DMA_ERR); + if ((status & DMA_DONE) && napi_schedule_prep(&lp->napi)) { + disable_irq_nosync(IRQ_MAC_RX); + __napi_schedule(&lp->napi); } -real_rx: - bfin_mac_rx(dev); - number++; - goto get_one_packet; + return IRQ_HANDLED; } #ifdef CONFIG_NET_POLL_CONTROLLER -static void bfin_mac_poll(struct net_device *dev) +static void bfin_mac_poll_controller(struct net_device *dev) { struct bfin_mac_local *lp = netdev_priv(dev); - disable_irq(IRQ_MAC_RX); bfin_mac_interrupt(IRQ_MAC_RX, dev); tx_reclaim_skb(lp); - enable_irq(IRQ_MAC_RX); } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -1428,14 +1442,13 @@ static void bfin_mac_timeout(struct net_device *dev) tx_list_head = tx_list_head->next; } - if (netif_queue_stopped(lp->ndev)) - netif_wake_queue(lp->ndev); + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); bfin_mac_enable(lp->phydev); /* We can accept TX packets again */ dev->trans_start = jiffies; /* prevent tx timeout */ - netif_wake_queue(dev); } static void bfin_mac_multicast_hash(struct net_device *dev) @@ -1562,6 +1575,7 @@ static int bfin_mac_open(struct net_device *dev) return ret; pr_debug("hardware init finished\n"); + napi_enable(&lp->napi); netif_start_queue(dev); netif_carrier_on(dev); @@ -1579,6 +1593,7 @@ static int bfin_mac_close(struct net_device *dev) pr_debug("%s: %s\n", dev->name, __func__); netif_stop_queue(dev); + napi_disable(&lp->napi); netif_carrier_off(dev); phy_stop(lp->phydev); @@ -1604,7 +1619,7 @@ static const 
struct net_device_ops bfin_mac_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = bfin_mac_poll, + .ndo_poll_controller = bfin_mac_poll_controller, #endif }; @@ -1689,6 +1704,8 @@ static int bfin_mac_probe(struct platform_device *pdev) lp->tx_reclaim_timer.data = (unsigned long)lp; lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; + netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); + spin_lock_init(&lp->lock); /* now, enable interrupts */ @@ -1723,6 +1740,7 @@ out_err_phc: out_err_reg_ndev: free_irq(IRQ_MAC_RX, ndev); out_err_request_irq: + netif_napi_del(&lp->napi); out_err_mii_probe: mdiobus_unregister(lp->mii_bus); mdiobus_free(lp->mii_bus); @@ -1743,6 +1761,8 @@ static int bfin_mac_remove(struct platform_device *pdev) unregister_netdev(ndev); + netif_napi_del(&lp->napi); + free_irq(IRQ_MAC_RX, ndev); free_netdev(ndev); diff --git a/drivers/net/ethernet/adi/bfin_mac.h b/drivers/net/ethernet/adi/bfin_mac.h index 6dec86a..3523f0e 100644 --- a/drivers/net/ethernet/adi/bfin_mac.h +++ b/drivers/net/ethernet/adi/bfin_mac.h @@ -80,6 +80,7 @@ struct bfin_mac_local { int irq_wake_requested; struct timer_list tx_reclaim_timer; struct net_device *ndev; + struct napi_struct napi; /* Data for EMAC_VLAN1 regs */ u16 vlan1_mask, vlan2_mask;