diff mbox

[v4,2/9] bgmac: leave interrupts disabled as long as there is work to do

Message ID 1428933162-26763-2-git-send-email-nbd@openwrt.org
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Felix Fietkau April 13, 2015, 1:52 p.m. UTC
Always poll rx and tx during NAPI poll instead of relying on the status
of the first interrupt. This prevents bgmac_poll from leaving unfinished
work around until the next IRQ.
In my tests this makes bridging/routing throughput under heavy load more
stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
the entire budget.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 31 ++++++++++---------------------
 drivers/net/ethernet/broadcom/bgmac.h |  1 -
 2 files changed, 10 insertions(+), 22 deletions(-)

Comments

Rafał Miłecki April 13, 2015, 2:34 p.m. UTC | #1
On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
> Always poll rx and tx during NAPI poll instead of relying on the status
> of the first interrupt. This prevents bgmac_poll from leaving unfinished
> work around until the next IRQ.
> In my tests this makes bridging/routing throughput under heavy load more
> stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
> the entire budget.

What do you think about keeping u32 int_status; and just updating it
at the end of bgmac_poll? In case you decide to implement multiple TX
queues, it may be cheaper to just check a single bit in memory instead
reading DMA ring status.


> @@ -1221,14 +1219,13 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
>         if (!int_status)
>                 return IRQ_NONE;
>
> -       /* Ack */
> -       bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
> +       int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
> +       if (int_status)
> +               bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
>
>         /* Disable new interrupts until handling existing ones */
>         bgmac_chip_intrs_off(bgmac);
>
> -       bgmac->int_status = int_status;
> -
>         napi_schedule(&bgmac->napi);
>
>         return IRQ_HANDLED;
> @@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
>  static int bgmac_poll(struct napi_struct *napi, int weight)
>  {
>         struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
> -       struct bgmac_dma_ring *ring;
>         int handled = 0;
>
> -       if (bgmac->int_status & BGMAC_IS_TX0) {
> -               ring = &bgmac->tx_ring[0];
> -               bgmac_dma_tx_free(bgmac, ring);
> -               bgmac->int_status &= ~BGMAC_IS_TX0;
> -       }
> +       /* Ack */
> +       bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);

Is this OK to ack every IRQ, even un handled ones?


> +       /* poll again if more events arrived in the mean time */
> +       if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
> +               return handled;

s/mean time/meantime/ (or meanwhile)
And if you care to keep one type of comments:
s/poll/Poll/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felix Fietkau April 13, 2015, 3:03 p.m. UTC | #2
On 2015-04-13 16:34, Rafał Miłecki wrote:
> On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
>> Always poll rx and tx during NAPI poll instead of relying on the status
>> of the first interrupt. This prevents bgmac_poll from leaving unfinished
>> work around until the next IRQ.
>> In my tests this makes bridging/routing throughput under heavy load more
>> stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
>> the entire budget.
> 
> What do you think about keeping u32 int_status; and just updating it
> at the end of bgmac_poll? In case you decide to implement multiple TX
> queues, it may be cheaper to just check a single bit in memory instead
> reading DMA ring status.
Events might arrive in the mean time. I ran some tests, and not checking
the irq status for processing rx/tx gave me fewer total IRQs under load.

>> @@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
>>  static int bgmac_poll(struct napi_struct *napi, int weight)
>>  {
>>         struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
>> -       struct bgmac_dma_ring *ring;
>>         int handled = 0;
>>
>> -       if (bgmac->int_status & BGMAC_IS_TX0) {
>> -               ring = &bgmac->tx_ring[0];
>> -               bgmac_dma_tx_free(bgmac, ring);
>> -               bgmac->int_status &= ~BGMAC_IS_TX0;
>> -       }
>> +       /* Ack */
>> +       bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
> 
> Is this OK to ack every IRQ, even un handled ones?
Yes. The only IRQ types that matter are the ones handled by the poll
function.

>> +       /* poll again if more events arrived in the mean time */
>> +       if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
>> +               return handled;
> 
> s/mean time/meantime/ (or meanwhile)
> And if you care to keep one type of comments:
> s/poll/Poll/
Will do.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafał Miłecki April 13, 2015, 3:06 p.m. UTC | #3
On 13 April 2015 at 17:03, Felix Fietkau <nbd@openwrt.org> wrote:
> On 2015-04-13 16:34, Rafał Miłecki wrote:
>> On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
>>> Always poll rx and tx during NAPI poll instead of relying on the status
>>> of the first interrupt. This prevents bgmac_poll from leaving unfinished
>>> work around until the next IRQ.
>>> In my tests this makes bridging/routing throughput under heavy load more
>>> stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
>>> the entire budget.
>>
>> What do you think about keeping u32 int_status; and just updating it
>> at the end of bgmac_poll? In case you decide to implement multiple TX
>> queues, it may be cheaper to just check a single bit in memory instead
>> reading DMA ring status.
> Events might arrive in the mean time. I ran some tests, and not checking
> the irq status for processing rx/tx gave me fewer total IRQs under load.

But you do check the status, I mean the following line:
if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))

Would it make sense to do
bgmac->int_status = bgmac_read(bgmac, BGMAC_INT_STATUS);
if (bgmac->int_status)
instead?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felix Fietkau April 13, 2015, 3:44 p.m. UTC | #4
On 2015-04-13 17:06, Rafał Miłecki wrote:
> On 13 April 2015 at 17:03, Felix Fietkau <nbd@openwrt.org> wrote:
>> On 2015-04-13 16:34, Rafał Miłecki wrote:
>>> On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
>>>> Always poll rx and tx during NAPI poll instead of relying on the status
>>>> of the first interrupt. This prevents bgmac_poll from leaving unfinished
>>>> work around until the next IRQ.
>>>> In my tests this makes bridging/routing throughput under heavy load more
>>>> stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
>>>> the entire budget.
>>>
>>> What do you think about keeping u32 int_status; and just updating it
>>> at the end of bgmac_poll? In case you decide to implement multiple TX
>>> queues, it may be cheaper to just check a single bit in memory instead
>>> reading DMA ring status.
>> Events might arrive in the mean time. I ran some tests, and not checking
>> the irq status for processing rx/tx gave me fewer total IRQs under load.
> 
> But you do check the status, I mean the following line:
> if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
> 
> Would it make sense to do
> bgmac->int_status = bgmac_read(bgmac, BGMAC_INT_STATUS);
I don't really see the point in storing the status from the initial IRQ.

The check there is only to decide whether the poll function should run
at all. By the time bgmac_poll is called, more events may have arrived
already, making the initial status useless.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafał Miłecki April 13, 2015, 4:35 p.m. UTC | #5
On 13 April 2015 at 17:44, Felix Fietkau <nbd@openwrt.org> wrote:
> On 2015-04-13 17:06, Rafał Miłecki wrote:
>> On 13 April 2015 at 17:03, Felix Fietkau <nbd@openwrt.org> wrote:
>>> On 2015-04-13 16:34, Rafał Miłecki wrote:
>>>> On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
>>>>> Always poll rx and tx during NAPI poll instead of relying on the status
>>>>> of the first interrupt. This prevents bgmac_poll from leaving unfinished
>>>>> work around until the next IRQ.
>>>>> In my tests this makes bridging/routing throughput under heavy load more
>>>>> stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
>>>>> the entire budget.
>>>>
>>>> What do you think about keeping u32 int_status; and just updating it
>>>> at the end of bgmac_poll? In case you decide to implement multiple TX
>>>> queues, it may be cheaper to just check a single bit in memory instead
>>>> reading DMA ring status.
>>> Events might arrive in the mean time. I ran some tests, and not checking
>>> the irq status for processing rx/tx gave me fewer total IRQs under load.
>>
>> But you do check the status, I mean the following line:
>> if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
>>
>> Would it make sense to do
>> bgmac->int_status = bgmac_read(bgmac, BGMAC_INT_STATUS);
> I don't really see the point in storing the status from the initial IRQ.
>
> The check there is only to decide whether the poll function should run
> at all. By the time bgmac_poll is called, more events may have arrived
> already, making the initial status useless.

Ah, I didn't think about delay between bgmac_poll calls.

All my point was to not call bgmac_dma_tx_free or bgmac_dma_rx_read
when not needed. Imagine having 4 TX queues and 1 RX queue. You will
read 4 BGMAC_DMA_TX_STATUS registers and 1 BGMAC_DMA_RX_STATUS
registers plus do some calculations every time. By reading
BGMAC_INT_STATUS (and checking its bits) you could avoid this. In
theory a very tiny optimization, no idea if worth implement, I'll
leave it up to you.
diff mbox

Patch

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 73374b4..66876c5 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1105,8 +1105,6 @@  static void bgmac_chip_reset(struct bgmac *bgmac)
 	bgmac_phy_init(bgmac);
 
 	netdev_reset_queue(bgmac->net_dev);
-
-	bgmac->int_status = 0;
 }
 
 static void bgmac_chip_intrs_on(struct bgmac *bgmac)
@@ -1221,14 +1219,13 @@  static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
 	if (!int_status)
 		return IRQ_NONE;
 
-	/* Ack */
-	bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
+	int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
+	if (int_status)
+		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
 
 	/* Disable new interrupts until handling existing ones */
 	bgmac_chip_intrs_off(bgmac);
 
-	bgmac->int_status = int_status;
-
 	napi_schedule(&bgmac->napi);
 
 	return IRQ_HANDLED;
@@ -1237,25 +1234,17 @@  static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
 static int bgmac_poll(struct napi_struct *napi, int weight)
 {
 	struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
-	struct bgmac_dma_ring *ring;
 	int handled = 0;
 
-	if (bgmac->int_status & BGMAC_IS_TX0) {
-		ring = &bgmac->tx_ring[0];
-		bgmac_dma_tx_free(bgmac, ring);
-		bgmac->int_status &= ~BGMAC_IS_TX0;
-	}
+	/* Ack */
+	bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
 
-	if (bgmac->int_status & BGMAC_IS_RX) {
-		ring = &bgmac->rx_ring[0];
-		handled += bgmac_dma_rx_read(bgmac, ring, weight);
-		bgmac->int_status &= ~BGMAC_IS_RX;
-	}
+	bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]);
+	handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight);
 
-	if (bgmac->int_status) {
-		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
-		bgmac->int_status = 0;
-	}
+	/* poll again if more events arrived in the mean time */
+	if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
+		return handled;
 
 	if (handled < weight) {
 		napi_complete(napi);
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 5a198d5..abd50d1 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -452,7 +452,6 @@  struct bgmac {
 
 	/* Int */
 	u32 int_mask;
-	u32 int_status;
 
 	/* Current MAC state */
 	int mac_speed;