
[2/2] net: mv643xx_eth: Fix highmem support in non-TSO egress path

Message ID: 1421844850-30886-3-git-send-email-ezequiel.garcia@free-electrons.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Ezequiel Garcia Jan. 21, 2015, 12:54 p.m. UTC
Commit 69ad0dd7af22b61d9e0e68e56b6290121618b0fb
Author: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date:   Mon May 19 13:59:59 2014 -0300

    net: mv643xx_eth: Use dma_map_single() to map the skb fragments

caused a nasty regression by removing the support for highmem skb
fragments. By using page_address() to get the address of a fragment's
page, we are assuming a lowmem page. However, such an assumption is incorrect,
as fragments can be in highmem pages, resulting in very nasty issues.

This commit fixes this by using the skb_frag_dma_map() helper,
which takes care of mapping the skb fragment properly.

Fixes: 69ad0dd7af22 ("net: mv643xx_eth: Use dma_map_single() to map the skb fragments")
Reported-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)
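
For context, skb_frag_dma_map() is a thin wrapper around dma_map_page(); its
definition in include/linux/skbuff.h at this point is roughly the following
(reproduced from memory, so details may differ):

static inline dma_addr_t skb_frag_dma_map(struct device *dev,
					   const skb_frag_t *frag,
					   size_t offset, size_t size,
					   enum dma_data_direction dir)
{
	return dma_map_page(dev, skb_frag_page(frag),
			    frag->page_offset + offset, size, dir);
}

Because it takes the fragment's struct page rather than a kernel virtual
address, it works for highmem fragments, whereas dma_map_single() on the
result of page_address() is only valid for lowmem pages.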

Comments

Russell King - ARM Linux Jan. 21, 2015, 5:40 p.m. UTC | #1
On Wed, Jan 21, 2015 at 09:54:10AM -0300, Ezequiel Garcia wrote:
> Commit 69ad0dd7af22b61d9e0e68e56b6290121618b0fb
> Author: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
> Date:   Mon May 19 13:59:59 2014 -0300
> 
>     net: mv643xx_eth: Use dma_map_single() to map the skb fragments
> 
> caused a nasty regression by removing the support for highmem skb
> fragments. By using page_address() to get the address of a fragment's
> page, we are assuming a lowmem page. However, such an assumption is incorrect,
> as fragments can be in highmem pages, resulting in very nasty issues.
> 
> This commit fixes this by using the skb_frag_dma_map() helper,
> which takes care of mapping the skb fragment properly.

This seems fine, so:

> Fixes: 69ad0dd7af22 ("net: mv643xx_eth: Use dma_map_single() to map the skb fragments")
> Reported-by: Russell King <linux@arm.linux.org.uk>

Reported-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>

Thanks.

> Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
> ---
>  drivers/net/ethernet/marvell/mv643xx_eth.c | 26 +++++++++++++++++++-------
>  1 file changed, 19 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
> index a62fc38..0c77f0e 100644
> --- a/drivers/net/ethernet/marvell/mv643xx_eth.c
> +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
> @@ -879,10 +879,8 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
>  		skb_frag_t *this_frag;
>  		int tx_index;
>  		struct tx_desc *desc;
> -		void *addr;
>  
>  		this_frag = &skb_shinfo(skb)->frags[frag];
> -		addr = page_address(this_frag->page.p) + this_frag->page_offset;
>  		tx_index = txq->tx_curr_desc++;
>  		if (txq->tx_curr_desc == txq->tx_ring_size)
>  			txq->tx_curr_desc = 0;
> @@ -902,8 +900,9 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
>  
>  		desc->l4i_chk = 0;
>  		desc->byte_cnt = skb_frag_size(this_frag);
> -		desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
> -					       desc->byte_cnt, DMA_TO_DEVICE);
> +		desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
> +						 this_frag, 0, desc->byte_cnt,
> +						 DMA_TO_DEVICE);
>  	}
>  }
>  
> @@ -1065,9 +1064,22 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force)
>  		reclaimed++;
>  		txq->tx_desc_count--;
>  
> -		if (!IS_TSO_HEADER(txq, desc->buf_ptr))
> -			dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
> -					 desc->byte_cnt, DMA_TO_DEVICE);
> +		if (!IS_TSO_HEADER(txq, desc->buf_ptr)) {
> +
> +			/* The first descriptor is either a TSO header or
> +			 * the linear part of the skb.
> +			 */
> +			if (desc->cmd_sts & TX_FIRST_DESC)
> +				dma_unmap_single(mp->dev->dev.parent,
> +						 desc->buf_ptr,
> +						 desc->byte_cnt,
> +						 DMA_TO_DEVICE);
> +			else
> +				dma_unmap_page(mp->dev->dev.parent,
> +					       desc->buf_ptr,
> +					       desc->byte_cnt,
> +					       DMA_TO_DEVICE);
> +		}
>  
>  		if (cmd_sts & TX_ENABLE_INTERRUPT) {
>  			struct sk_buff *skb = __skb_dequeue(&txq->tx_skb);
> -- 
> 2.2.1
>
Ezequiel Garcia Jan. 21, 2015, 11:34 p.m. UTC | #2
On 01/21/2015 02:40 PM, Russell King - ARM Linux wrote:
> On Wed, Jan 21, 2015 at 09:54:10AM -0300, Ezequiel Garcia wrote:
>> Commit 69ad0dd7af22b61d9e0e68e56b6290121618b0fb
>> Author: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
>> Date:   Mon May 19 13:59:59 2014 -0300
>>
>>     net: mv643xx_eth: Use dma_map_single() to map the skb fragments
>>
>> caused a nasty regression by removing the support for highmem skb
>> fragments. By using page_address() to get the address of a fragment's
>> page, we are assuming a lowmem page. However, such an assumption is incorrect,
>> as fragments can be in highmem pages, resulting in very nasty issues.
>>
>> This commit fixes this by using the skb_frag_dma_map() helper,
>> which takes care of mapping the skb fragment properly.
> 
> This seems fine, so:
> 

I have just realised that the non-TSO and the TSO paths must work
simultaneously (we don't know which path an egress skb will take).

So, with these patches, the unmapping is done using dma_unmap_page() which
is only correct if the skb took the non-TSO paths. In other words,
these fixes are wrong (although I have no idea of the effect of
using dma_unmap_page on a mapping done with dma_map_single).

And the problem is that in the TSO path, the linear and the non-linear
fragments use the same kind of descriptors, so we can't distinguish
them in the cleanup, and can't decide if _single or _page should be used.

Any ideas?

I guess we could keep track in some data structure of the type of mapping
on each descriptor. Or alternatively, avoid highmem fragments altogether
by mapping to a lowmem page.

I'll try to come up with some more patches following the first idea.
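
A rough sketch of what that per-descriptor bookkeeping could look like (the
enum, the tx_desc_mapping field and the helper below are invented for
illustration; they are not part of the driver):

enum tx_map_type {
	TX_MAP_NONE,		/* TSO header from the per-queue DMA area */
	TX_MAP_SINGLE,		/* mapped with dma_map_single() */
	TX_MAP_PAGE,		/* mapped with skb_frag_dma_map()/dma_map_page() */
};

/* One entry per descriptor, e.g. an array hanging off struct tx_queue and
 * written at submit time next to desc->buf_ptr:
 *
 *	txq->tx_desc_mapping[tx_index] = TX_MAP_PAGE;
 */

static void desc_unmap(struct device *dev, struct tx_queue *txq,
		       int tx_index, struct tx_desc *desc)
{
	switch (txq->tx_desc_mapping[tx_index]) {
	case TX_MAP_SINGLE:
		dma_unmap_single(dev, desc->buf_ptr, desc->byte_cnt,
				 DMA_TO_DEVICE);
		break;
	case TX_MAP_PAGE:
		dma_unmap_page(dev, desc->buf_ptr, desc->byte_cnt,
			       DMA_TO_DEVICE);
		break;
	case TX_MAP_NONE:
		/* nothing to unmap */
		break;
	}
}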

Sorry for the crappiness,
Russell King - ARM Linux Jan. 22, 2015, 12:11 a.m. UTC | #3
On Wed, Jan 21, 2015 at 08:34:30PM -0300, Ezequiel Garcia wrote:
> I have just realised that the non-TSO and the TSO paths must work
> simultaneously (we don't know which path an egress skb will take).
> 
> So, with these patches, the unmapping is done using dma_unmap_page() which
> is only correct if the skb took the non-TSO paths. In other words,
> these fixes are wrong (although I have no idea of the effect of
> using dma_unmap_page on a mapping done with dma_map_single).
> 
> And the problem is that in the TSO path, the linear and the non-linear
> fragments use the same kind of descriptors, so we can't distinguish
> them in the cleanup, and can't decide if _single or _page should be used.
> 
> Any ideas?

Or, maybe, if davem would reply, we might come to the conclusion (as
I previously pointed out) that it's not a driver issue, but a netdev
core issue:

static netdev_features_t harmonize_features(struct sk_buff *skb,
        netdev_features_t features)
{
...
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, type)) {
                features &= ~NETIF_F_ALL_CSUM;
        } else if (illegal_highdma(skb->dev, skb)) {
                features &= ~NETIF_F_SG;
        }

The problem is when the first "if" is true (as is the case with IPv6 on
mv643xx_eth.c), we clear NETIF_F_ALL_CSUM, but leave NETIF_F_SG set.

Had that first if been false, we would've called illegal_highdma(), and
found that the skb contains some highmem fragments, but the device does
*not* have NETIF_F_HIGHDMA set, and so that second "if" would be true.
The result of that is NETIF_F_SG is cleared.

In this case, in validate_xmit_skb(), skb_needs_linearize() would be
true for a skb with fragments, causing the skb to be linearised.  I've
not completely traced the GSO path, but I'd assume that does something
similar (which I think skb_segment() handles.)
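
(For reference, skb_needs_linearize() in net/core/dev.c is roughly the
following; quoted from memory, so treat the details as approximate:

static bool skb_needs_linearize(struct sk_buff *skb,
				netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
		(skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
}

so once NETIF_F_SG has been cleared, any skb with fragments gets linearised
on the validate_xmit_skb() path.)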

So, I'm wondering whether the above should be:

static netdev_features_t harmonize_features(struct sk_buff *skb,
        netdev_features_t features)
{
...
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, type)) {
                features &= ~NETIF_F_ALL_CSUM;
        }

        if (illegal_highdma(skb->dev, skb)) {
                features &= ~NETIF_F_SG;
        }

So that we get NETIF_F_SG turned off for all cases (irrespective of the
NETIF_F_ALL_CSUM test) if we see a skb with highmem and the device
does not support highdma.

Yes, the code above hasn't changed in functionality for a long time, but
that doesn't mean it isn't buggy, and isn't the cause of our current bug.

However, it would be far better to have the drivers fixed for the sake
of performance - it's only this dma_map_page() thing that is the real
cause of the problem in these drivers.

Looking at TSO, it seems madness that it doesn't support highmem:

void tso_start(struct sk_buff *skb, struct tso_t *tso)
{
...
        tso->data = skb->data + hdr_len;
...
                tso->data = page_address(frag->page.p) + frag->page_offset;

Of course, this would all be a lot easier for drivers if all drivers had
to worry about was a struct page, offset and size, rather than having to
track whether each individual mapping of a transmit packet was mapped
with dma_map_single() or dma_map_page().
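
One way to get most of that benefit today (a sketch under the assumption that
the driver maps the linear area itself; this is not something mv643xx_eth
currently does, and the helper name is made up) is to map the linear part of
the skb with dma_map_page() as well. skb->data comes from kmalloc and so
lives in lowmem, where virt_to_page()/offset_in_page() are valid, so every TX
buffer can then be released with dma_unmap_page() at reclaim time:

static dma_addr_t map_linear_as_page(struct device *dev,
				     struct sk_buff *skb, unsigned int len)
{
	/* skb->data is kmalloc'ed lowmem, so virt_to_page() is valid here */
	return dma_map_page(dev, virt_to_page(skb->data),
			    offset_in_page(skb->data), len, DMA_TO_DEVICE);
}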

That all said, what I really care about is the regression which basically
makes 3.18 unusable on this hardware and seeing _some_ kind of resolution
to that regression - I don't care if it doesn't quite perform, what I care
about is that the network driver doesn't oops the kernel.
Ezequiel Garcia Jan. 22, 2015, 12:17 p.m. UTC | #4
On 01/21/2015 09:11 PM, Russell King - ARM Linux wrote:
> On Wed, Jan 21, 2015 at 08:34:30PM -0300, Ezequiel Garcia wrote:
>> I have just realised that the non-TSO and the TSO paths must work
>> simultaneously (we don't know which path an egress skb will take).
>>
>> So, with these patches, the unmapping is done using dma_unmap_page() which
>> is only correct if the skb took the non-TSO paths. In other words,
>> these fixes are wrong (although I have no idea of the effect of
>> using dma_unmap_page on a mapping done with dma_map_single).
>>
>> And the problem is that in the TSO path, the linear and the non-linear
>> fragments use the same kind of descriptors, so we can't distinguish
>> them in the cleanup, and can't decide if _single or _page should be used.
>>
>> Any ideas?
> 
> Or, maybe, if davem would reply, we might come to the conclusion (as
> I previously pointed out) that it's not a driver issue, but a netdev
> core issue:
> 
> static netdev_features_t harmonize_features(struct sk_buff *skb,
>         netdev_features_t features)
> {
> ...
>         if (skb->ip_summed != CHECKSUM_NONE &&
>             !can_checksum_protocol(features, type)) {
>                 features &= ~NETIF_F_ALL_CSUM;
>         } else if (illegal_highdma(skb->dev, skb)) {
>                 features &= ~NETIF_F_SG;
>         }
> 
> The problem is when the first "if" is true (as is the case with IPv6 on
> mv643xx_eth.c), we clear NETIF_F_ALL_CSUM, but leave NETIF_F_SG set.
> 
> Had that first if been false, we would've called illegal_highdma(), and
> found that the skb contains some highmem fragments, but the device does
> *not* have NETIF_F_HIGHDMA set, and so that second "if" would be true.
> The result of that is NETIF_F_SG is cleared.
> 
> In this case, in validate_xmit_skb(), skb_needs_linearize() would be
> true for a skb with fragments, causing the skb to be linearised.  I've
> not completely traced the GSO path, but I'd assume that does something
> similar (which I think skb_segment() handles.)
> 
> So, I'm wondering whether the above should be:
> 
> static netdev_features_t harmonize_features(struct sk_buff *skb,
>         netdev_features_t features)
> {
> ...
>         if (skb->ip_summed != CHECKSUM_NONE &&
>             !can_checksum_protocol(features, type)) {
>                 features &= ~NETIF_F_ALL_CSUM;
>         }
> 
>         if (illegal_highdma(skb->dev, skb)) {
>                 features &= ~NETIF_F_SG;
>         }
> 
> So that we get NETIF_F_SG turned off for all cases (irrespective of the
> NETIF_F_ALL_CSUM test) if we see a skb with highmem and the device
> does not support highdma.
> 
> Yes, the code above hasn't changed in functionality for a long time, but
> that doesn't mean it isn't buggy, and isn't the cause of our current bug.
> 

Hm, that's interesting.

> However, it would be far better to have the drivers fixed for the sake
> of performance - it's only this dma_map_page() thing that is the real
> cause of the problem in these drivers.
> 

Yes, I have just sent a v2 to fix the mv643xx_eth driver (non-TSO path).
If that works, I'll see about preparing a fix for mvneta, and for both
egress paths.

> Looking at TSO, it seems madness that it doesn't support highmem:
> 
> void tso_start(struct sk_buff *skb, struct tso_t *tso)
> {
> ...
>         tso->data = skb->data + hdr_len;
> ...
>                 tso->data = page_address(frag->page.p) + frag->page_offset;
> 
> Of course, this would all be a lot easier for drivers if all drivers had
> to worry about was a struct page, offset and size, rather than having to
> track whether each individual mapping of a transmit packet was mapped
> with dma_map_single() or dma_map_page().
> 
> That all said, what I really care about is the regression which basically
> makes 3.18 unusable on this hardware and seeing _some_ kind of resolution
> to that regression - I don't care if it doesn't quite perform, what I care
> about is that the network driver doesn't oops the kernel.
> 

Thanks for all the info!
David Miller Jan. 26, 2015, 10:40 p.m. UTC | #5
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Wed, 21 Jan 2015 09:54:10 -0300

> +		if (!IS_TSO_HEADER(txq, desc->buf_ptr)) {
> +
> +			/* The first descriptor is either a TSO header or
> +			 * the linear part of the skb.
> +			 */

Similar to the first patch, please remove this empty line.

Thanks.

Patch

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index a62fc38..0c77f0e 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -879,10 +879,8 @@  static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 		skb_frag_t *this_frag;
 		int tx_index;
 		struct tx_desc *desc;
-		void *addr;
 
 		this_frag = &skb_shinfo(skb)->frags[frag];
-		addr = page_address(this_frag->page.p) + this_frag->page_offset;
 		tx_index = txq->tx_curr_desc++;
 		if (txq->tx_curr_desc == txq->tx_ring_size)
 			txq->tx_curr_desc = 0;
@@ -902,8 +900,9 @@  static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 
 		desc->l4i_chk = 0;
 		desc->byte_cnt = skb_frag_size(this_frag);
-		desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
-					       desc->byte_cnt, DMA_TO_DEVICE);
+		desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
+						 this_frag, 0, desc->byte_cnt,
+						 DMA_TO_DEVICE);
 	}
 }
 
@@ -1065,9 +1064,22 @@  static int txq_reclaim(struct tx_queue *txq, int budget, int force)
 		reclaimed++;
 		txq->tx_desc_count--;
 
-		if (!IS_TSO_HEADER(txq, desc->buf_ptr))
-			dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
-					 desc->byte_cnt, DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(txq, desc->buf_ptr)) {
+
+			/* The first descriptor is either a TSO header or
+			 * the linear part of the skb.
+			 */
+			if (desc->cmd_sts & TX_FIRST_DESC)
+				dma_unmap_single(mp->dev->dev.parent,
+						 desc->buf_ptr,
+						 desc->byte_cnt,
+						 DMA_TO_DEVICE);
+			else
+				dma_unmap_page(mp->dev->dev.parent,
+					       desc->buf_ptr,
+					       desc->byte_cnt,
+					       DMA_TO_DEVICE);
+		}
 
 		if (cmd_sts & TX_ENABLE_INTERRUPT) {
 			struct sk_buff *skb = __skb_dequeue(&txq->tx_skb);