diff mbox

[RFC,3/3] mlx4: Call skb_csum_offload_check to check offloadability

Message ID 1444088364-2839440-4-git-send-email-tom@herbertland.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Tom Herbert Oct. 5, 2015, 11:39 p.m. UTC
This provides an example of a driver calling the skb_csum_offload_check.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  6 +++---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 20 +++++++++++++++-----
 2 files changed, 18 insertions(+), 8 deletions(-)

Comments

Alexander H Duyck Oct. 6, 2015, 4:03 a.m. UTC | #1
On 10/05/2015 04:39 PM, Tom Herbert wrote:
> This provides an example of a driver calling the skb_csum_offload_check.
>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  6 +++---
>   drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 20 +++++++++++++++-----
>   2 files changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index 4726122..f2ed8d0 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -2360,7 +2360,7 @@ out:
>   	}
>   
>   	/* set offloads */
> -	priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
> +	priv->dev->hw_enc_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>   				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
>   	priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
>   	priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
> @@ -2372,7 +2372,7 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
>   	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
>   						 vxlan_del_task);
>   	/* unset offloads */
> -	priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
> +	priv->dev->hw_enc_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>   				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
>   	priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
>   	priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
> @@ -2943,7 +2943,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
>   	/*
>   	 * Set driver features
>   	 */
> -	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
> +	dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
>   	if (mdev->LSO_support)
>   		dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
>   
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> index 494e776..f364ffd 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> @@ -702,6 +702,14 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
>   	__iowrite64_copy(dst, src, bytecnt / 8);
>   }
>   
> +static const struct skb_csum_offl_spec csum_offl_spec = {
> +	.ipv4_okay = 1,
> +	.ipv6_okay = 1,
> +	.encap_okay = 1,
> +	.tcp_okay = 1,
> +	.udp_okay = 1,
> +};
> +

The question I would have is if inner IPv6 checksum is supported by this 
driver.  The code before didn't seem to indicate it was, and after the 
csum_offl_spec would seem to indicate it is.  One of my concerns about a 
change like this is that it is likely prone to introduce regressions as 
features are going to be toggling due to interpretations of flags and 
assumptions about what is good for the outer headers is good for the 
inner ones.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert Oct. 6, 2015, 4:22 p.m. UTC | #2
On Mon, Oct 5, 2015 at 9:03 PM, Alexander Duyck
<alexander.duyck@gmail.com> wrote:
> On 10/05/2015 04:39 PM, Tom Herbert wrote:
>>
>> This provides an example of a driver calling the skb_csum_offload_check.
>>
>> Signed-off-by: Tom Herbert <tom@herbertland.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  6 +++---
>>   drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 20 +++++++++++++++-----
>>   2 files changed, 18 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>> b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>> index 4726122..f2ed8d0 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>> @@ -2360,7 +2360,7 @@ out:
>>         }
>>         /* set offloads */
>> -       priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>> +       priv->dev->hw_enc_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>>                                       NETIF_F_TSO |
>> NETIF_F_GSO_UDP_TUNNEL;
>>         priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
>>         priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
>> @@ -2372,7 +2372,7 @@ static void mlx4_en_del_vxlan_offloads(struct
>> work_struct *work)
>>         struct mlx4_en_priv *priv = container_of(work, struct
>> mlx4_en_priv,
>>                                                  vxlan_del_task);
>>         /* unset offloads */
>> -       priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>> +       priv->dev->hw_enc_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>>                                       NETIF_F_TSO |
>> NETIF_F_GSO_UDP_TUNNEL);
>>         priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
>>         priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
>> @@ -2943,7 +2943,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev,
>> int port,
>>         /*
>>          * Set driver features
>>          */
>> -       dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
>> NETIF_F_IPV6_CSUM;
>> +       dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
>>         if (mdev->LSO_support)
>>                 dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
>>   diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> index 494e776..f364ffd 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> @@ -702,6 +702,14 @@ static void mlx4_bf_copy(void __iomem *dst, const
>> void *src,
>>         __iowrite64_copy(dst, src, bytecnt / 8);
>>   }
>>   +static const struct skb_csum_offl_spec csum_offl_spec = {
>> +       .ipv4_okay = 1,
>> +       .ipv6_okay = 1,
>> +       .encap_okay = 1,
>> +       .tcp_okay = 1,
>> +       .udp_okay = 1,
>> +};
>> +
>
>
> The question I would have is if inner IPv6 checksum is supported by this
> driver.  The code before didn't seem to indicate it was, and after the
> csum_offl_spec would seem to indicate it is.  One of my concerns about a
> change like this is that it is likely prone to introduce regressions as
> features are going to be toggling due to interpretations of flags and
> assumptions about what is good for the outer headers is good for the inner
> ones.

Do you mean to say that there could be a device that supports an inner
and outer checksum for IPv4, but only an outer checksum for IPv6 and
not inner checksum?

Tom
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander H Duyck Oct. 6, 2015, 4:54 p.m. UTC | #3
On 10/06/2015 09:22 AM, Tom Herbert wrote:
> On Mon, Oct 5, 2015 at 9:03 PM, Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
>> On 10/05/2015 04:39 PM, Tom Herbert wrote:
>>> This provides an example of a driver calling the skb_csum_offload_check.
>>>
>>> Signed-off-by: Tom Herbert <tom@herbertland.com>
>>> ---
>>>    drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  6 +++---
>>>    drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 20 +++++++++++++++-----
>>>    2 files changed, 18 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>>> b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>>> index 4726122..f2ed8d0 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
>>> @@ -2360,7 +2360,7 @@ out:
>>>          }
>>>          /* set offloads */
>>> -       priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>>> +       priv->dev->hw_enc_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>>>                                        NETIF_F_TSO |
>>> NETIF_F_GSO_UDP_TUNNEL;
>>>          priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
>>>          priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
>>> @@ -2372,7 +2372,7 @@ static void mlx4_en_del_vxlan_offloads(struct
>>> work_struct *work)
>>>          struct mlx4_en_priv *priv = container_of(work, struct
>>> mlx4_en_priv,
>>>                                                   vxlan_del_task);
>>>          /* unset offloads */
>>> -       priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>>> +       priv->dev->hw_enc_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
>>>                                        NETIF_F_TSO |
>>> NETIF_F_GSO_UDP_TUNNEL);
>>>          priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
>>>          priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
>>> @@ -2943,7 +2943,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev,
>>> int port,
>>>          /*
>>>           * Set driver features
>>>           */
>>> -       dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
>>> NETIF_F_IPV6_CSUM;
>>> +       dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
>>>          if (mdev->LSO_support)
>>>                  dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
>>>    diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>> b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>> index 494e776..f364ffd 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>> @@ -702,6 +702,14 @@ static void mlx4_bf_copy(void __iomem *dst, const
>>> void *src,
>>>          __iowrite64_copy(dst, src, bytecnt / 8);
>>>    }
>>>    +static const struct skb_csum_offl_spec csum_offl_spec = {
>>> +       .ipv4_okay = 1,
>>> +       .ipv6_okay = 1,
>>> +       .encap_okay = 1,
>>> +       .tcp_okay = 1,
>>> +       .udp_okay = 1,
>>> +};
>>> +
>>
>> The question I would have is if inner IPv6 checksum is supported by this
>> driver.  The code before didn't seem to indicate it was, and after the
>> csum_offl_spec would seem to indicate it is.  One of my concerns about a
>> change like this is that it is likely prone to introduce regressions as
>> features are going to be toggling due to interpretations of flags and
>> assumptions about what is good for the outer headers is good for the inner
>> ones.
> Do you mean to say that there could be a device that supports an inner
> and outer checksum for IPv4, but only an outer checksum for IPv6 and
> not inner checksum?
>
> Tom

Yes, that is what I mean.  The fact is hardware designs are often short 
sighted like that.  Somebody may have decided to save a few gates by 
only supporting IPv4 because somebody somewhere didn't make it a hard 
requirement to support IPv6, or perhaps the implementation wasn't quite 
right and instead of spinning a new silicon they decided to de-feature 
IPv6 inner checksum offload.

I don't know if that is the case for the mlx4, maybe it is just a driver 
oversight, but the fact that it didn't list IPv6 as being a supported 
encapsulation before kind of implies that it doesn't support TCP/UDP 
checksums on top of encapsulated IPv6.

- Alex
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 7, 2015, 3:41 p.m. UTC | #4
On 10/6/2015 2:39 AM, Tom Herbert wrote:
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> index 494e776..f364ffd 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> @@ -702,6 +702,14 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
>   	__iowrite64_copy(dst, src, bytecnt / 8);
>   }
>   
> +static const struct skb_csum_offl_spec csum_offl_spec = {
> +	.ipv4_okay = 1,
> +	.ipv6_okay = 1,
> +	.encap_okay = 1,
> +	.tcp_okay = 1,
> +	.udp_okay = 1,
> +};
> +
>   netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
>   	struct skb_shared_info *shinfo = skb_shinfo(skb);
> @@ -727,6 +735,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>   	bool stop_queue;
>   	bool inline_ok;
>   	u32 ring_cons;
> +	struct skb_csum_offl_params offl_params;
>   
>   	if (!priv->port_up)
>   		goto tx_drop;
> @@ -853,8 +862,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>   	/* Prepare ctrl segement apart opcode+ownership, which depends on
>   	 * whether LSO is used */
>   	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
> -	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
> -		if (!skb->encapsulation)
> +	if (skb_csum_offload_chk(skb, &csum_offl_spec,
> +				 &offl_params, true)) {
> +		if (!offl_params.encapped_csum)
>   			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
>   								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
>   		else
> @@ -912,9 +922,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>   		build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
>   				 tx_ind, fragptr);
>   
> -	if (skb->encapsulation) {
> -		struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
> -		if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
> +	if (!offl_params.encapped_csum) {
> +		if (offl_params.ip_proto == IPPROTO_TCP ||
> +		    offl_params.ip_proto == IPPROTO_UDP)
>   			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
>   		else
>   			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
> -- 2.4.6
few quick comments

not all mlx4 devices support offloading checksum for tunneled traffic, 
only ConnectX3-pro -- this translates for the mlx4 EN driver
logic that determines that on startup, e.g only when 
mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN
we set hw_enc_features and friends -- in that respect, I wasn't sure if 
it's okay to blindly set  .encap_oka = 1 here.

Another constraint, is that when the device does support (say) TCP TX 
checksum offload for the inner packet they don't support UDP
checksum offload for the outer packet.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert Oct. 7, 2015, 6:07 p.m. UTC | #5
On Wed, Oct 7, 2015 at 8:41 AM, Or Gerlitz <ogerlitz@mellanox.com> wrote:
> On 10/6/2015 2:39 AM, Tom Herbert wrote:
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> index 494e776..f364ffd 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>> @@ -702,6 +702,14 @@ static void mlx4_bf_copy(void __iomem *dst, const
>> void *src,
>>         __iowrite64_copy(dst, src, bytecnt / 8);
>>   }
>>   +static const struct skb_csum_offl_spec csum_offl_spec = {
>> +       .ipv4_okay = 1,
>> +       .ipv6_okay = 1,
>> +       .encap_okay = 1,
>> +       .tcp_okay = 1,
>> +       .udp_okay = 1,
>> +};
>> +
>>   netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>>         struct skb_shared_info *shinfo = skb_shinfo(skb);
>> @@ -727,6 +735,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct
>> net_device *dev)
>>         bool stop_queue;
>>         bool inline_ok;
>>         u32 ring_cons;
>> +       struct skb_csum_offl_params offl_params;
>>         if (!priv->port_up)
>>                 goto tx_drop;
>> @@ -853,8 +862,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct
>> net_device *dev)
>>         /* Prepare ctrl segement apart opcode+ownership, which depends on
>>          * whether LSO is used */
>>         tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
>> -       if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
>> -               if (!skb->encapsulation)
>> +       if (skb_csum_offload_chk(skb, &csum_offl_spec,
>> +                                &offl_params, true)) {
>> +               if (!offl_params.encapped_csum)
>>                         tx_desc->ctrl.srcrb_flags |=
>> cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
>>
>> MLX4_WQE_CTRL_TCP_UDP_CSUM);
>>                 else
>> @@ -912,9 +922,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct
>> net_device *dev)
>>                 build_inline_wqe(tx_desc, skb, shinfo, real_size,
>> &vlan_tag,
>>                                  tx_ind, fragptr);
>>   -     if (skb->encapsulation) {
>> -               struct iphdr *ipv4 = (struct iphdr
>> *)skb_inner_network_header(skb);
>> -               if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol ==
>> IPPROTO_UDP)
>> +       if (!offl_params.encapped_csum) {
>> +               if (offl_params.ip_proto == IPPROTO_TCP ||
>> +                   offl_params.ip_proto == IPPROTO_UDP)
>>                         op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP |
>> MLX4_WQE_CTRL_ILP);
>>                 else
>>                         op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
>> -- 2.4.6
>
> few quick comments
>
> not all mlx4 devices support offloading checksum for tunneled traffic, only
> ConnectX3-pro -- this translates for the mlx4 EN driver
> logic that determines that on startup, e.g only when
> mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN
> we set hw_enc_features and friends -- in that respect, I wasn't sure if it's
> okay to blindly set  .encap_oka = 1 here.
>
Or, I would only give the mlnx support as an example. I think driver
owners would need to implement the specification for the their
devices.

> Another constraint, is that when the device does support (say) TCP TX
> checksum offload for the inner packet they don't support UDP
> checksum offload for the outer packet.

We can add such things to the specification. One value I see in having
a common structure to describe the checksum capabilities is that
becomes a way to clearly document what is (and is not) supported by
devices.

btw, I don't quite understand your example. If a device does not
support UDP checksum there is a flag for that in the specification. If
the stack sends a TCP packet encapsulated in a UDP packet with UDP
checksum enabled, it will try to offload the UDP checksum and not the
TCP one. There is currently no interface to offload two checksums in
the same packet (in non-GRO at least) and with things like RCO we
probably will never need that anyway.

Tom
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 8, 2015, 9:39 p.m. UTC | #6
On Wed, Oct 7, 2015 at 9:07 PM, Tom Herbert <tom@herbertland.com> wrote:
> On Wed, Oct 7, 2015 at 8:41 AM, Or Gerlitz <ogerlitz@mellanox.com> wrote:
>> On 10/6/2015 2:39 AM, Tom Herbert wrote:

>>>   +static const struct skb_csum_offl_spec csum_offl_spec = {
>>> +       .ipv4_okay = 1,
>>> +       .ipv6_okay = 1,
>>> +       .encap_okay = 1,
>>> +       .tcp_okay = 1,
>>> +       .udp_okay = 1,
>>> +};
>>> +
[...]


> Or, I would only give the mlnx support as an example. I think driver
> owners would need to implement the specification for the their devices.

sure, sorry to bother on that.

>> Another constraint, is that when the device does support (say) TCP TX
>> checksum offload for the inner packet they don't support UDP
>> checksum offload for the outer packet.

> We can add such things to the specification. One value I see in having
> a common structure to describe the checksum capabilities is that
> becomes a way to clearly document what is (and is not) supported by
> devices.

> btw, I don't quite understand your example. If a device does not
> support UDP checksum there is a flag for that in the specification.

But we do support UDP checksum generation for not-tunneled packets, so
the specification should somehow capture this combination.

> If the stack sends a TCP packet encapsulated in a UDP packet with UDP
> checksum enabled, it will try to offload the UDP checksum and not the
> TCP one.

Not following... who is "it" in this sentence, the stack or the device?

We don't advertise NETIF_F_GSO_UDP_TUNNEL_CSUM so for UDP tunning
GSO-ed packets  we should be fine. For non GSO... you say this works
only b/c the default for the vxlan driver is to use zero udp checksum?

> There is currently no interface to offload two checksums in
> the same packet (in non-GRO at least)

GRO? aren't we talking on xmit?

> and with things like RCO we probably will never need that anyway.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4726122..f2ed8d0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2360,7 +2360,7 @@  out:
 	}
 
 	/* set offloads */
-	priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+	priv->dev->hw_enc_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
 	priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
 	priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
@@ -2372,7 +2372,7 @@  static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
 	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
 						 vxlan_del_task);
 	/* unset offloads */
-	priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+	priv->dev->hw_enc_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
 	priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
 	priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
@@ -2943,7 +2943,7 @@  int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	/*
 	 * Set driver features
 	 */
-	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (mdev->LSO_support)
 		dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 494e776..f364ffd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -702,6 +702,14 @@  static void mlx4_bf_copy(void __iomem *dst, const void *src,
 	__iowrite64_copy(dst, src, bytecnt / 8);
 }
 
+static const struct skb_csum_offl_spec csum_offl_spec = {
+	.ipv4_okay = 1,
+	.ipv6_okay = 1,
+	.encap_okay = 1,
+	.tcp_okay = 1,
+	.udp_okay = 1,
+};
+
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -727,6 +735,7 @@  netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	bool stop_queue;
 	bool inline_ok;
 	u32 ring_cons;
+	struct skb_csum_offl_params offl_params;
 
 	if (!priv->port_up)
 		goto tx_drop;
@@ -853,8 +862,9 @@  netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Prepare ctrl segement apart opcode+ownership, which depends on
 	 * whether LSO is used */
 	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
-	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		if (!skb->encapsulation)
+	if (skb_csum_offload_chk(skb, &csum_offl_spec,
+				 &offl_params, true)) {
+		if (!offl_params.encapped_csum)
 			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
 								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
 		else
@@ -912,9 +922,9 @@  netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
 				 tx_ind, fragptr);
 
-	if (skb->encapsulation) {
-		struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
-		if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
+	if (!offl_params.encapped_csum) {
+		if (offl_params.ip_proto == IPPROTO_TCP ||
+		    offl_params.ip_proto == IPPROTO_UDP)
 			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
 		else
 			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);