diff mbox

[net-next,6/6] net: Implmement RFC 6936 (zero RX csums for UDP/IPv6)

Message ID alpine.DEB.2.02.1404041720560.21339@tomh.mtv.corp.google.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Tom Herbert April 5, 2014, 12:28 a.m. UTC
RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
are received with a zero UDP checksum value must be dropped. RFC 6936
allow zero checksums to support tunnels over UDP.

This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
set on a UDP socket to indicate that a zero checksum is acceptable
(e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
functions were updated accordingly to deal with this.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/udp.h      |  3 ++-
 include/uapi/linux/udp.h |  1 +
 net/ipv4/udp.c           |  8 ++++++++
 net/ipv6/ip6_checksum.c  | 19 ++++++++-----------
 net/ipv6/udp.c           | 39 +++++++++++++++++++++++++++++++++++++--
 5 files changed, 56 insertions(+), 14 deletions(-)

Comments

Tom Herbert April 5, 2014, 9:56 p.m. UTC | #1
Please disregard new option, I think we can just use sk_no_check
instead of new socket option.

On Fri, Apr 4, 2014 at 5:28 PM, Tom Herbert <therbert@google.com> wrote:
> RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
> are received with a zero UDP checksum value must be dropped. RFC 6936
> allow zero checksums to support tunnels over UDP.
>
> This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
> set on a UDP socket to indicate that a zero checksum is acceptable
> (e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
> functions were updated accordingly to deal with this.
>
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---
>  include/linux/udp.h      |  3 ++-
>  include/uapi/linux/udp.h |  1 +
>  net/ipv4/udp.c           |  8 ++++++++
>  net/ipv6/ip6_checksum.c  | 19 ++++++++-----------
>  net/ipv6/udp.c           | 39 +++++++++++++++++++++++++++++++++++++--
>  5 files changed, 56 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/udp.h b/include/linux/udp.h
> index 42278bb..647ffc9 100644
> --- a/include/linux/udp.h
> +++ b/include/linux/udp.h
> @@ -63,7 +63,8 @@ struct udp_sock {
>  #define UDPLITE_SEND_CC  0x2           /* set via udplite setsockopt         */
>  #define UDPLITE_RECV_CC  0x4           /* set via udplite setsocktopt        */
>         __u8             pcflag;        /* marks socket as UDP-Lite if > 0    */
> -       __u8             unused[3];
> +       __u8             check6_zero_okay; /* Zero csum okay for IPv6 */
> +       __u8             unused[2];
>         /*
>          * For encapsulation sockets.
>          */
> diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
> index e2bcfd7..46d0e58 100644
> --- a/include/uapi/linux/udp.h
> +++ b/include/uapi/linux/udp.h
> @@ -28,6 +28,7 @@ struct udphdr {
>
>  /* UDP socket options */
>  #define UDP_CORK       1       /* Never send partially complete segments */
> +#define UDP_CHECK6_ZERO_OKAY   2       /* Zero checksum okay for IPv6 UDP */
>  #define UDP_ENCAP      100     /* Set the socket to accept encapsulated packets */
>
>  /* UDP encapsulation types */
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d5e3a76..9b88788 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2003,6 +2003,10 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>                 }
>                 break;
>
> +       case UDP_CHECK6_ZERO_OKAY:
> +               up->check6_zero_okay = !!val;
> +               break;
> +
>         /*
>          *      UDP-Lite's partial checksum coverage (RFC 3828).
>          */
> @@ -2081,6 +2085,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
>                 val = up->corkflag;
>                 break;
>
> +       case UDP_CHECK6_ZERO_OKAY:
> +               val = up->check6_zero_okay;
> +               break;
> +
>         case UDP_ENCAP:
>                 val = up->encap_type;
>                 break;
> diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
> index 6105b8f..55896cd 100644
> --- a/net/ipv6/ip6_checksum.c
> +++ b/net/ipv6/ip6_checksum.c
> @@ -83,16 +83,13 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
>                         return err;
>         }
>
> -       if (uh->check == 0) {
> -               /* RFC 2460 section 8.1 says that we SHOULD log
> -                  this error. Well, it is reasonable.
> -                */
> -               LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
> -                              &ipv6_hdr(skb)->saddr, ntohs(uh->source),
> -                              &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
> -               return 1;
> -       }
> -
> -       return skb_checksum_init(skb, IPPROTO_UDP, ip6_pseudo_compute);
> +       /*
> +        * To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
> +        * we accept a checksum of zero here. When we find the socket
> +        * for the UDP packet we'll check if that socket allows zero checksum
> +        * for IPv6 (set by socket option).
> +        */
> +       return skb_checksum_init_zero_check(skb, proto, uh->check,
> +                                          ip6_pseudo_compute);
>  }
>  EXPORT_SYMBOL(udp6_csum_init);
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 1e586d9..8b874b9 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -760,13 +760,25 @@ static void flush_stack(struct sock **stack, unsigned int count,
>         if (unlikely(skb1))
>                 kfree_skb(skb1);
>  }
> +
> +static void udp6_csum_zero_error(struct sk_buff *skb)
> +{
> +       /*
> +        * RFC 2460 section 8.1 says that we SHOULD log
> +        * this error. Well, it is reasonable.
> +       */
> +       LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
> +                      &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
> +                      &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
> +}
> +
>  /*
>   * Note: called only from the BH handler context,
>   * so we don't need to lock the hashes.
>   */
>  static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
>                 const struct in6_addr *saddr, const struct in6_addr *daddr,
> -               struct udp_table *udptable)
> +               struct udp_table *udptable, int proto)
>  {
>         struct sock *sk, *stack[256 / sizeof(struct sock *)];
>         const struct udphdr *uh = udp_hdr(skb);
> @@ -779,6 +791,19 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
>         dif = inet6_iif(skb);
>         sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
>         while (sk) {
> +               if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
> +                       /*
> +                        * If checksum in packet is zero and not all the
> +                        * sockets accept a zero checksum then declare
> +                        * a checksum error.
> +                        */
> +                       flush_stack(stack, count, skb, ~0);
> +                       count = 0;
> +                       udp6_csum_zero_error(skb);
> +                       UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
> +                       UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
> +                       break;
> +               }
>                 stack[count++] = sk;
>                 sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
>                                        uh->source, saddr, dif);
> @@ -855,7 +880,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
>          */
>         if (ipv6_addr_is_multicast(daddr))
>                 return __udp6_lib_mcast_deliver(net, skb,
> -                               saddr, daddr, udptable);
> +                               saddr, daddr, udptable, proto);
>
>         /* Unicast */
>
> @@ -867,6 +892,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
>         if (sk != NULL) {
>                 int ret;
>
> +               if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
> +                       udp6_csum_zero_error(skb);
> +                       goto csum_error;
> +               }
> +
>                 ret = udpv6_queue_rcv_skb(sk, skb);
>                 sock_put(sk);
>
> @@ -879,6 +909,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
>                 return 0;
>         }
>
> +       if (!uh->check) {
> +               udp6_csum_zero_error(skb);
> +               goto csum_error;
> +       }
> +
>         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
>                 goto discard;
>
> --
> 1.9.1.423.g4596e3a
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 7, 2014, 5:16 p.m. UTC | #2
From: Tom Herbert <therbert@google.com>
Date: Fri, 4 Apr 2014 17:28:24 -0700 (PDT)

> RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
> are received with a zero UDP checksum value must be dropped. RFC 6936
> allow zero checksums to support tunnels over UDP.
> 
> This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
> set on a UDP socket to indicate that a zero checksum is acceptable
> (e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
> functions were updated accordingly to deal with this.
> 
> Signed-off-by: Tom Herbert <therbert@google.com>

I see you reply to this later and say we can use sk_no_check.

Are you really sure?  This might create a surprise for someone
inadvertantly setting that now and expecting it to have a very
specific effect only for ipv4 UDP sockets.

The safest thing to do is to create the new option, then there
is no discrepancy.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet April 7, 2014, 5:25 p.m. UTC | #3
On Mon, 2014-04-07 at 13:16 -0400, David Miller wrote:

> The safest thing to do is to create the new option, then there
> is no discrepancy.


Btw SO_NO_CHECK is a SOL_SOCKET option.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
YOSHIFUJI Hideaki / 吉藤英明 April 7, 2014, 6:48 p.m. UTC | #4
Tom Herbert wrote:
> RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
> are received with a zero UDP checksum value must be dropped. RFC 6936
> allow zero checksums to support tunnels over UDP.
> 
> This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
> set on a UDP socket to indicate that a zero checksum is acceptable
> (e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
> functions were updated accordingly to deal with this.
> 
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---
>  include/linux/udp.h      |  3 ++-
>  include/uapi/linux/udp.h |  1 +
>  net/ipv4/udp.c           |  8 ++++++++
>  net/ipv6/ip6_checksum.c  | 19 ++++++++-----------
>  net/ipv6/udp.c           | 39 +++++++++++++++++++++++++++++++++++++--
>  5 files changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/udp.h b/include/linux/udp.h
> index 42278bb..647ffc9 100644
> --- a/include/linux/udp.h
> +++ b/include/linux/udp.h
> @@ -779,6 +791,19 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
>  	dif = inet6_iif(skb);
>  	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
>  	while (sk) {
> +		if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
> +			/*
> +			 * If checksum in packet is zero and not all the
> +			 * sockets accept a zero checksum then declare
> +			 * a checksum error.
> +			 */
> +			flush_stack(stack, count, skb, ~0);
> +			count = 0;
> +			udp6_csum_zero_error(skb);
> +			UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
> +			UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
> +			break;
> +		}
>  		stack[count++] = sk;
>  		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
>  				       uh->source, saddr, dif);

This seems wrong; packets with zero-checksum will not be delivered to 
some sockets if some of sockets accept zero-checksums and others do not.

BTW, I have been thinking that we should introduce 4 options
(or bits) for IPv4/IPv6 checksumming for sender/receiver.

--yoshfuji
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert April 7, 2014, 8:44 p.m. UTC | #5
On Mon, Apr 7, 2014 at 10:16 AM, David Miller <davem@davemloft.net> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Fri, 4 Apr 2014 17:28:24 -0700 (PDT)
>
>> RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
>> are received with a zero UDP checksum value must be dropped. RFC 6936
>> allow zero checksums to support tunnels over UDP.
>>
>> This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
>> set on a UDP socket to indicate that a zero checksum is acceptable
>> (e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
>> functions were updated accordingly to deal with this.
>>
>> Signed-off-by: Tom Herbert <therbert@google.com>
>
> I see you reply to this later and say we can use sk_no_check.
>
> Are you really sure?  This might create a surprise for someone
> inadvertantly setting that now and expecting it to have a very
> specific effect only for ipv4 UDP sockets.
>
> The safest thing to do is to create the new option, then there
> is no discrepancy.

The semantics of sk_no_check are a little muddled since it is a
boolean that's not a boolean :-). It looks like:

1) If sk_no_check == UDP_CSUM_NOXMIT, we want to send zero csums.
Logically, I think this implies willingness to receive zero checksum
in UDP/IPV6 for the socket.
2) If sk_no_check is set l2tp_verify_checksum accepts bad checksums
(ironically, UDP/IPv6 checksum of zero is rejected before this point).
This seems like a really bad idea. The git log mentions this was done
because of "L2TP over localhost issue with incorrect checksums being
reported"-- so why not fix TX side to set correct csum or send zero
csums?
2) If sk_no_check == UDP_CSUM_NORCV, I assume this is intended to
disable RX csums to some extent (maybe allow zero csum?). The only
effect I see i the above mentioned l2tp interaction. The only code
that sets this is net/sunrpc/xprtsock.c, and I'm not sure what that is
trying to accomplish.

If we really need the mode of accepting zero csum but not sending it,
then UDP_CSUM_NORCV and a new socket option might have meaning. But if
we don't need that, sk_no_check and SO_NO_CHECK might be good enough
(assuming xprtsock use case is resolved) also sk_no_check could go
back to being a boolean. Since the zero checksum is only allowed for
tunnels, I think the latter might be right. In any case, I don't think
we want to allow the ability to accept packets with bad checksums on a
UDP socket like l2tp would do!

Tom
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert April 7, 2014, 8:53 p.m. UTC | #6
On Mon, Apr 7, 2014 at 11:48 AM, YOSHIFUJI Hideaki
<yoshfuji@linux-ipv6.org> wrote:
> Tom Herbert wrote:
>> RFC 6936 relaxes the requirement of RFC 2460 that UDP/IPv6 packets which
>> are received with a zero UDP checksum value must be dropped. RFC 6936
>> allow zero checksums to support tunnels over UDP.
>>
>> This patch adds a new socket option UDP_CHECK6_ZERO_OKAY whcih can be
>> set on a UDP socket to indicate that a zero checksum is acceptable
>> (e.g. the socket is for a tunnel). The ip6 checksum and UDP receive
>> functions were updated accordingly to deal with this.
>>
>> Signed-off-by: Tom Herbert <therbert@google.com>
>> ---
>>  include/linux/udp.h      |  3 ++-
>>  include/uapi/linux/udp.h |  1 +
>>  net/ipv4/udp.c           |  8 ++++++++
>>  net/ipv6/ip6_checksum.c  | 19 ++++++++-----------
>>  net/ipv6/udp.c           | 39 +++++++++++++++++++++++++++++++++++++--
>>  5 files changed, 56 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/linux/udp.h b/include/linux/udp.h
>> index 42278bb..647ffc9 100644
>> --- a/include/linux/udp.h
>> +++ b/include/linux/udp.h
>> @@ -779,6 +791,19 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
>>       dif = inet6_iif(skb);
>>       sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
>>       while (sk) {
>> +             if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
>> +                     /*
>> +                      * If checksum in packet is zero and not all the
>> +                      * sockets accept a zero checksum then declare
>> +                      * a checksum error.
>> +                      */
>> +                     flush_stack(stack, count, skb, ~0);
>> +                     count = 0;
>> +                     udp6_csum_zero_error(skb);
>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
>> +                     break;
>> +             }
>>               stack[count++] = sk;
>>               sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
>>                                      uh->source, saddr, dif);
>
> This seems wrong; packets with zero-checksum will not be delivered to
> some sockets if some of sockets accept zero-checksums and others do not.
>
Okay, I suppose delivering to some and not others is reasonable,
although there's no accounting for the non-deliverables-- I suppose
there's no completely clean way to do this...

> BTW, I have been thinking that we should introduce 4 options
> (or bits) for IPv4/IPv6 checksumming for sender/receiver.
>
What are these 4 options?

Tom

> --yoshfuji
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
YOSHIFUJI Hideaki / 吉藤英明 April 8, 2014, 1:17 a.m. UTC | #7
Tom Herbert wrote:

>>>       while (sk) {
>>> +             if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
>>> +                     /*
>>> +                      * If checksum in packet is zero and not all the
>>> +                      * sockets accept a zero checksum then declare
>>> +                      * a checksum error.
>>> +                      */
>>> +                     flush_stack(stack, count, skb, ~0);
>>> +                     count = 0;
>>> +                     udp6_csum_zero_error(skb);
>>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
>>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
>>> +                     break;
>>> +             }
>>>               stack[count++] = sk;
>>>               sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
>>>                                      uh->source, saddr, dif);
>>
>> This seems wrong; packets with zero-checksum will not be delivered to
>> some sockets if some of sockets accept zero-checksums and others do not.
>>
> Okay, I suppose delivering to some and not others is reasonable,
> although there's no accounting for the non-deliverables-- I suppose
> there's no completely clean way to do this...

Well, I believe that supporting UDP packets with zero-checksum
should be implemented in a consistent way with UDP-lite.

>> BTW, I have been thinking that we should introduce 4 options
>> (or bits) for IPv4/IPv6 checksumming for sender/receiver.
>>
> What are these 4 options?

I meant combination of {ipv4,ipv6} and {sender,receiver}.

--yoshfuji

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert April 8, 2014, 3:47 p.m. UTC | #8
On Mon, Apr 7, 2014 at 6:17 PM, YOSHIFUJI Hideaki
<yoshfuji@linux-ipv6.org> wrote:
> Tom Herbert wrote:
>
>>>>       while (sk) {
>>>> +             if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
>>>> +                     /*
>>>> +                      * If checksum in packet is zero and not all the
>>>> +                      * sockets accept a zero checksum then declare
>>>> +                      * a checksum error.
>>>> +                      */
>>>> +                     flush_stack(stack, count, skb, ~0);
>>>> +                     count = 0;
>>>> +                     udp6_csum_zero_error(skb);
>>>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
>>>> +                     UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
>>>> +                     break;
>>>> +             }
>>>>               stack[count++] = sk;
>>>>               sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
>>>>                                      uh->source, saddr, dif);
>>>
>>> This seems wrong; packets with zero-checksum will not be delivered to
>>> some sockets if some of sockets accept zero-checksums and others do not.
>>>
>> Okay, I suppose delivering to some and not others is reasonable,
>> although there's no accounting for the non-deliverables-- I suppose
>> there's no completely clean way to do this...
>
> Well, I believe that supporting UDP packets with zero-checksum
> should be implemented in a consistent way with UDP-lite.
>
Zero-checksum is disallowed for UDP-lite RFC3828, we can't change that.

>>> BTW, I have been thinking that we should introduce 4 options
>>> (or bits) for IPv4/IPv6 checksumming for sender/receiver.
>>>
>> What are these 4 options?
>
> I meant combination of {ipv4,ipv6} and {sender,receiver}.
>
I suppose you mean more granularity to enable zero checksums for v4
and v6 UDP TX, and allow zero checksum on RX for v6 UDP (no options
for v4 RX). I'm leery about making it easier to use zero checksums, we
should be encouraging use of UDP checksums. Consider VXLAN draft which
says that UDP checksum should be zero-- this means that the vni is
sent with no protection against corruption (this is L3 so any Ethernet
CRC doesn't count). So with a single bit flip we might send packets to
wrong VM thus breaking isolation which is fundamental in network
virtualization. A goal of my patches is to make CHECKSUM_COMPLETE
efficient and a better option for devices, then sending UDP csums with
zero is less compelling.

> --yoshfuji
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 42278bb..647ffc9 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -63,7 +63,8 @@  struct udp_sock {
 #define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
 #define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
 	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
-	__u8		 unused[3];
+	__u8		 check6_zero_okay; /* Zero csum okay for IPv6 */
+	__u8		 unused[2];
 	/*
 	 * For encapsulation sockets.
 	 */
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index e2bcfd7..46d0e58 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -28,6 +28,7 @@  struct udphdr {
 
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
+#define UDP_CHECK6_ZERO_OKAY	2	/* Zero checksum okay for IPv6 UDP */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
 
 /* UDP encapsulation types */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d5e3a76..9b88788 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2003,6 +2003,10 @@  int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		}
 		break;
 
+	case UDP_CHECK6_ZERO_OKAY:
+		up->check6_zero_okay = !!val;
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
@@ -2081,6 +2085,10 @@  int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		val = up->corkflag;
 		break;
 
+	case UDP_CHECK6_ZERO_OKAY:
+		val = up->check6_zero_okay;
+		break;
+
 	case UDP_ENCAP:
 		val = up->encap_type;
 		break;
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 6105b8f..55896cd 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -83,16 +83,13 @@  int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 			return err;
 	}
 
-	if (uh->check == 0) {
-		/* RFC 2460 section 8.1 says that we SHOULD log
-		   this error. Well, it is reasonable.
-		 */
-		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
-			       &ipv6_hdr(skb)->saddr, ntohs(uh->source),
-			       &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
-		return 1;
-	}
-
-	return skb_checksum_init(skb, IPPROTO_UDP, ip6_pseudo_compute);
+	/*
+	 * To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+	 * we accept a checksum of zero here. When we find the socket
+	 * for the UDP packet we'll check if that socket allows zero checksum
+	 * for IPv6 (set by socket option).
+	 */
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					   ip6_pseudo_compute);
 }
 EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e586d9..8b874b9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -760,13 +760,25 @@  static void flush_stack(struct sock **stack, unsigned int count,
 	if (unlikely(skb1))
 		kfree_skb(skb1);
 }
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+	/*
+	 * RFC 2460 section 8.1 says that we SHOULD log
+	 * this error. Well, it is reasonable.
+	*/
+	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+		       &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+		       &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
 /*
  * Note: called only from the BH handler context,
  * so we don't need to lock the hashes.
  */
 static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		const struct in6_addr *saddr, const struct in6_addr *daddr,
-		struct udp_table *udptable)
+		struct udp_table *udptable, int proto)
 {
 	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	const struct udphdr *uh = udp_hdr(skb);
@@ -779,6 +791,19 @@  static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	while (sk) {
+		if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
+			/*
+			 * If checksum in packet is zero and not all the
+			 * sockets accept a zero checksum then declare
+			 * a checksum error.
+			 */
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
+			udp6_csum_zero_error(skb);
+			UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+			UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+			break;
+		}
 		stack[count++] = sk;
 		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
 				       uh->source, saddr, dif);
@@ -855,7 +880,7 @@  int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	 */
 	if (ipv6_addr_is_multicast(daddr))
 		return __udp6_lib_mcast_deliver(net, skb,
-				saddr, daddr, udptable);
+				saddr, daddr, udptable, proto);
 
 	/* Unicast */
 
@@ -867,6 +892,11 @@  int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
+		if (!uh->check && !udp_sk(sk)->check6_zero_okay) {
+			udp6_csum_zero_error(skb);
+			goto csum_error;
+		}
+
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
@@ -879,6 +909,11 @@  int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		return 0;
 	}
 
+	if (!uh->check) {
+		udp6_csum_zero_error(skb);
+		goto csum_error;
+	}
+
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard;