diff mbox series

[net-next,1/2] gre: add sequence number for collect md mode.

Message ID 1519863101-112083-2-git-send-email-u9012063@gmail.com
State Superseded, archived
Delegated to: David Miller
Headers show
Series gre: add sequence number for collect md mode. | expand

Commit Message

William Tu March 1, 2018, 12:11 a.m. UTC
Currently GRE sequence number can only be used in native
tunnel mode.  This patch adds sequence number support for
gre collect metadata mode.  RFC2890 defines GRE sequence
number to be specific to the traffic flow identified by the
key.  However, this patch does not implement per-key seqno.
The sequence number is shared in the same tunnel device.
That is, different tunnel keys using the same collect_md
tunnel share single sequence number.

Signed-off-by: William Tu <u9012063@gmail.com>
---
 include/uapi/linux/bpf.h |  1 +
 net/core/filter.c        |  4 +++-
 net/ipv4/ip_gre.c        |  7 +++++--
 net/ipv6/ip6_gre.c       | 13 ++++++++-----
 4 files changed, 17 insertions(+), 8 deletions(-)

Comments

Daniel Borkmann March 1, 2018, 10:18 a.m. UTC | #1
On 03/01/2018 01:11 AM, William Tu wrote:
> Currently GRE sequence number can only be used in native
> tunnel mode.  This patch adds sequence number support for
> gre collect metadata mode.  RFC2890 defines GRE sequence
> number to be specific to the traffic flow identified by the
> key.  However, this patch does not implement per-key seqno.
> The sequence number is shared in the same tunnel device.
> That is, different tunnel keys using the same collect_md
> tunnel share single sequence number.
> 
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
>  include/uapi/linux/bpf.h |  1 +
>  net/core/filter.c        |  4 +++-
>  net/ipv4/ip_gre.c        |  7 +++++--
>  net/ipv6/ip6_gre.c       | 13 ++++++++-----
>  4 files changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index db6bdc375126..2c6dd942953d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -800,6 +800,7 @@ enum bpf_func_id {
>  /* BPF_FUNC_skb_set_tunnel_key flags. */
>  #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
>  #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
> +#define BPF_F_GRE_SEQ			(1ULL << 3)
>  
>  /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
>   * BPF_FUNC_perf_event_read_value flags.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0c121adbdbaa..010305e0791a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
>  	struct ip_tunnel_info *info;
>  
>  	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
> -			       BPF_F_DONT_FRAGMENT)))
> +			       BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ)))
>  		return -EINVAL;
>  	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
>  		switch (size) {
> @@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
>  		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
>  	if (flags & BPF_F_ZERO_CSUM_TX)
>  		info->key.tun_flags &= ~TUNNEL_CSUM;
> +	if (flags & BPF_F_GRE_SEQ)
> +		info->key.tun_flags |= TUNNEL_SEQ;

Ok, looks fine. My only minor request would be to rename BPF_F_GRE_SEQ
into e.g. BPF_F_SEQ_NUMBER to at least not have something GRE specific
in the name in case we could later on reuse it elsewhere as well, and
the bpf_skb_set_tunnel_key() is unaware of the underlying encap anyway.

>  	info->key.tun_id = cpu_to_be64(from->tunnel_id);
>  	info->key.tos = from->tunnel_tos;
> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
> index 0fe1d69b5df4..95fd225f402e 100644
> --- a/net/ipv4/ip_gre.c
> +++ b/net/ipv4/ip_gre.c
> @@ -522,6 +522,7 @@ static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
>  static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
>  			__be16 proto)
>  {
> +	struct ip_tunnel *tunnel = netdev_priv(dev);
>  	struct ip_tunnel_info *tun_info;
>  	const struct ip_tunnel_key *key;
>  	struct rtable *rt = NULL;
> @@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
>  	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
>  		goto err_free_rt;
>  
> -	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
> +	flags = tun_info->key.tun_flags &
> +		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
>  	gre_build_header(skb, tunnel_hlen, flags, proto,
> -			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
> +			 tunnel_id_to_key32(tun_info->key.tun_id),
> +			 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
>  
>  	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
>  
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
> index 4f150a394387..16c5dfcbd195 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -695,9 +695,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
>  	else
>  		fl6->daddr = tunnel->parms.raddr;
>  
> -	if (tunnel->parms.o_flags & TUNNEL_SEQ)
> -		tunnel->o_seqno++;
> -
>  	/* Push GRE header. */
>  	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
>  
> @@ -720,14 +717,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
>  		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
>  
>  		dsfield = key->tos;
> -		flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
> +		flags = key->tun_flags &
> +			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
>  		tunnel->tun_hlen = gre_calc_hlen(flags);
>  
>  		gre_build_header(skb, tunnel->tun_hlen,
>  				 flags, protocol,
> -				 tunnel_id_to_key32(tun_info->key.tun_id), 0);
> +				 tunnel_id_to_key32(tun_info->key.tun_id),
> +				 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
> +						      : 0);
>  
>  	} else {
> +		if (tunnel->parms.o_flags & TUNNEL_SEQ)
> +			tunnel->o_seqno++;
> +
>  		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
>  				 protocol, tunnel->parms.o_key,
>  				 htonl(tunnel->o_seqno));
>
William Tu March 1, 2018, 5:17 p.m. UTC | #2
On Thu, Mar 1, 2018 at 2:18 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> On 03/01/2018 01:11 AM, William Tu wrote:
>> Currently GRE sequence number can only be used in native
>> tunnel mode.  This patch adds sequence number support for
>> gre collect metadata mode.  RFC2890 defines GRE sequence
>> number to be specific to the traffic flow identified by the
>> key.  However, this patch does not implement per-key seqno.
>> The sequence number is shared in the same tunnel device.
>> That is, different tunnel keys using the same collect_md
>> tunnel share single sequence number.
>>
>> Signed-off-by: William Tu <u9012063@gmail.com>
>> ---
>>  include/uapi/linux/bpf.h |  1 +
>>  net/core/filter.c        |  4 +++-
>>  net/ipv4/ip_gre.c        |  7 +++++--
>>  net/ipv6/ip6_gre.c       | 13 ++++++++-----
>>  4 files changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index db6bdc375126..2c6dd942953d 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -800,6 +800,7 @@ enum bpf_func_id {
>>  /* BPF_FUNC_skb_set_tunnel_key flags. */
>>  #define BPF_F_ZERO_CSUM_TX           (1ULL << 1)
>>  #define BPF_F_DONT_FRAGMENT          (1ULL << 2)
>> +#define BPF_F_GRE_SEQ                        (1ULL << 3)
>>
>>  /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
>>   * BPF_FUNC_perf_event_read_value flags.
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 0c121adbdbaa..010305e0791a 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
>>       struct ip_tunnel_info *info;
>>
>>       if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
>> -                            BPF_F_DONT_FRAGMENT)))
>> +                            BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ)))
>>               return -EINVAL;
>>       if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
>>               switch (size) {
>> @@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
>>               info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
>>       if (flags & BPF_F_ZERO_CSUM_TX)
>>               info->key.tun_flags &= ~TUNNEL_CSUM;
>> +     if (flags & BPF_F_GRE_SEQ)
>> +             info->key.tun_flags |= TUNNEL_SEQ;
>
> Ok, looks fine. My only minor request would be to rename BPF_F_GRE_SEQ
> into e.g. BPF_F_SEQ_NUMBER to at least not have something GRE specific
> in the name in case we could later on reuse it elsewhere as well, and
> the bpf_skb_set_tunnel_key() is unaware of the underlying encap anyway.

OK, make sense. Thanks!
I will rename it in the next version.
William
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db6bdc375126..2c6dd942953d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -800,6 +800,7 @@  enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+#define BPF_F_GRE_SEQ			(1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c121adbdbaa..010305e0791a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2991,7 +2991,7 @@  BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
 	struct ip_tunnel_info *info;
 
 	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
-			       BPF_F_DONT_FRAGMENT)))
+			       BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ)))
 		return -EINVAL;
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
@@ -3025,6 +3025,8 @@  BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
 		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
 	if (flags & BPF_F_ZERO_CSUM_TX)
 		info->key.tun_flags &= ~TUNNEL_CSUM;
+	if (flags & BPF_F_GRE_SEQ)
+		info->key.tun_flags |= TUNNEL_SEQ;
 
 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
 	info->key.tos = from->tunnel_tos;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0fe1d69b5df4..95fd225f402e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@  static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 			__be16 proto)
 {
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct rtable *rt = NULL;
@@ -545,9 +546,11 @@  static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 		goto err_free_rt;
 
-	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+	flags = tun_info->key.tun_flags &
+		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 	gre_build_header(skb, tunnel_hlen, flags, proto,
-			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+			 tunnel_id_to_key32(tun_info->key.tun_id),
+			 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4f150a394387..16c5dfcbd195 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -695,9 +695,6 @@  static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 	else
 		fl6->daddr = tunnel->parms.raddr;
 
-	if (tunnel->parms.o_flags & TUNNEL_SEQ)
-		tunnel->o_seqno++;
-
 	/* Push GRE header. */
 	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -720,14 +717,20 @@  static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
 		dsfield = key->tos;
-		flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+		flags = key->tun_flags &
+			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 		tunnel->tun_hlen = gre_calc_hlen(flags);
 
 		gre_build_header(skb, tunnel->tun_hlen,
 				 flags, protocol,
-				 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+				 tunnel_id_to_key32(tun_info->key.tun_id),
+				 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+						      : 0);
 
 	} else {
+		if (tunnel->parms.o_flags & TUNNEL_SEQ)
+			tunnel->o_seqno++;
+
 		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 				 protocol, tunnel->parms.o_key,
 				 htonl(tunnel->o_seqno));