diff mbox series

[bpf-next,08/13] bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_FIXED_GSO

Message ID 20190320144944.147862-9-willemdebruijn.kernel@gmail.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf tc tunneling | expand

Commit Message

Willem de Bruijn March 20, 2019, 2:49 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

bpf_skb_adjust_room adjusts gso_size of gso packets to account for the
pushed or popped header room.

This is not allowed with UDP, where gso_size delineates datagrams. Add
an option to avoid these updates and allow this call for datagrams.

It can also be used with TCP, when MSS is known to allow headroom,
e.g., through MSS clamping or route MTU.

Link: https://patchwork.ozlabs.org/patch/1052497/
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/bpf.h |  4 ++++
 net/core/filter.c        | 36 +++++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 11 deletions(-)

Comments

Alan Maguire March 21, 2019, 1:42 p.m. UTC | #1
On Wed, 20 Mar 2019, Willem de Bruijn wrote:

> From: Willem de Bruijn <willemb@google.com>
> 
> bpf_skb_adjust_room adjusts gso_size of gso packets to account for the
> pushed or popped header room.
> 
> This is not allowed with UDP, where gso_size delineates datagrams. Add
> an option to avoid these updates and allow this call for datagrams.
> 
> It can also be used with TCP, when MSS is known to allow headroom,
> e.g., through MSS clamping or route MTU.
> 
> Link: https://patchwork.ozlabs.org/patch/1052497/
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/uapi/linux/bpf.h |  4 ++++
>  net/core/filter.c        | 36 +++++++++++++++++++++++++-----------
>  2 files changed, 29 insertions(+), 11 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4f5c918e6fcf4..0eda8f564a381 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2593,6 +2593,10 @@ enum bpf_func_id {
>  /* Current network namespace */
>  #define BPF_F_CURRENT_NETNS		(-1L)
>  
> +/* BPF_FUNC_skb_adjust_room flags. */
> +#define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)

minor nit - could we add this flag to the documentation for 
bpf_skb_adjust_room? Same suggestion for the encap flags in
patch 8 too. Thanks!

Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
 
> +#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO)
> +
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1a0cf578b4502..e346e48098000 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2963,12 +2963,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
>  	}
>  }
>  
> -static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
> +static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
> +			    u64 flags)
>  {
>  	int ret;
>  
> -	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
> -		return -ENOTSUPP;
> +	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
> +		/* udp gso_size delineates datagrams, only allow if fixed */
> +		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
> +		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> +			return -ENOTSUPP;
> +	}
>  
>  	ret = skb_cow_head(skb, len_diff);
>  	if (unlikely(ret < 0))
> @@ -2982,7 +2987,9 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
>  		struct skb_shared_info *shinfo = skb_shinfo(skb);
>  
>  		/* Due to header grow, MSS needs to be downgraded. */
> -		skb_decrease_gso_size(shinfo, len_diff);
> +		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> +			skb_decrease_gso_size(shinfo, len_diff);
> +
>  		/* Header must be checked, and gso_segs recomputed. */
>  		shinfo->gso_type |= SKB_GSO_DODGY;
>  		shinfo->gso_segs = 0;
> @@ -2991,12 +2998,17 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
>  	return 0;
>  }
>  
> -static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff)
> +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
> +			      u64 flags)
>  {
>  	int ret;
>  
> -	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
> -		return -ENOTSUPP;
> +	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
> +		/* udp gso_size delineates datagrams, only allow if fixed */
> +		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
> +		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> +			return -ENOTSUPP;
> +	}
>  
>  	ret = skb_unclone(skb, GFP_ATOMIC);
>  	if (unlikely(ret < 0))
> @@ -3010,7 +3022,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff)
>  		struct skb_shared_info *shinfo = skb_shinfo(skb);
>  
>  		/* Due to header shrink, MSS can be upgraded. */
> -		skb_increase_gso_size(shinfo, len_diff);
> +		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> +			skb_increase_gso_size(shinfo, len_diff);
> +
>  		/* Header must be checked, and gso_segs recomputed. */
>  		shinfo->gso_type |= SKB_GSO_DODGY;
>  		shinfo->gso_segs = 0;
> @@ -3037,7 +3051,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  	u32 off;
>  	int ret;
>  
> -	if (unlikely(flags))
> +	if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
>  		return -EINVAL;
>  	if (unlikely(len_diff_abs > 0xfffU))
>  		return -EFAULT;
> @@ -3065,8 +3079,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  			 !skb_is_gso(skb))))
>  		return -ENOTSUPP;
>  
> -	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs) :
> -		       bpf_skb_net_grow(skb, off, len_diff_abs);
> +	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
> +		       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
>  
>  	bpf_compute_data_pointers(skb);
>  	return ret;
> -- 
> 2.21.0.225.g810b269d1ac-goog
> 
>
Willem de Bruijn March 21, 2019, 2 p.m. UTC | #2
On Thu, Mar 21, 2019 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
>
>
> On Wed, 20 Mar 2019, Willem de Bruijn wrote:
>
> > From: Willem de Bruijn <willemb@google.com>
> >
> > bpf_skb_adjust_room adjusts gso_size of gso packets to account for the
> > pushed or popped header room.
> >
> > This is not allowed with UDP, where gso_size delineates datagrams. Add
> > an option to avoid these updates and allow this call for datagrams.
> >
> > It can also be used with TCP, when MSS is known to allow headroom,
> > e.g., through MSS clamping or route MTU.
> >
> > Link: https://patchwork.ozlabs.org/patch/1052497/
> > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > ---
> >  include/uapi/linux/bpf.h |  4 ++++
> >  net/core/filter.c        | 36 +++++++++++++++++++++++++-----------
> >  2 files changed, 29 insertions(+), 11 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4f5c918e6fcf4..0eda8f564a381 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2593,6 +2593,10 @@ enum bpf_func_id {
> >  /* Current network namespace */
> >  #define BPF_F_CURRENT_NETNS          (-1L)
> >
> > +/* BPF_FUNC_skb_adjust_room flags. */
> > +#define BPF_F_ADJ_ROOM_FIXED_GSO     (1ULL << 0)
>
> minor nit - could we add this flag to the documentation for
> bpf_skb_adjust_room? Same suggestion for the encap flags in
> patch 8 too. Thanks!
>
> Reviewed-by: Alan Maguire <alan.maguire@oracle.com>

Definitely, will do in v2. Thanks.
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f5c918e6fcf4..0eda8f564a381 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2593,6 +2593,10 @@  enum bpf_func_id {
 /* Current network namespace */
 #define BPF_F_CURRENT_NETNS		(-1L)
 
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
+#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a0cf578b4502..e346e48098000 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2963,12 +2963,17 @@  static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 	}
 }
 
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+			    u64 flags)
 {
 	int ret;
 
-	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-		return -ENOTSUPP;
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+		/* udp gso_size delineates datagrams, only allow if fixed */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			return -ENOTSUPP;
+	}
 
 	ret = skb_cow_head(skb, len_diff);
 	if (unlikely(ret < 0))
@@ -2982,7 +2987,9 @@  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
 		/* Due to header grow, MSS needs to be downgraded. */
-		skb_decrease_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_decrease_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -2991,12 +2998,17 @@  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff)
 	return 0;
 }
 
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+			      u64 flags)
 {
 	int ret;
 
-	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-		return -ENOTSUPP;
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+		/* udp gso_size delineates datagrams, only allow if fixed */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			return -ENOTSUPP;
+	}
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
 	if (unlikely(ret < 0))
@@ -3010,7 +3022,9 @@  static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff)
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
 		/* Due to header shrink, MSS can be upgraded. */
-		skb_increase_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_increase_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3037,7 +3051,7 @@  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	u32 off;
 	int ret;
 
-	if (unlikely(flags))
+	if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
 		return -EINVAL;
 	if (unlikely(len_diff_abs > 0xfffU))
 		return -EFAULT;
@@ -3065,8 +3079,8 @@  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 			 !skb_is_gso(skb))))
 		return -ENOTSUPP;
 
-	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs) :
-		       bpf_skb_net_grow(skb, off, len_diff_abs);
+	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+		       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
 
 	bpf_compute_data_pointers(skb);
 	return ret;