[bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()

Message ID 20181108151137.3975-1-nicolas.dichtel@6wind.com
State New
Delegated to: BPF Maintainers
Headers show
Series
  • [bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
Related show

Commit Message

Nicolas Dichtel Nov. 8, 2018, 3:11 p.m.
This new mode makes it possible to add or remove an L2 header
programmatically with cls_bpf.
For example, it can be used to manipulate MPLS headers.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/uapi/linux/bpf.h       |  3 ++
 net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 3 files changed, 60 insertions(+)

Comments

Martin Lau Nov. 9, 2018, 6:51 p.m. | #1
On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
>  include/uapi/linux/bpf.h       |  3 ++
>  net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  3 ++
>  3 files changed, 60 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e521c5ebc7d1..e699849b269d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2884,6 +2884,58 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  	return ret;
>  }
>  
> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_unclone(skb, GFP_ATOMIC);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	__skb_pull(skb, len);
> +	skb_reset_mac_header(skb);
> +	skb_reset_network_header(skb);
> +	skb->network_header += hhlen;
> +	skb_reset_transport_header(skb);
Hmm... why does transport_header not need += hhlen here
while network_header does?

> +	return 0;
> +}
> +
> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_cow(skb, len);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_push(skb, len);
> +	skb_reset_mac_header(skb);
> +	return 0;
> +}
> +
> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
> +{
> +	u32 len_diff_abs = abs(len_diff);
> +	bool shrink = len_diff < 0;
> +	int ret;
> +
> +	if (unlikely(len_diff_abs > 0xfffU))
> +		return -EFAULT;
> +
> +	if (shrink && len_diff_abs >= skb_headlen(skb))
> +		return -EFAULT;
> +
> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
> +		       bpf_skb_data_grow(skb, len_diff_abs);
> +
> +	bpf_compute_data_pointers(skb);
> +	return ret;
> +}
> +
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  	   u32, mode, u64, flags)
>  {
> @@ -2891,6 +2943,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  		return -EINVAL;
>  	if (likely(mode == BPF_ADJ_ROOM_NET))
>  		return bpf_skb_adjust_net(skb, len_diff);
> +	if (likely(mode == BPF_ADJ_ROOM_DATA))
> +		return bpf_skb_adjust_data(skb, len_diff);
>  
>  	return -ENOTSUPP;
>  }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> -- 
> 2.18.0
>
Nicolas Dichtel Nov. 10, 2018, 11:43 p.m. | #2
Le 09/11/2018 à 19:51, Martin Lau a écrit :
> On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
[snip]
>> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_unclone(skb, GFP_ATOMIC);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	__skb_pull(skb, len);
>> +	skb_reset_mac_header(skb);
>> +	skb_reset_network_header(skb);
>> +	skb->network_header += hhlen;
>> +	skb_reset_transport_header(skb);
> hmm...why transport_header does not need += hhlen here
> while network_header does?

network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called
and network_header is expected to be correctly set in this case.
For transport_header, I chose not to set it, because the stack will set it
later (for example in ip_rcv_core()).


Regards,
Nicolas
Martin Lau Nov. 12, 2018, 6:39 p.m. | #3
On Sun, Nov 11, 2018 at 12:43:27AM +0100, Nicolas Dichtel wrote:
> Le 09/11/2018 à 19:51, Martin Lau a écrit :
> > On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
> [snip]
> >> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> >> +{
> >> +	unsigned short hhlen = skb->dev->header_ops ?
> >> +			       skb->dev->hard_header_len : 0;
> >> +	int ret;
> >> +
> >> +	ret = skb_unclone(skb, GFP_ATOMIC);
> >> +	if (unlikely(ret < 0))
> >> +		return ret;
> >> +
> >> +	__skb_pull(skb, len);
> >> +	skb_reset_mac_header(skb);
> >> +	skb_reset_network_header(skb);
> >> +	skb->network_header += hhlen;
Nit. skb_set_network_header(skb, hhlen);

Other than that

Acked-by: Martin KaFai Lau <kafai@fb.com>

> >> +	skb_reset_transport_header(skb);
> > hmm...why transport_header does not need += hhlen here
> > while network_header does?
> 
> network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called
> and network_header is expected to be correctly set in this case.
> For transport_header, I choose to not set it, because the stack will set it
> later (for example ip_rcv_core()).
I see. Makes sense.

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@  union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@  enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..e699849b269d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2884,6 +2884,58 @@  static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 	return ret;
 }
 
+static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	__skb_pull(skb, len);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb->network_header += hhlen;
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_cow(skb, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_push(skb, len);
+	skb_reset_mac_header(skb);
+	return 0;
+}
+
+static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
+{
+	u32 len_diff_abs = abs(len_diff);
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+
+	if (shrink && len_diff_abs >= skb_headlen(skb))
+		return -EFAULT;
+
+	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
+		       bpf_skb_data_grow(skb, len_diff_abs);
+
+	bpf_compute_data_pointers(skb);
+	return ret;
+}
+
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
@@ -2891,6 +2943,8 @@  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -EINVAL;
 	if (likely(mode == BPF_ADJ_ROOM_NET))
 		return bpf_skb_adjust_net(skb, len_diff);
+	if (likely(mode == BPF_ADJ_ROOM_DATA))
+		return bpf_skb_adjust_data(skb, len_diff);
 
 	return -ENOTSUPP;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@  union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@  enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */