diff mbox series

[bpf-next,v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()

Message ID 20181113163517.4185-1-nicolas.dichtel@6wind.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series [bpf-next,v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room() | expand

Commit Message

Nicolas Dichtel Nov. 13, 2018, 4:35 p.m. UTC
This new mode enables to add or remove an l2 header in a programmatic way
with cls_bpf.
For example, it enables to play with mpls headers.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---

v2: use skb_set_network_header()

 include/uapi/linux/bpf.h       |  3 ++
 net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 3 files changed, 59 insertions(+)

Comments

Alexei Starovoitov Nov. 17, 2018, 4:52 a.m. UTC | #1
On Tue, Nov 13, 2018 at 05:35:17PM +0100, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Acked-by: Martin KaFai Lau <kafai@fb.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

Daniel, thoughts?
Daniel Borkmann Nov. 20, 2018, 1:06 a.m. UTC | #2
On 11/13/2018 05:35 PM, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Acked-by: Martin KaFai Lau <kafai@fb.com>

(Sorry for late reply, swamped due to Plumbers.)

> ---
> 
> v2: use skb_set_network_header()
> 
>  include/uapi/linux/bpf.h       |  3 ++
>  net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  3 ++
>  3 files changed, 59 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 47d606d744cc..9762ef3a536c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).

Nit: in this case it would probably make more sense to name it BPF_ADJ_ROOM_MAC.
BPF_ADJ_ROOM_DATA reads like we're adjusting payload data.

>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2412,6 +2414,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 53d50fb75ea1..e56da3077dca 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2884,6 +2884,57 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  	return ret;
>  }
>  
> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_unclone(skb, GFP_ATOMIC);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	__skb_pull(skb, len);
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, hhlen);
> +	skb_reset_transport_header(skb);
> +	return 0;
> +}
> +
> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_cow(skb, len);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_push(skb, len);
> +	skb_reset_mac_header(skb);

Looks like this could leak uninitialized mem into the BPF prog as it's
not zeroed out. Can't we just reuse bpf_skb_generic_push() and the
bpf_skb_generic_pop()?

bpf_skb_vlan_push() and bpf_skb_vlan_pop() do a bpf_push_mac_rcsum() /
bpf_pull_mac_rcsum() in order to not screw up things like csum complete.
Wouldn't this be needed here as well?

How does this interact with MPLS GSO?

If the packet gets pushed back into the stack, would AF_MPLS handle it?
Probably good to add test cases to BPF kselftest suite with it.

Don't we need to reject the packet in case of skb->encapsulation?

Looking at push_mpls() and pop_mpls() from OVS implementation, do we
also need to deal with skb inner network header / proto?

Given all this would it rather make sense to add MPLS specific helpers
in similar fashion to the vlan ones we have?

Is there any other use case aside from MPLS?

> +	return 0;
> +}
> +
> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
> +{
> +	u32 len_diff_abs = abs(len_diff);
> +	bool shrink = len_diff < 0;
> +	int ret;
> +
> +	if (unlikely(len_diff_abs > 0xfffU))
> +		return -EFAULT;
> +
> +	if (shrink && len_diff_abs >= skb_headlen(skb))
> +		return -EFAULT;
> +
> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
> +		       bpf_skb_data_grow(skb, len_diff_abs);

Hmm, I think this has a bug in that the above two do not adjust
skb->mac_len. Then things like cls_bpf_classify() will __skb_pull()
based on the old mac_len, getting you to a wrong offset in the
packet when this is for example pushed back into ingress, etc.

> +	bpf_compute_data_pointers(skb);
> +	return ret;
> +}
> +
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  	   u32, mode, u64, flags)
>  {
> @@ -2891,6 +2942,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  		return -EINVAL;
>  	if (likely(mode == BPF_ADJ_ROOM_NET))
>  		return bpf_skb_adjust_net(skb, len_diff);
> +	if (likely(mode == BPF_ADJ_ROOM_DATA))
> +		return bpf_skb_adjust_data(skb, len_diff);
>  
>  	return -ENOTSUPP;
>  }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
>
Nicolas Dichtel Nov. 21, 2018, 2:43 p.m. UTC | #3
Le 20/11/2018 à 02:06, Daniel Borkmann a écrit :
> On 11/13/2018 05:35 PM, Nicolas Dichtel wrote:
>> This new mode enables to add or remove an l2 header in a programmatic way
>> with cls_bpf.
>> For example, it enables to play with mpls headers.
>>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> Acked-by: Martin KaFai Lau <kafai@fb.com>
> 
> (Sorry for late reply, swamped due to Plumbers.)
I thought so, no problem ;-)

> 
>> ---
>>
>> v2: use skb_set_network_header()
>>
>>  include/uapi/linux/bpf.h       |  3 ++
>>  net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
>>  tools/include/uapi/linux/bpf.h |  3 ++
>>  3 files changed, 59 insertions(+)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 47d606d744cc..9762ef3a536c 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1467,6 +1467,8 @@ union bpf_attr {
>>   *
>>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>>   * 		  (room space is added or removed below the layer 3 header).
>> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
>> + * 		  packet (room space is added or removed below skb->data).
> 
> Nit: in this case it would probably make more sense to name it BPF_ADJ_ROOM_MAC.
> BPF_ADJ_ROOM_DATA reads like we're adjusting payload data.
Yep, I first had in mind skb->data, but you're right.

> 
>>   * 		All values for *flags* are reserved for future usage, and must
>>   * 		be left at zero.
>> @@ -2412,6 +2414,7 @@ enum bpf_func_id {
>>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>>  enum bpf_adj_room_mode {
>>  	BPF_ADJ_ROOM_NET,
>> +	BPF_ADJ_ROOM_DATA,
>>  };
>>  
>>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 53d50fb75ea1..e56da3077dca 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2884,6 +2884,57 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>>  	return ret;
>>  }
>>  
>> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_unclone(skb, GFP_ATOMIC);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	__skb_pull(skb, len);
>> +	skb_reset_mac_header(skb);
>> +	skb_set_network_header(skb, hhlen);
>> +	skb_reset_transport_header(skb);
>> +	return 0;
>> +}
>> +
>> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_cow(skb, len);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	skb_push(skb, len);
>> +	skb_reset_mac_header(skb);
> 
> Looks like this could leak uninitialized mem into the BPF prog as it's
> not zeroed out. Can't we just reuse bpf_skb_generic_push() and the
> bpf_skb_generic_pop()?
Hmm, at some point, it was not possible, but it was an intermediate version and
I don't see why now.

> 
> bpf_skb_vlan_push() and bpf_skb_vlan_pop() do a bpf_push_mac_rcsum() /
> bpf_pull_mac_rcsum() in order to not screw up things like csum complete.
> Wouldn't this be needed here as well?
> 
> How does this interact with MPLS GSO?
I overlooked both points.

> 
> If the packet gets pushed back into the stack, would AF_MPLS handle it?
> Probably good to add test cases to BPF kselftest suite with it.
Ok, I will have a look to this.

> 
> Don't we need to reject the packet in case of skb->encapsulation?
> 
> Looking at push_mpls() and pop_mpls() from OVS implementation, do we
> also need to deal with skb inner network header / proto?
> 
> Given all this would it rather make sense to add MPLS specific helpers
> in similar fashion to the vlan ones we have?
> 
> Is there any other use case aside from MPLS?
Yes, I was targeting a generic encap/decap at l2 level. Maybe more complex than
I thought.

> 
>> +	return 0;
>> +}
>> +
>> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
>> +{
>> +	u32 len_diff_abs = abs(len_diff);
>> +	bool shrink = len_diff < 0;
>> +	int ret;
>> +
>> +	if (unlikely(len_diff_abs > 0xfffU))
>> +		return -EFAULT;
>> +
>> +	if (shrink && len_diff_abs >= skb_headlen(skb))
>> +		return -EFAULT;
>> +
>> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
>> +		       bpf_skb_data_grow(skb, len_diff_abs);
> 
> Hmm, I think this has a bug in that the above two do not adjust
> skb->mac_len. Then things like cls_bpf_classify() will __skb_pull()
> based on the old mac_len, getting you to a wrong offset in the
> packet when this is for example pushed back into ingress, etc.
Nice catch!


Thank you,
Nicolas
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 47d606d744cc..9762ef3a536c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@  union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2412,6 +2414,7 @@  enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/net/core/filter.c b/net/core/filter.c
index 53d50fb75ea1..e56da3077dca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2884,6 +2884,57 @@  static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 	return ret;
 }
 
+static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	__skb_pull(skb, len);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, hhlen);
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_cow(skb, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_push(skb, len);
+	skb_reset_mac_header(skb);
+	return 0;
+}
+
+static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
+{
+	u32 len_diff_abs = abs(len_diff);
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+
+	if (shrink && len_diff_abs >= skb_headlen(skb))
+		return -EFAULT;
+
+	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
+		       bpf_skb_data_grow(skb, len_diff_abs);
+
+	bpf_compute_data_pointers(skb);
+	return ret;
+}
+
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
@@ -2891,6 +2942,8 @@  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -EINVAL;
 	if (likely(mode == BPF_ADJ_ROOM_NET))
 		return bpf_skb_adjust_net(skb, len_diff);
+	if (likely(mode == BPF_ADJ_ROOM_DATA))
+		return bpf_skb_adjust_data(skb, len_diff);
 
 	return -ENOTSUPP;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@  union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@  enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */