Message ID | 20181108151137.3975-1-nicolas.dichtel@6wind.com |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room() | expand |
On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote: > This new mode enables to add or remove an l2 header in a programmatic way > with cls_bpf. > For example, it enables to play with mpls headers. > > Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> > --- > include/uapi/linux/bpf.h | 3 ++ > net/core/filter.c | 54 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 3 ++ > 3 files changed, 60 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 852dc17ab47a..47407fd5162b 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1467,6 +1467,8 @@ union bpf_attr { > * > * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer > * (room space is added or removed below the layer 3 header). > + * * **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the > + * packet (room space is added or removed below skb->data). > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2408,6 +2410,7 @@ enum bpf_func_id { > /* Mode for BPF_FUNC_skb_adjust_room helper. */ > enum bpf_adj_room_mode { > BPF_ADJ_ROOM_NET, > + BPF_ADJ_ROOM_DATA, > }; > > /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ > diff --git a/net/core/filter.c b/net/core/filter.c > index e521c5ebc7d1..e699849b269d 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -2884,6 +2884,58 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) > return ret; > } > > +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len) > +{ > + unsigned short hhlen = skb->dev->header_ops ? 
> + skb->dev->hard_header_len : 0; > + int ret; > + > + ret = skb_unclone(skb, GFP_ATOMIC); > + if (unlikely(ret < 0)) > + return ret; > + > + __skb_pull(skb, len); > + skb_reset_mac_header(skb); > + skb_reset_network_header(skb); > + skb->network_header += hhlen; > + skb_reset_transport_header(skb); hmm...why transport_header does not need += hhlen here while network_header does? > + return 0; > +} > + > +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len) > +{ > + unsigned short hhlen = skb->dev->header_ops ? > + skb->dev->hard_header_len : 0; > + int ret; > + > + ret = skb_cow(skb, len); > + if (unlikely(ret < 0)) > + return ret; > + > + skb_push(skb, len); > + skb_reset_mac_header(skb); > + return 0; > +} > + > +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff) > +{ > + u32 len_diff_abs = abs(len_diff); > + bool shrink = len_diff < 0; > + int ret; > + > + if (unlikely(len_diff_abs > 0xfffU)) > + return -EFAULT; > + > + if (shrink && len_diff_abs >= skb_headlen(skb)) > + return -EFAULT; > + > + ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) : > + bpf_skb_data_grow(skb, len_diff_abs); > + > + bpf_compute_data_pointers(skb); > + return ret; > +} > + > BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > u32, mode, u64, flags) > { > @@ -2891,6 +2943,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > return -EINVAL; > if (likely(mode == BPF_ADJ_ROOM_NET)) > return bpf_skb_adjust_net(skb, len_diff); > + if (likely(mode == BPF_ADJ_ROOM_DATA)) > + return bpf_skb_adjust_data(skb, len_diff); > > return -ENOTSUPP; > } > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 852dc17ab47a..47407fd5162b 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -1467,6 +1467,8 @@ union bpf_attr { > * > * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer > * (room space is added or removed below the layer 3 header). 
> + * * **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the > + * packet (room space is added or removed below skb->data). > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2408,6 +2410,7 @@ enum bpf_func_id { > /* Mode for BPF_FUNC_skb_adjust_room helper. */ > enum bpf_adj_room_mode { > BPF_ADJ_ROOM_NET, > + BPF_ADJ_ROOM_DATA, > }; > > /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ > -- > 2.18.0 >
Le 09/11/2018 à 19:51, Martin Lau a écrit : > On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote: [snip] >> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len) >> +{ >> + unsigned short hhlen = skb->dev->header_ops ? >> + skb->dev->hard_header_len : 0; >> + int ret; >> + >> + ret = skb_unclone(skb, GFP_ATOMIC); >> + if (unlikely(ret < 0)) >> + return ret; >> + >> + __skb_pull(skb, len); >> + skb_reset_mac_header(skb); >> + skb_reset_network_header(skb); >> + skb->network_header += hhlen; >> + skb_reset_transport_header(skb); > hmm...why transport_header does not need += hhlen here > while network_header does? Setting network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called, and network_header is expected to be set correctly in that case. For transport_header, I chose not to set it, because the stack will set it later (for example in ip_rcv_core()). Regards, Nicolas
On Sun, Nov 11, 2018 at 12:43:27AM +0100, Nicolas Dichtel wrote: > Le 09/11/2018 à 19:51, Martin Lau a écrit : > > On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote: > [snip] > >> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len) > >> +{ > >> + unsigned short hhlen = skb->dev->header_ops ? > >> + skb->dev->hard_header_len : 0; > >> + int ret; > >> + > >> + ret = skb_unclone(skb, GFP_ATOMIC); > >> + if (unlikely(ret < 0)) > >> + return ret; > >> + > >> + __skb_pull(skb, len); > >> + skb_reset_mac_header(skb); > >> + skb_reset_network_header(skb); > >> + skb->network_header += hhlen; Nit: use skb_set_network_header(skb, hhlen); Other than that, Acked-by: Martin KaFai Lau <kafai@fb.com> > >> + skb_reset_transport_header(skb); > > hmm...why transport_header does not need += hhlen here > > while network_header does? > > network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called > and network_header is expected to be correctly set in this case. > For transport_header, I choose to not set it, because the stack will set it > later (for example ip_rcv_core()). I see. Makes sense.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..47407fd5162b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1467,6 +1467,8 @@ union bpf_attr { * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). + * * **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the + * packet (room space is added or removed below skb->data). * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2408,6 +2410,7 @@ enum bpf_func_id { /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_DATA, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ diff --git a/net/core/filter.c b/net/core/filter.c index e521c5ebc7d1..e699849b269d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2884,6 +2884,58 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) return ret; } +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len) +{ + unsigned short hhlen = skb->dev->header_ops ? + skb->dev->hard_header_len : 0; + int ret; + + ret = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(ret < 0)) + return ret; + + __skb_pull(skb, len); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->network_header += hhlen; + skb_reset_transport_header(skb); + return 0; +} + +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len) +{ + unsigned short hhlen = skb->dev->header_ops ? + skb->dev->hard_header_len : 0; + int ret; + + ret = skb_cow(skb, len); + if (unlikely(ret < 0)) + return ret; + + skb_push(skb, len); + skb_reset_mac_header(skb); + return 0; +} + +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff) +{ + u32 len_diff_abs = abs(len_diff); + bool shrink = len_diff < 0; + int ret; + + if (unlikely(len_diff_abs > 0xfffU)) + return -EFAULT; + + if (shrink && len_diff_abs >= skb_headlen(skb)) + return -EFAULT; + + ret = shrink ? 
bpf_skb_data_shrink(skb, len_diff_abs) : + bpf_skb_data_grow(skb, len_diff_abs); + + bpf_compute_data_pointers(skb); + return ret; +} + BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, u32, mode, u64, flags) { @@ -2891,6 +2943,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, return -EINVAL; if (likely(mode == BPF_ADJ_ROOM_NET)) return bpf_skb_adjust_net(skb, len_diff); + if (likely(mode == BPF_ADJ_ROOM_DATA)) + return bpf_skb_adjust_data(skb, len_diff); return -ENOTSUPP; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 852dc17ab47a..47407fd5162b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1467,6 +1467,8 @@ union bpf_attr { * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). + * * **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the + * packet (room space is added or removed below skb->data). * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2408,6 +2410,7 @@ enum bpf_func_id { /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_DATA, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
This new mode makes it possible to add or remove an L2 header programmatically with cls_bpf. For example, it can be used to push or pop MPLS headers. Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> --- include/uapi/linux/bpf.h | 3 ++ net/core/filter.c | 54 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 3 ++ 3 files changed, 60 insertions(+)