Message ID | 20181130200831.165645-1-willemdebruijn.kernel@gmail.com |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next] bpf: allow BPF read access to qdisc pkt_len | expand |
On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > From: Petar Penkov <ppenkov@google.com> > > The pkt_len field in qdisc_skb_cb stores the skb length as it will > appear on the wire after segmentation. For byte accounting, this value > is more accurate than skb->len. It is computed on entry to the TC > layer, so only valid there. > > Allow read access to this field from BPF tc classifier and action > programs. The implementation is analogous to tc_classid, aside from > restricting to read access. > > Signed-off-by: Petar Penkov <ppenkov@google.com> > Signed-off-by: Vlad Dumitrescu <vladum@google.com> > Signed-off-by: Willem de Bruijn <willemb@google.com> > --- > include/uapi/linux/bpf.h | 1 + > net/core/filter.c | 16 +++++++++++ > tools/include/uapi/linux/bpf.h | 1 + > tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++ > 4 files changed, 50 insertions(+) Please split this into 3 patches: 1 for include/uapi/linux/bpf.h and filter.c 1 for tools/include/uapi/linux/bpf.h 1 for tools/testing/selftests/bpf/test_verifier.c Other than this Acked-by: Song Liu <songliubraving@fb.com> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 597afdbc1ab9..ce028482d423 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2483,6 +2483,7 @@ struct __sk_buff { > __u32 data_meta; > struct bpf_flow_keys *flow_keys; > __u64 tstamp; > + __u32 pkt_len; > }; > > struct bpf_tunnel_key { > diff --git a/net/core/filter.c b/net/core/filter.c > index bd0df75dc7b6..af071ef22c0a 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5773,6 +5773,7 @@ static bool sk_filter_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > case bpf_ctx_range(struct __sk_buff, tstamp): > + case bpf_ctx_range(struct __sk_buff, pkt_len): > return false; > } > > @@ -5797,6 +5798,7 @@ static bool cg_skb_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, tc_classid): > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, flow_keys): > + case bpf_ctx_range(struct __sk_buff, pkt_len): > return false; > case bpf_ctx_range(struct __sk_buff, data): > case bpf_ctx_range(struct __sk_buff, data_end): > @@ -5843,6 +5845,7 @@ static bool lwt_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range(struct __sk_buff, tstamp): > + case bpf_ctx_range(struct __sk_buff, pkt_len): > return false; > } > > @@ -6273,6 +6276,7 @@ static bool sk_skb_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range(struct __sk_buff, tstamp): > + case bpf_ctx_range(struct __sk_buff, pkt_len): > return false; > } > > @@ -6360,6 +6364,7 @@ static bool flow_dissector_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > case bpf_ctx_range(struct __sk_buff, tstamp): > + case bpf_ctx_range(struct __sk_buff, pkt_len): > return false; > } > > @@ -6685,6 +6690,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, > bpf_target_off(struct sk_buff, > tstamp, 8, > target_size)); > + break; > + > + case offsetof(struct __sk_buff, pkt_len): > + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); > + > + off = si->off; > + off -= offsetof(struct __sk_buff, pkt_len); > + off += offsetof(struct sk_buff, cb); > + off += offsetof(struct qdisc_skb_cb, pkt_len); > + *target_size = 4; > + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); > } > > return insn - insn_buf; > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 597afdbc1ab9..ce028482d423 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -2483,6 +2483,7 @@ struct __sk_buff { > __u32 data_meta; > struct bpf_flow_keys *flow_keys; > __u64 tstamp; > + __u32 pkt_len; > }; > > struct bpf_tunnel_key { > diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c > index 17021d2b6bfe..0ab37fb4afad 100644 > --- a/tools/testing/selftests/bpf/test_verifier.c > +++ b/tools/testing/selftests/bpf/test_verifier.c > @@ -13972,6 +13972,38 @@ static struct bpf_test tests[] = { > .result_unpriv = REJECT, > .result = ACCEPT, > }, > + { > + "check pkt_len is not readable by sockets", > + .insns = { > + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, > + offsetof(struct __sk_buff, pkt_len)), > + BPF_EXIT_INSN(), > + }, > + .errstr = "invalid bpf_context access", > + .result = REJECT, > + }, > + { > + "check pkt_len is readable by tc classifier", > + .insns = { > + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, > + offsetof(struct __sk_buff, pkt_len)), > + BPF_EXIT_INSN(), > + }, > + .prog_type = BPF_PROG_TYPE_SCHED_CLS, > + .result = ACCEPT, > + }, > + { > + "check pkt_len is not writable by tc classifier", > + .insns = { > + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, > + offsetof(struct __sk_buff, pkt_len)), > + BPF_EXIT_INSN(), > + }, > + .prog_type = BPF_PROG_TYPE_SCHED_CLS, > + .errstr = "invalid bpf_context access", > + .errstr_unpriv = "R1 leaks addr", > + .result = REJECT, > + }, > }; > > static int probe_filter_length(const struct bpf_insn *fp) > -- > 2.20.0.rc1.387.gf8505762e3-goog >
On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote: > > On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > From: Petar Penkov <ppenkov@google.com> > > > > The pkt_len field in qdisc_skb_cb stores the skb length as it will > > appear on the wire after segmentation. For byte accounting, this value > > is more accurate than skb->len. It is computed on entry to the TC > > layer, so only valid there. > > > > Allow read access to this field from BPF tc classifier and action > > programs. The implementation is analogous to tc_classid, aside from > > restricting to read access. > > > > Signed-off-by: Petar Penkov <ppenkov@google.com> > > Signed-off-by: Vlad Dumitrescu <vladum@google.com> > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > --- > > include/uapi/linux/bpf.h | 1 + > > net/core/filter.c | 16 +++++++++++ > > tools/include/uapi/linux/bpf.h | 1 + > > tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++ > > 4 files changed, 50 insertions(+) > > Please split this into 3 patches: > 1 for include/uapi/linux/bpf.h and filter.c > 1 for tools/include/uapi/linux/bpf.h > 1 for tools/testing/selftests/bpf/test_verifier.c > > Other than this > Acked-by: Song Liu <songliubraving@fb.com> Thanks for the fast review. I'm happy to resend in three parts, of course, but am curious: what is the rationale for splitting this up? This patch follows the process for commit f11216b24219 ("bpf: add skb->tstamp r/w access from tc clsact and cg skb progs"), which went in as a single patch just last week.
On 12/01/2018 12:42 AM, Willem de Bruijn wrote: > On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote: >> >> On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn >> <willemdebruijn.kernel@gmail.com> wrote: >>> >>> From: Petar Penkov <ppenkov@google.com> >>> >>> The pkt_len field in qdisc_skb_cb stores the skb length as it will >>> appear on the wire after segmentation. For byte accounting, this value >>> is more accurate than skb->len. It is computed on entry to the TC >>> layer, so only valid there. >>> >>> Allow read access to this field from BPF tc classifier and action >>> programs. The implementation is analogous to tc_classid, aside from >>> restricting to read access. >>> >>> Signed-off-by: Petar Penkov <ppenkov@google.com> >>> Signed-off-by: Vlad Dumitrescu <vladum@google.com> >>> Signed-off-by: Willem de Bruijn <willemb@google.com> >>> --- >>> include/uapi/linux/bpf.h | 1 + >>> net/core/filter.c | 16 +++++++++++ >>> tools/include/uapi/linux/bpf.h | 1 + >>> tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++ >>> 4 files changed, 50 insertions(+) >> >> Please split this into 3 patches: >> 1 for include/uapi/linux/bpf.h and filter.c >> 1 for tools/include/uapi/linux/bpf.h >> 1 for tools/testing/selftests/bpf/test_verifier.c >> >> Other than this >> Acked-by: Song Liu <songliubraving@fb.com> > > Thanks for the fast review. > > I'm happy to resend in three parts, of course, but am curious: what is > the rationale for splitting this up? > > This patch follows the process for commit f11216b24219 ("bpf: add > skb->tstamp r/w access from tc clsact and cg skb progs"), which went > in as a single patch just last week. Yeah, I think it's fine as is, one small thing I'm wondering though is given that we now would have both 'skb->len' and 'skb->pkt_len', would it be more intuitive for a BPF program developer to distinguish the two by having the latter named e.g. 'skb->wire_len' so it's slightly more obvious that it's including header size at post-segmentation? If not probably some comment in the uapi header similar as in qdisc_pkt_len_init() might be helpful in any case. Thanks, Daniel
On Fri, Nov 30, 2018 at 7:41 PM Daniel Borkmann <daniel@iogearbox.net> wrote: > > On 12/01/2018 12:42 AM, Willem de Bruijn wrote: > > On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote: > >> > >> On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn > >> <willemdebruijn.kernel@gmail.com> wrote: > >>> > >>> From: Petar Penkov <ppenkov@google.com> > >>> > >>> The pkt_len field in qdisc_skb_cb stores the skb length as it will > >>> appear on the wire after segmentation. For byte accounting, this value > >>> is more accurate than skb->len. It is computed on entry to the TC > >>> layer, so only valid there. > >>> > >>> Allow read access to this field from BPF tc classifier and action > >>> programs. The implementation is analogous to tc_classid, aside from > >>> restricting to read access. > >>> > >>> Signed-off-by: Petar Penkov <ppenkov@google.com> > >>> Signed-off-by: Vlad Dumitrescu <vladum@google.com> > >>> Signed-off-by: Willem de Bruijn <willemb@google.com> > >>> --- > >>> include/uapi/linux/bpf.h | 1 + > >>> net/core/filter.c | 16 +++++++++++ > >>> tools/include/uapi/linux/bpf.h | 1 + > >>> tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++ > >>> 4 files changed, 50 insertions(+) > >> > >> Please split this into 3 patches: > >> 1 for include/uapi/linux/bpf.h and filter.c > >> 1 for tools/include/uapi/linux/bpf.h > >> 1 for tools/testing/selftests/bpf/test_verifier.c > >> > >> Other than this > >> Acked-by: Song Liu <songliubraving@fb.com> > > > > Thanks for the fast review. > > > > I'm happy to resend in three parts, of course, but am curious: what is > > the rationale for splitting this up? > > > > This patch follows the process for commit f11216b24219 ("bpf: add > > skb->tstamp r/w access from tc clsact and cg skb progs"), which went > > in as a single patch just last week. > > Yeah, I think it's fine as is, one small thing I'm wondering though is > given that we now would have both 'skb->len' and 'skb->pkt_len', would > it be more intuitive for a BPF program developer to distinguish the two > by having the latter named e.g. 'skb->wire_len' so it's slightly more > obvious that it's including header size at post-segmentation? Yes, I actually had considered qdisc_pkt_len to drive home the point that this is derived from, and thus defined by, qdisc_pkt_len_init. But wire_len is a much more intuitive description. I'll send a v2. Thanks!
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 597afdbc1ab9..ce028482d423 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2483,6 +2483,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 pkt_len; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index bd0df75dc7b6..af071ef22c0a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5773,6 +5773,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, pkt_len): return false; } @@ -5797,6 +5798,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, pkt_len): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): @@ -5843,6 +5845,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, pkt_len): return false; } @@ -6273,6 +6276,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, pkt_len): return false; } @@ -6360,6 +6364,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, pkt_len): return false; } @@ -6685,6 +6690,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct sk_buff, tstamp, 8, target_size)); + break; + + case offsetof(struct __sk_buff, pkt_len): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); + + off = si->off; + off -= offsetof(struct __sk_buff, pkt_len); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct qdisc_skb_cb, pkt_len); + *target_size = 4; + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); } return insn - insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 597afdbc1ab9..ce028482d423 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2483,6 +2483,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 pkt_len; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 17021d2b6bfe..0ab37fb4afad 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13972,6 +13972,38 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "check pkt_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check pkt_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "check pkt_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, pkt_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp)