diff mbox series

[bpf-next] bpf: allow BPF read access to qdisc pkt_len

Message ID 20181130200831.165645-1-willemdebruijn.kernel@gmail.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series [bpf-next] bpf: allow BPF read access to qdisc pkt_len | expand

Commit Message

Willem de Bruijn Nov. 30, 2018, 8:08 p.m. UTC
From: Petar Penkov <ppenkov@google.com>

The pkt_len field in qdisc_skb_cb stores the skb length as it will
appear on the wire after segmentation. For byte accounting, this value
is more accurate than skb->len. It is computed on entry to the TC
layer, so only valid there.

Allow read access to this field from BPF tc classifier and action
programs. The implementation is analogous to tc_classid, aside from
restricting to read access.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Vlad Dumitrescu <vladum@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 16 +++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++
 4 files changed, 50 insertions(+)

Comments

Song Liu Nov. 30, 2018, 10:48 p.m. UTC | #1
On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> From: Petar Penkov <ppenkov@google.com>
>
> The pkt_len field in qdisc_skb_cb stores the skb length as it will
> appear on the wire after segmentation. For byte accounting, this value
> is more accurate than skb->len. It is computed on entry to the TC
> layer, so only valid there.
>
> Allow read access to this field from BPF tc classifier and action
> programs. The implementation is analogous to tc_classid, aside from
> restricting to read access.
>
> Signed-off-by: Petar Penkov <ppenkov@google.com>
> Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/uapi/linux/bpf.h                    |  1 +
>  net/core/filter.c                           | 16 +++++++++++
>  tools/include/uapi/linux/bpf.h              |  1 +
>  tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++
>  4 files changed, 50 insertions(+)

Please split this into 3 patches:
1 for include/uapi/linux/bpf.h and filter.c
1 for tools/include/uapi/linux/bpf.h
1 for tools/testing/selftests/bpf/test_verifier.c

Other than this
Acked-by: Song Liu <songliubraving@fb.com>


>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 597afdbc1ab9..ce028482d423 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2483,6 +2483,7 @@ struct __sk_buff {
>         __u32 data_meta;
>         struct bpf_flow_keys *flow_keys;
>         __u64 tstamp;
> +       __u32 pkt_len;
>  };
>
>  struct bpf_tunnel_key {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index bd0df75dc7b6..af071ef22c0a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5773,6 +5773,7 @@ static bool sk_filter_is_valid_access(int off, int size,
>         case bpf_ctx_range(struct __sk_buff, flow_keys):
>         case bpf_ctx_range_till(struct __sk_buff, family, local_port):
>         case bpf_ctx_range(struct __sk_buff, tstamp):
> +       case bpf_ctx_range(struct __sk_buff, pkt_len):
>                 return false;
>         }
>
> @@ -5797,6 +5798,7 @@ static bool cg_skb_is_valid_access(int off, int size,
>         case bpf_ctx_range(struct __sk_buff, tc_classid):
>         case bpf_ctx_range(struct __sk_buff, data_meta):
>         case bpf_ctx_range(struct __sk_buff, flow_keys):
> +       case bpf_ctx_range(struct __sk_buff, pkt_len):
>                 return false;
>         case bpf_ctx_range(struct __sk_buff, data):
>         case bpf_ctx_range(struct __sk_buff, data_end):
> @@ -5843,6 +5845,7 @@ static bool lwt_is_valid_access(int off, int size,
>         case bpf_ctx_range(struct __sk_buff, data_meta):
>         case bpf_ctx_range(struct __sk_buff, flow_keys):
>         case bpf_ctx_range(struct __sk_buff, tstamp):
> +       case bpf_ctx_range(struct __sk_buff, pkt_len):
>                 return false;
>         }
>
> @@ -6273,6 +6276,7 @@ static bool sk_skb_is_valid_access(int off, int size,
>         case bpf_ctx_range(struct __sk_buff, data_meta):
>         case bpf_ctx_range(struct __sk_buff, flow_keys):
>         case bpf_ctx_range(struct __sk_buff, tstamp):
> +       case bpf_ctx_range(struct __sk_buff, pkt_len):
>                 return false;
>         }
>
> @@ -6360,6 +6364,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
>         case bpf_ctx_range(struct __sk_buff, data_meta):
>         case bpf_ctx_range_till(struct __sk_buff, family, local_port):
>         case bpf_ctx_range(struct __sk_buff, tstamp):
> +       case bpf_ctx_range(struct __sk_buff, pkt_len):
>                 return false;
>         }
>
> @@ -6685,6 +6690,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
>                                               bpf_target_off(struct sk_buff,
>                                                              tstamp, 8,
>                                                              target_size));
> +               break;
> +
> +       case offsetof(struct __sk_buff, pkt_len):
> +               BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
> +
> +               off = si->off;
> +               off -= offsetof(struct __sk_buff, pkt_len);
> +               off += offsetof(struct sk_buff, cb);
> +               off += offsetof(struct qdisc_skb_cb, pkt_len);
> +               *target_size = 4;
> +               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
>         }
>
>         return insn - insn_buf;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 597afdbc1ab9..ce028482d423 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -2483,6 +2483,7 @@ struct __sk_buff {
>         __u32 data_meta;
>         struct bpf_flow_keys *flow_keys;
>         __u64 tstamp;
> +       __u32 pkt_len;
>  };
>
>  struct bpf_tunnel_key {
> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> index 17021d2b6bfe..0ab37fb4afad 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -13972,6 +13972,38 @@ static struct bpf_test tests[] = {
>                 .result_unpriv = REJECT,
>                 .result = ACCEPT,
>         },
> +       {
> +               "check pkt_len is not readable by sockets",
> +               .insns = {
> +                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
> +                                   offsetof(struct __sk_buff, pkt_len)),
> +                       BPF_EXIT_INSN(),
> +               },
> +               .errstr = "invalid bpf_context access",
> +               .result = REJECT,
> +       },
> +       {
> +               "check pkt_len is readable by tc classifier",
> +               .insns = {
> +                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
> +                                   offsetof(struct __sk_buff, pkt_len)),
> +                       BPF_EXIT_INSN(),
> +               },
> +               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
> +               .result = ACCEPT,
> +       },
> +       {
> +               "check pkt_len is not writable by tc classifier",
> +               .insns = {
> +                       BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
> +                                   offsetof(struct __sk_buff, pkt_len)),
> +                       BPF_EXIT_INSN(),
> +               },
> +               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
> +               .errstr = "invalid bpf_context access",
> +               .errstr_unpriv = "R1 leaks addr",
> +               .result = REJECT,
> +       },
>  };
>
>  static int probe_filter_length(const struct bpf_insn *fp)
> --
> 2.20.0.rc1.387.gf8505762e3-goog
>
Willem de Bruijn Nov. 30, 2018, 11:42 p.m. UTC | #2
On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote:
>
> On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > From: Petar Penkov <ppenkov@google.com>
> >
> > The pkt_len field in qdisc_skb_cb stores the skb length as it will
> > appear on the wire after segmentation. For byte accounting, this value
> > is more accurate than skb->len. It is computed on entry to the TC
> > layer, so only valid there.
> >
> > Allow read access to this field from BPF tc classifier and action
> > programs. The implementation is analogous to tc_classid, aside from
> > restricting to read access.
> >
> > Signed-off-by: Petar Penkov <ppenkov@google.com>
> > Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > ---
> >  include/uapi/linux/bpf.h                    |  1 +
> >  net/core/filter.c                           | 16 +++++++++++
> >  tools/include/uapi/linux/bpf.h              |  1 +
> >  tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++
> >  4 files changed, 50 insertions(+)
>
> Please split this into 3 patches:
> 1 for include/uapi/linux/bpf.h and filter.c
> 1 for tools/include/uapi/linux/bpf.h
> 1 for tools/testing/selftests/bpf/test_verifier.c
>
> Other than this
> Acked-by: Song Liu <songliubraving@fb.com>

Thanks for the fast review.

I'm happy to resend in three parts, of course, but am curious: what is
the rationale for splitting this up?

This patch follows the process for commit  f11216b24219 ("bpf: add
skb->tstamp r/w access from tc clsact and cg skb progs"), which went
in as a single patch just last week.
Daniel Borkmann Dec. 1, 2018, 12:41 a.m. UTC | #3
On 12/01/2018 12:42 AM, Willem de Bruijn wrote:
> On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote:
>>
>> On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn
>> <willemdebruijn.kernel@gmail.com> wrote:
>>>
>>> From: Petar Penkov <ppenkov@google.com>
>>>
>>> The pkt_len field in qdisc_skb_cb stores the skb length as it will
>>> appear on the wire after segmentation. For byte accounting, this value
>>> is more accurate than skb->len. It is computed on entry to the TC
>>> layer, so only valid there.
>>>
>>> Allow read access to this field from BPF tc classifier and action
>>> programs. The implementation is analogous to tc_classid, aside from
>>> restricting to read access.
>>>
>>> Signed-off-by: Petar Penkov <ppenkov@google.com>
>>> Signed-off-by: Vlad Dumitrescu <vladum@google.com>
>>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>>> ---
>>>  include/uapi/linux/bpf.h                    |  1 +
>>>  net/core/filter.c                           | 16 +++++++++++
>>>  tools/include/uapi/linux/bpf.h              |  1 +
>>>  tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++
>>>  4 files changed, 50 insertions(+)
>>
>> Please split this into 3 patches:
>> 1 for include/uapi/linux/bpf.h and filter.c
>> 1 for tools/include/uapi/linux/bpf.h
>> 1 for tools/testing/selftests/bpf/test_verifier.c
>>
>> Other than this
>> Acked-by: Song Liu <songliubraving@fb.com>
> 
> Thanks for the fast review.
> 
> I'm happy to resend in three parts, of course, but am curious: what is
> the rationale for splitting this up?
> 
> This patch follows the process for commit  f11216b24219 ("bpf: add
> skb->tstamp r/w access from tc clsact and cg skb progs"), which went
> in as a single patch just last week.

Yeah, I think it's fine as is, one small thing I'm wondering though is
given that we now would have both 'skb->len' and 'skb->pkt_len', would
it be more intuitive for a BPF program developer to distinguish the two
by having the latter named e.g. 'skb->wire_len' so it's slightly more
obvious that it's including header size at post-segmentation? If not
probably some comment in the uapi header similar as in qdisc_pkt_len_init()
might be helpful in any case.

Thanks,
Daniel
Willem de Bruijn Dec. 1, 2018, 9:02 p.m. UTC | #4
On Fri, Nov 30, 2018 at 7:41 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 12/01/2018 12:42 AM, Willem de Bruijn wrote:
> > On Fri, Nov 30, 2018 at 5:48 PM Song Liu <liu.song.a23@gmail.com> wrote:
> >>
> >> On Fri, Nov 30, 2018 at 12:09 PM Willem de Bruijn
> >> <willemdebruijn.kernel@gmail.com> wrote:
> >>>
> >>> From: Petar Penkov <ppenkov@google.com>
> >>>
> >>> The pkt_len field in qdisc_skb_cb stores the skb length as it will
> >>> appear on the wire after segmentation. For byte accounting, this value
> >>> is more accurate than skb->len. It is computed on entry to the TC
> >>> layer, so only valid there.
> >>>
> >>> Allow read access to this field from BPF tc classifier and action
> >>> programs. The implementation is analogous to tc_classid, aside from
> >>> restricting to read access.
> >>>
> >>> Signed-off-by: Petar Penkov <ppenkov@google.com>
> >>> Signed-off-by: Vlad Dumitrescu <vladum@google.com>
> >>> Signed-off-by: Willem de Bruijn <willemb@google.com>
> >>> ---
> >>>  include/uapi/linux/bpf.h                    |  1 +
> >>>  net/core/filter.c                           | 16 +++++++++++
> >>>  tools/include/uapi/linux/bpf.h              |  1 +
> >>>  tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++
> >>>  4 files changed, 50 insertions(+)
> >>
> >> Please split this into 3 patches:
> >> 1 for include/uapi/linux/bpf.h and filter.c
> >> 1 for tools/include/uapi/linux/bpf.h
> >> 1 for tools/testing/selftests/bpf/test_verifier.c
> >>
> >> Other than this
> >> Acked-by: Song Liu <songliubraving@fb.com>
> >
> > Thanks for the fast review.
> >
> > I'm happy to resend in three parts, of course, but am curious: what is
> > the rationale for splitting this up?
> >
> > This patch follows the process for commit  f11216b24219 ("bpf: add
> > skb->tstamp r/w access from tc clsact and cg skb progs"), which went
> > in as a single patch just last week.
>
> Yeah, I think it's fine as is, one small thing I'm wondering though is
> given that we now would have both 'skb->len' and 'skb->pkt_len', would
> it be more intuitive for a BPF program developer to distinguish the two
> by having the latter named e.g. 'skb->wire_len' so it's slightly more
> obvious that it's including header size at post-segmentation?

Yes, I actually had considered qdisc_pkt_len to drive home the point
that this is derived from, and thus defined by, qdisc_pkt_len_init. But
wire_len is a much more intuitive description. I'll send a v2. Thanks!
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 597afdbc1ab9..ce028482d423 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2483,6 +2483,7 @@  struct __sk_buff {
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
 	__u64 tstamp;
+	__u32 pkt_len;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index bd0df75dc7b6..af071ef22c0a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5773,6 +5773,7 @@  static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, pkt_len):
 		return false;
 	}
 
@@ -5797,6 +5798,7 @@  static bool cg_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, pkt_len):
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5843,6 +5845,7 @@  static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, pkt_len):
 		return false;
 	}
 
@@ -6273,6 +6276,7 @@  static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, pkt_len):
 		return false;
 	}
 
@@ -6360,6 +6364,7 @@  static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, pkt_len):
 		return false;
 	}
 
@@ -6685,6 +6690,17 @@  static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 					      bpf_target_off(struct sk_buff,
 							     tstamp, 8,
 							     target_size));
+		break;
+
+	case offsetof(struct __sk_buff, pkt_len):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+
+		off = si->off;
+		off -= offsetof(struct __sk_buff, pkt_len);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct qdisc_skb_cb, pkt_len);
+		*target_size = 4;
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 597afdbc1ab9..ce028482d423 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2483,6 +2483,7 @@  struct __sk_buff {
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
 	__u64 tstamp;
+	__u32 pkt_len;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 17021d2b6bfe..0ab37fb4afad 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -13972,6 +13972,38 @@  static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 	},
+	{
+		"check pkt_len is not readable by sockets",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_len)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check pkt_len is readable by tc classifier",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"check pkt_len is not writable by tc classifier",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid bpf_context access",
+		.errstr_unpriv = "R1 leaks addr",
+		.result = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)