
[RFC,bpf-next,v2,7/9] bpf: when doing BPF_PROG_TEST_RUN for flow dissector use no-skb mode

Message ID 20190319221948.170441-8-sdf@google.com
State RFC
Delegated to: BPF Maintainers
Series net: flow_dissector: trigger BPF hook when called from eth_get_headlen

Commit Message

Stanislav Fomichev March 19, 2019, 10:19 p.m. UTC
Now that we have __flow_bpf_dissect which works on raw data (by
constructing temporary on-stack skb), use it when doing
BPF_PROG_TEST_RUN for flow dissector.

This should help us catch any possible bugs due to missing shinfo on
the per-cpu skb.

Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
nhoff=0, we need to preserve the existing behavior.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
 1 file changed, 14 insertions(+), 34 deletions(-)

Comments

Willem de Bruijn March 20, 2019, 2:14 a.m. UTC | #1
On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> Now that we have __flow_bpf_dissect which works on raw data (by
> constructing temporary on-stack skb), use it when doing
> BPF_PROG_TEST_RUN for flow dissector.
>
> This should help us catch any possible bugs due to missing shinfo on
> the per-cpu skb.
>
> Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> nhoff=0, we need to preserve the existing behavior.
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
>  1 file changed, 14 insertions(+), 34 deletions(-)
>

> @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>         preempt_disable();
>         time_start = ktime_get_ns();
>         for (i = 0; i < repeat; i++) {
> -               retval = bpf_flow_dissect_skb(prog, skb,
> -                                             &flow_keys_dissector,
> -                                             &flow_keys);
> +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> +                                         size, &flow_keys_dissector,
> +                                         &flow_keys);
> +               if (flow_keys.nhoff >= ETH_HLEN)
> +                       flow_keys.nhoff -= ETH_HLEN;
> +               if (flow_keys.thoff >= ETH_HLEN)
> +                       flow_keys.thoff -= ETH_HLEN;

why are these conditional?
Stanislav Fomichev March 20, 2019, 4:57 p.m. UTC | #2
On 03/19, Willem de Bruijn wrote:
> On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > Now that we have __flow_bpf_dissect which works on raw data (by
> > constructing temporary on-stack skb), use it when doing
> > BPF_PROG_TEST_RUN for flow dissector.
> >
> > This should help us catch any possible bugs due to missing shinfo on
> > the per-cpu skb.
> >
> > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > nhoff=0, we need to preserve the existing behavior.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> >  1 file changed, 14 insertions(+), 34 deletions(-)
> >
> 
> > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> >         preempt_disable();
> >         time_start = ktime_get_ns();
> >         for (i = 0; i < repeat; i++) {
> > -               retval = bpf_flow_dissect_skb(prog, skb,
> > -                                             &flow_keys_dissector,
> > -                                             &flow_keys);
> > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > +                                         size, &flow_keys_dissector,
> > +                                         &flow_keys);
> > +               if (flow_keys.nhoff >= ETH_HLEN)
> > +                       flow_keys.nhoff -= ETH_HLEN;
> > +               if (flow_keys.thoff >= ETH_HLEN)
> > +                       flow_keys.thoff -= ETH_HLEN;
> 
> why are these conditional?
Hm, I didn't want these to be negative, because bpf flow program can set
them to zero and clamp_flow_keys makes sure they are in a "sensible"
range. For this particular case, I think we need to amend
clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
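Something along these lines, roughly (sketch only, the exact helper
name/signature may end up different from what is in the tree):

    /* clamp the program's output offsets into [initial nhoff, hlen]
     * instead of [0, hlen]
     */
    static void clamp_flow_keys(struct bpf_flow_keys *flow_keys,
                                u16 nhoff, u16 hlen)
    {
            flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
            flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
                                       flow_keys->nhoff, hlen);
    }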
Willem de Bruijn March 20, 2019, 6:29 p.m. UTC | #3
On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 03/19, Willem de Bruijn wrote:
> > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > constructing temporary on-stack skb), use it when doing
> > > BPF_PROG_TEST_RUN for flow dissector.
> > >
> > > This should help us catch any possible bugs due to missing shinfo on
> > > the per-cpu skb.
> > >
> > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > nhoff=0, we need to preserve the existing behavior.
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > >
> >
> > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > >         preempt_disable();
> > >         time_start = ktime_get_ns();
> > >         for (i = 0; i < repeat; i++) {
> > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > -                                             &flow_keys_dissector,
> > > -                                             &flow_keys);
> > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > +                                         size, &flow_keys_dissector,
> > > +                                         &flow_keys);
> > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > +                       flow_keys.thoff -= ETH_HLEN;
> >
> > why are these conditional?
> Hm, I didn't want these to be negative, because bpf flow program can set
> them to zero and clamp_flow_keys makes sure they are in a "sensible"
> range. For this particular case, I think we need to amend
> clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).

So, previously eth_type_trans would call with data at the network
header. Now it is called with data at the link layer. How would
__skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
sounds incorrect.

Agreed that the output should lie between nhoff and hlen, but as far
as I can tell it is always zero indexed to the data passed, here the
link layer:

        if (!data) {
                data = skb->data;
                proto = skb_vlan_tag_present(skb) ?
                         skb->vlan_proto : skb->protocol;
                nhoff = skb_network_offset(skb);
Stanislav Fomichev March 20, 2019, 7:02 p.m. UTC | #4
On 03/20, Willem de Bruijn wrote:
> On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > On 03/19, Willem de Bruijn wrote:
> > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > >
> > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > constructing temporary on-stack skb), use it when doing
> > > > BPF_PROG_TEST_RUN for flow dissector.
> > > >
> > > > This should help us catch any possible bugs due to missing shinfo on
> > > > the per-cpu skb.
> > > >
> > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > nhoff=0, we need to preserve the existing behavior.
> > > >
> > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > ---
> > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > >
> > >
> > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > >         preempt_disable();
> > > >         time_start = ktime_get_ns();
> > > >         for (i = 0; i < repeat; i++) {
> > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > -                                             &flow_keys_dissector,
> > > > -                                             &flow_keys);
> > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > +                                         size, &flow_keys_dissector,
> > > > +                                         &flow_keys);
> > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > +                       flow_keys.thoff -= ETH_HLEN;
> > >
> > > why are these conditional?
> > Hm, I didn't want these to be negative, because bpf flow program can set
> > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > range. For this particular case, I think we need to amend
> > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> 
> So, previously eth_type_trans would call with data at the network
> header. Now it is called with data at the link layer. How would
> __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
description.

> sounds incorrect.
Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
after that we did skb_reset_network_header. So when later we initialized
flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
yield nhoff == 0.
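
To illustrate, the old skb-based path (the code this patch removes, see
the diff below) did roughly:

        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
        __skb_put(skb, size);
        skb->protocol = eth_type_trans(skb,
                                       current->nsproxy->net_ns->loopback_dev);
        skb_reset_network_header(skb);
        /* and later, in __skb_flow_bpf_dissect:
         *   flow_keys->nhoff = skb_network_offset(skb);  == 0 here
         */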

For example, see:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c

Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
undo it, otherwise, it breaks those tests.

We could do something like the following instead:
retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
                          size, &flow_keys_dissector,
                          &flow_keys);

But I wanted to make sure nhoff != 0 works.

> 
> Agreed that the output should lie between nhoff and hlen, but as far
> as I can tell it is always zero indexed to the data passed, here the
> link layer:
> 
>         if (!data) {
>                 data = skb->data;
>                 proto = skb_vlan_tag_present(skb) ?
>                          skb->vlan_proto : skb->protocol;
>                 nhoff = skb_network_offset(skb);
That's for the skb != NULL case. eth_get_headlen calls with skb == NULL
and passes data and nhoff=sizeof(*eth):

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ethernet/eth.c#n139
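
For reference, it looks roughly like this (paraphrased from the linked
file, details may differ):

        u32 eth_get_headlen(void *data, unsigned int len)
        {
                const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
                const struct ethhdr *eth = (const struct ethhdr *)data;
                struct flow_keys_basic keys;

                if (unlikely(len < sizeof(*eth)))
                        return len;

                /* skb == NULL, raw data, nhoff = sizeof(*eth) */
                if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data,
                                                      eth->h_proto,
                                                      sizeof(*eth), len,
                                                      flags))
                        return max_t(u32, keys.control.thoff, sizeof(*eth));

                return min_t(u32, __skb_get_poff(NULL, data, &keys, len),
                             len);
        }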
Willem de Bruijn March 20, 2019, 7:08 p.m. UTC | #5
On Wed, Mar 20, 2019 at 3:02 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 03/20, Willem de Bruijn wrote:
> > On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 03/19, Willem de Bruijn wrote:
> > > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > >
> > > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > > constructing temporary on-stack skb), use it when doing
> > > > > BPF_PROG_TEST_RUN for flow dissector.
> > > > >
> > > > > This should help us catch any possible bugs due to missing shinfo on
> > > > > the per-cpu skb.
> > > > >
> > > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > > nhoff=0, we need to preserve the existing behavior.
> > > > >
> > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > ---
> > > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > > >
> > > >
> > > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > > >         preempt_disable();
> > > > >         time_start = ktime_get_ns();
> > > > >         for (i = 0; i < repeat; i++) {
> > > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > > -                                             &flow_keys_dissector,
> > > > > -                                             &flow_keys);
> > > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > > +                                         size, &flow_keys_dissector,
> > > > > +                                         &flow_keys);
> > > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > > +                       flow_keys.thoff -= ETH_HLEN;
> > > >
> > > > why are these conditional?
> > > Hm, I didn't want these to be negative, because bpf flow program can set
> > > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > > range. For this particular case, I think we need to amend
> > > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> >
> > So, previously eth_type_trans would call with data at the network
> > header. Now it is called with data at the link layer. How would
> > __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
> s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
> description.
>
> > sounds incorrect.
> Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
> after that we did skb_reset_network_header. So when later we initialized
> flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
> yield nhoff == 0.
>
> For example, see:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
>
> Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
> undo it, otherwise, it breaks those tests.
>
> We could do something like the following instead:
> retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
>                           size, &flow_keys_dissector,
>                           &flow_keys);
>
> But I wanted to make sure nhoff != 0 works.

Makes sense. Ensuring that nhoff lies within initial_nhoff..hlen
sounds correct to me. But this is a limitation of the test, so should
be in the test logic, not in the generic clamp code. Perhaps just fail
the test if returned nhoff < ETH_HLEN?
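E.g. something like this in bpf_prog_test_run_flow_dissector (untested
sketch, reusing the existing ret/out error path):

        if (flow_keys.nhoff < ETH_HLEN || flow_keys.thoff < flow_keys.nhoff) {
                ret = -EINVAL;
                goto out;
        }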
Stanislav Fomichev March 20, 2019, 7:19 p.m. UTC | #6
On 03/20, Willem de Bruijn wrote:
> On Wed, Mar 20, 2019 at 3:02 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > On 03/20, Willem de Bruijn wrote:
> > > On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > >
> > > > On 03/19, Willem de Bruijn wrote:
> > > > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > > >
> > > > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > > > constructing temporary on-stack skb), use it when doing
> > > > > > BPF_PROG_TEST_RUN for flow dissector.
> > > > > >
> > > > > > This should help us catch any possible bugs due to missing shinfo on
> > > > > > the per-cpu skb.
> > > > > >
> > > > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > > > nhoff=0, we need to preserve the existing behavior.
> > > > > >
> > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > ---
> > > > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > > > >
> > > > >
> > > > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > > > >         preempt_disable();
> > > > > >         time_start = ktime_get_ns();
> > > > > >         for (i = 0; i < repeat; i++) {
> > > > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > > > -                                             &flow_keys_dissector,
> > > > > > -                                             &flow_keys);
> > > > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > > > +                                         size, &flow_keys_dissector,
> > > > > > +                                         &flow_keys);
> > > > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > > > +                       flow_keys.thoff -= ETH_HLEN;
> > > > >
> > > > > why are these conditional?
> > > > Hm, I didn't want these to be negative, because bpf flow program can set
> > > > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > > > range. For this particular case, I think we need to amend
> > > > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > > > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> > >
> > > So, previously eth_type_trans would call with data at the network
> > > header. Now it is called with data at the link layer. How would
> > > __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
> > s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
> > description.
> >
> > > sounds incorrect.
> > Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
> > after that we did skb_reset_network_header. So when later we initialized
> > flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
> > yield nhoff == 0.
> >
> > For example, see:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> >
> > Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
> > undo it, otherwise, it breaks those tests.
> >
> > We could do something like the following instead:
> > retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
> >                           size, &flow_keys_dissector,
> >                           &flow_keys);
> >
> > But I wanted to make sure nhoff != 0 works.
> 
> Makes sense. Ensuring that nhoff lies within initial_nhoff..hlen
> sounds correct to me. But this is a limitation of the test, so should
> be in the test logic, not in the generic clamp code. Perhaps just fail
> the test if returned nhoff < ETH_HLEN?
I don't think it's only about the tests. BPF program can return
nhoff/thoff out of range as well (if there was some bug in its logic,
for example). We should not blindly trust whatever it returns, right?
Willem de Bruijn March 20, 2019, 7:23 p.m. UTC | #7
On Wed, Mar 20, 2019 at 3:19 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 03/20, Willem de Bruijn wrote:
> > On Wed, Mar 20, 2019 at 3:02 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 03/20, Willem de Bruijn wrote:
> > > > On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > > >
> > > > > On 03/19, Willem de Bruijn wrote:
> > > > > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > > > >
> > > > > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > > > > constructing temporary on-stack skb), use it when doing
> > > > > > > BPF_PROG_TEST_RUN for flow dissector.
> > > > > > >
> > > > > > > This should help us catch any possible bugs due to missing shinfo on
> > > > > > > the per-cpu skb.
> > > > > > >
> > > > > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > > > > nhoff=0, we need to preserve the existing behavior.
> > > > > > >
> > > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > > ---
> > > > > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > > > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > > > > >
> > > > > >
> > > > > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > > > > >         preempt_disable();
> > > > > > >         time_start = ktime_get_ns();
> > > > > > >         for (i = 0; i < repeat; i++) {
> > > > > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > > > > -                                             &flow_keys_dissector,
> > > > > > > -                                             &flow_keys);
> > > > > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > > > > +                                         size, &flow_keys_dissector,
> > > > > > > +                                         &flow_keys);
> > > > > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > > > > +                       flow_keys.thoff -= ETH_HLEN;
> > > > > >
> > > > > > why are these conditional?
> > > > > Hm, I didn't want these to be negative, because bpf flow program can set
> > > > > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > > > > range. For this particular case, I think we need to amend
> > > > > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > > > > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> > > >
> > > > So, previously eth_type_trans would call with data at the network
> > > > header. Now it is called with data at the link layer. How would
> > > > __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
> > > s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
> > > description.
> > >
> > > > sounds incorrect.
> > > Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
> > > after that we did skb_reset_network_header. So when later we initialized
> > > flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
> > > yield nhoff == 0.
> > >
> > > For example, see:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> > >
> > > Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
> > > undo it, otherwise, it breaks those tests.
> > >
> > > We could do something like the following instead:
> > > retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
> > >                           size, &flow_keys_dissector,
> > >                           &flow_keys);
> > >
> > > But I wanted to make sure nhoff != 0 works.
> >
> > Makes sense. Ensuring that nhoff lies within initial_nhoff..hlen
> > sounds correct to me. But this is a limitation of the test, so should
> > be in the test logic, not in the generic clamp code. Perhaps just fail
> > the test if returned nhoff < ETH_HLEN?
> I don't think it's only about the tests. BPF program can return
> nhoff/thoff out of range as well (if there was some bug in its logic,
> for example). We should not blindly trust whatever it returns, right?

Definitely. That's why we clamp. I'm not sure that we have to restrict
the minimum offset to initial nhoff, however.
Stanislav Fomichev March 20, 2019, 7:48 p.m. UTC | #8
On 03/20, Willem de Bruijn wrote:
> On Wed, Mar 20, 2019 at 3:19 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > On 03/20, Willem de Bruijn wrote:
> > > On Wed, Mar 20, 2019 at 3:02 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > >
> > > > On 03/20, Willem de Bruijn wrote:
> > > > > On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > > > >
> > > > > > On 03/19, Willem de Bruijn wrote:
> > > > > > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > > > > >
> > > > > > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > > > > > constructing temporary on-stack skb), use it when doing
> > > > > > > > BPF_PROG_TEST_RUN for flow dissector.
> > > > > > > >
> > > > > > > > This should help us catch any possible bugs due to missing shinfo on
> > > > > > > > the per-cpu skb.
> > > > > > > >
> > > > > > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > > > > > nhoff=0, we need to preserve the existing behavior.
> > > > > > > >
> > > > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > > > ---
> > > > > > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > > > > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > > > > > >
> > > > > > >
> > > > > > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > > > > > >         preempt_disable();
> > > > > > > >         time_start = ktime_get_ns();
> > > > > > > >         for (i = 0; i < repeat; i++) {
> > > > > > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > > > > > -                                             &flow_keys_dissector,
> > > > > > > > -                                             &flow_keys);
> > > > > > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > > > > > +                                         size, &flow_keys_dissector,
> > > > > > > > +                                         &flow_keys);
> > > > > > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > > > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > > > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > > > > > +                       flow_keys.thoff -= ETH_HLEN;
> > > > > > >
> > > > > > > why are these conditional?
> > > > > > Hm, I didn't want these to be negative, because bpf flow program can set
> > > > > > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > > > > > range. For this particular case, I think we need to amend
> > > > > > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > > > > > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> > > > >
> > > > > So, previously eth_type_trans would call with data at the network
> > > > > header. Now it is called with data at the link layer. How would
> > > > > __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
> > > > s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
> > > > description.
> > > >
> > > > > sounds incorrect.
> > > > Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
> > > > after that we did skb_reset_network_header. So when later we initialized
> > > > flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
> > > > yield nhoff == 0.
> > > >
> > > > For example, see:
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> > > >
> > > > Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
> > > > undo it, otherwise, it breaks those tests.
> > > >
> > > > We could do something like the following instead:
> > > > retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
> > > >                           size, &flow_keys_dissector,
> > > >                           &flow_keys);
> > > >
> > > > But I wanted to make sure nhoff != 0 works.
> > >
> > > Makes sense. Ensuring that nhoff lies within initial_nhoff..hlen
> > > sounds correct to me. But this is a limitation of the test, so should
> > > be in the test logic, not in the generic clamp code. Perhaps just fail
> > > the test if returned nhoff < ETH_HLEN?
> > I don't think it's only about the tests. BPF program can return
> > nhoff/thoff out of range as well (if there was some bug in its logic,
> > for example). We should not blindly trust whatever it returns, right?
> 
> Definitely. That's why we clamp. I'm not sure that we have to restrict
> the minimum offset to initial nhoff, however.
Makes sense. TBH, only the tests currently care about nhoff that flow
dissector returns. In the kernel we use only thoff from bpf flow dissector
and ignore any modifications to the nhoff.

Do you think there is a usecase for nhoff possibly going backwards?
In other words, why not prohibit that from the beginning and set the
expectations straight (i.e. nhoff only grows).
Willem de Bruijn March 20, 2019, 8:03 p.m. UTC | #9
On Wed, Mar 20, 2019 at 3:48 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 03/20, Willem de Bruijn wrote:
> > On Wed, Mar 20, 2019 at 3:19 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 03/20, Willem de Bruijn wrote:
> > > > On Wed, Mar 20, 2019 at 3:02 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > > >
> > > > > On 03/20, Willem de Bruijn wrote:
> > > > > > On Wed, Mar 20, 2019 at 12:57 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > > > > >
> > > > > > > On 03/19, Willem de Bruijn wrote:
> > > > > > > > On Tue, Mar 19, 2019 at 6:21 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > > > > > >
> > > > > > > > > Now that we have __flow_bpf_dissect which works on raw data (by
> > > > > > > > > constructing temporary on-stack skb), use it when doing
> > > > > > > > > BPF_PROG_TEST_RUN for flow dissector.
> > > > > > > > >
> > > > > > > > > This should help us catch any possible bugs due to missing shinfo on
> > > > > > > > > the per-cpu skb.
> > > > > > > > >
> > > > > > > > > Note that existing __skb_flow_bpf_dissect swallows L2 headers and returns
> > > > > > > > > nhoff=0, we need to preserve the existing behavior.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > > > > ---
> > > > > > > > >  net/bpf/test_run.c | 48 ++++++++++++++--------------------------------
> > > > > > > > >  1 file changed, 14 insertions(+), 34 deletions(-)
> > > > > > > > >
> > > > > > > >
> > > > > > > > > @@ -300,9 +277,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
> > > > > > > > >         preempt_disable();
> > > > > > > > >         time_start = ktime_get_ns();
> > > > > > > > >         for (i = 0; i < repeat; i++) {
> > > > > > > > > -               retval = bpf_flow_dissect_skb(prog, skb,
> > > > > > > > > -                                             &flow_keys_dissector,
> > > > > > > > > -                                             &flow_keys);
> > > > > > > > > +               retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
> > > > > > > > > +                                         size, &flow_keys_dissector,
> > > > > > > > > +                                         &flow_keys);
> > > > > > > > > +               if (flow_keys.nhoff >= ETH_HLEN)
> > > > > > > > > +                       flow_keys.nhoff -= ETH_HLEN;
> > > > > > > > > +               if (flow_keys.thoff >= ETH_HLEN)
> > > > > > > > > +                       flow_keys.thoff -= ETH_HLEN;
> > > > > > > >
> > > > > > > > why are these conditional?
> > > > > > > Hm, I didn't want these to be negative, because bpf flow program can set
> > > > > > > them to zero and clamp_flow_keys makes sure they are in a "sensible"
> > > > > > > range. For this particular case, I think we need to amend
> > > > > > > clamp_flow_keys to make sure that flow_keys.nhoff is in the range of
> > > > > > > initial_nhoff..hlen, not 0..hlen (and then we can drop these checks).
> > > > > >
> > > > > > So, previously eth_type_trans would call with data at the network
> > > > > > header. Now it is called with data at the link layer. How would
> > > > > > __skb_flow_bpf_dissect "swallows L2 headers and returns nhoff=0"? That
> > > > > s/__skb_flow_bpf_dissect/eth_type_trans/, I'll clarify that in the patch
> > > > > description.
> > > > >
> > > > > > sounds incorrect.
> > > > > Previously, for skb case, eth_type_trans would pull ETH_HLEN (L2) and
> > > > > after that we did skb_reset_network_header. So when later we initialized
> > > > > flow keys (flow_keys->nhoff = skb_network_offset(skb)), that would
> > > > > yield nhoff == 0.
> > > > >
> > > > > For example, see:
> > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> > > > >
> > > > > Now, we explicitly call bpf_flow_dissect with nhoff=ETH_HLEN and have to
> > > > > undo it, otherwise, it breaks those tests.
> > > > >
> > > > > We could do something like the following instead:
> > > > > retval = bpf_flow_dissect(prog, data + ETH_HLEN, eth->h_proto, 0,
> > > > >                           size, &flow_keys_dissector,
> > > > >                           &flow_keys);
> > > > >
> > > > > But I wanted to make sure nhoff != 0 works.
> > > >
> > > > Makes sense. Ensuring that nhoff lies within initial_nhoff..hlen
> > > > sounds correct to me. But this is a limitation of the test, so should
> > > > be in the test logic, not in the generic clamp code. Perhaps just fail
> > > > the test if returned nhoff < ETH_HLEN?
> > > I don't think it's only about the tests. BPF program can return
> > > nhoff/thoff out of range as well (if there was some bug in its logic,
> > > for example). We should not blindly trust whatever it returns, right?
> >
> > Definitely. That's why we clamp. I'm not sure that we have to restrict
> > the minimum offset to initial nhoff, however.
> Makes sense. TBH, only the tests currently care about nhoff that flow
> dissector returns. In the kernel we use only thoff from bpf flow dissector
> and ignore any modifications to the nhoff.
>
> Do you think there is a usecase for nhoff possibly going backwards?
> In other words, why not prohibit that from the beginning and set the
> expectations straight (i.e. nhoff only grows).

Fair point. That is how the non bpf flow dissector works. And if the
initial offset is always sensible, indeed I see no reasonable case
where the program would return a lower value. I was a bit concerned
about that precondition. In practice, all but one caller passes 0 and
data at network header, where this discussion is moot. And the
exception is eth_get_headlen which hardcodes ETH_HLEN. Given that, your
original suggestion to adjust the clamp function SGTM.

Patch

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 512773a95ad5..90f7eaf129c6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -252,10 +252,8 @@  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	u32 repeat = kattr->test.repeat;
 	struct bpf_flow_keys flow_keys;
 	u64 time_start, time_spent = 0;
-	struct bpf_skb_data_end *cb;
+	const struct ethhdr *eth;
 	u32 retval, duration;
-	struct sk_buff *skb;
-	struct sock *sk;
 	void *data;
 	int ret;
 	u32 i;
@@ -263,35 +261,14 @@  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
 
-	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
-			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (size < ETH_HLEN)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, size, 0, 0);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	sk = kzalloc(sizeof(*sk), GFP_USER);
-	if (!sk) {
-		kfree(data);
-		return -ENOMEM;
-	}
-	sock_net_set(sk, current->nsproxy->net_ns);
-	sock_init_data(NULL, sk);
-
-	skb = build_skb(data, 0);
-	if (!skb) {
-		kfree(data);
-		kfree(sk);
-		return -ENOMEM;
-	}
-	skb->sk = sk;
-
-	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-	__skb_put(skb, size);
-	skb->protocol = eth_type_trans(skb,
-				       current->nsproxy->net_ns->loopback_dev);
-	skb_reset_network_header(skb);
-
-	cb = (struct bpf_skb_data_end *)skb->cb;
-	cb->qdisc_cb.flow_keys = &flow_keys;
+	eth = (struct ethhdr *)data;
 
 	if (!repeat)
 		repeat = 1;
@@ -300,9 +277,13 @@  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	preempt_disable();
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
-		retval = bpf_flow_dissect_skb(prog, skb,
-					      &flow_keys_dissector,
-					      &flow_keys);
+		retval = bpf_flow_dissect(prog, data, eth->h_proto, ETH_HLEN,
+					  size, &flow_keys_dissector,
+					  &flow_keys);
+		if (flow_keys.nhoff >= ETH_HLEN)
+			flow_keys.nhoff -= ETH_HLEN;
+		if (flow_keys.thoff >= ETH_HLEN)
+			flow_keys.thoff -= ETH_HLEN;
 
 		if (signal_pending(current)) {
 			preempt_enable();
@@ -335,7 +316,6 @@  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 			      retval, duration);
 
 out:
-	kfree_skb(skb);
-	kfree(sk);
+	kfree(data);
 	return ret;
 }