diff mbox series

[bpf-next,v2,3/5] selftests/bpf: use thoff instead of nhoff in BPF flow dissector

Message ID 20181204040100.243614-4-sdf@google.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series support flow dissector in BPF_PROG_TEST_RUN | expand

Commit Message

Stanislav Fomichev Dec. 4, 2018, 4 a.m. UTC
We are returning thoff from the flow dissector, not nhoff. Pass
thoff along with nhoff to the bpf program (initially thoff == nhoff)
and expect the flow dissector to amend/return thoff, not nhoff.

This avoids the confusion where, by the time the bpf flow dissector
exits, nhoff == thoff, which doesn't make much sense (this is relevant
in the context of the next patch, where I add a simple selftest
and manually construct the expected flow_keys).

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 net/core/flow_dissector.c              |  1 +
 tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++--------------
 2 files changed, 18 insertions(+), 19 deletions(-)

Comments

Song Liu Dec. 4, 2018, 11:16 p.m. UTC | #1
On Mon, Dec 3, 2018 at 8:01 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> We are returning thoff from the flow dissector, not the nhoff. Pass
> thoff along with nhoff to the bpf program (initially thoff == nhoff)
> and expect flow dissector amend/return thoff, not nhoff.
>
> This avoids confusion, when by the time bpf flow dissector exits,
> nhoff == thoff, which doesn't make much sense (this is relevant
> in the context of the next patch, where I add simple selftest
> and manually construct expected flow_keys).
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  net/core/flow_dissector.c              |  1 +
>  tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++--------------
>  2 files changed, 18 insertions(+), 19 deletions(-)
>
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 3c8a78decbc0..ac19da6f390b 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -708,6 +708,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
>         memset(flow_keys, 0, sizeof(*flow_keys));
>         cb->qdisc_cb.flow_keys = flow_keys;
>         flow_keys->nhoff = skb_network_offset(skb);
> +       flow_keys->thoff = flow_keys->nhoff;

Do we need this fix without this set? If yes, do we need it for bpf
tree as well?

Thanks,
Song

>
>         bpf_compute_data_pointers((struct sk_buff *)skb);
>         result = BPF_PROG_RUN(prog, skb);
> diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
> index b9798f558ca7..284660f5aa95 100644
> --- a/tools/testing/selftests/bpf/bpf_flow.c
> +++ b/tools/testing/selftests/bpf/bpf_flow.c
> @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
>  {
>         void *data_end = (void *)(long)skb->data_end;
>         void *data = (void *)(long)skb->data;
> -       __u16 nhoff = skb->flow_keys->nhoff;
> +       __u16 thoff = skb->flow_keys->thoff;
>         __u8 *hdr;
>
>         /* Verifies this variable offset does not overflow */
> -       if (nhoff > (USHRT_MAX - hdr_size))
> +       if (thoff > (USHRT_MAX - hdr_size))
>                 return NULL;
>
> -       hdr = data + nhoff;
> +       hdr = data + thoff;
>         if (hdr + hdr_size <= data_end)
>                 return hdr;
>
> -       if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
> +       if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
>                 return NULL;
>
>         return buffer;
> @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>                         /* Only inspect standard GRE packets with version 0 */
>                         return BPF_OK;
>
> -               keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
> +               keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
>                 if (GRE_IS_CSUM(gre->flags))
> -                       keys->nhoff += 4; /* Step over chksum and Padding */
> +                       keys->thoff += 4; /* Step over chksum and Padding */
>                 if (GRE_IS_KEY(gre->flags))
> -                       keys->nhoff += 4; /* Step over key */
> +                       keys->thoff += 4; /* Step over key */
>                 if (GRE_IS_SEQ(gre->flags))
> -                       keys->nhoff += 4; /* Step over sequence number */
> +                       keys->thoff += 4; /* Step over sequence number */
>
>                 keys->is_encap = true;
>
> @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>                         if (!eth)
>                                 return BPF_DROP;
>
> -                       keys->nhoff += sizeof(*eth);
> +                       keys->thoff += sizeof(*eth);
>
>                         return parse_eth_proto(skb, eth->h_proto);
>                 } else {
> @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>                 if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
>                         return BPF_DROP;
>
> -               keys->thoff = keys->nhoff;
>                 keys->sport = tcp->source;
>                 keys->dport = tcp->dest;
>                 return BPF_OK;
> @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>                 if (!udp)
>                         return BPF_DROP;
>
> -               keys->thoff = keys->nhoff;
>                 keys->sport = udp->source;
>                 keys->dport = udp->dest;
>                 return BPF_OK;
> @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb)
>         keys->ipv4_src = iph->saddr;
>         keys->ipv4_dst = iph->daddr;
>
> -       keys->nhoff += iph->ihl << 2;
> -       if (data + keys->nhoff > data_end)
> +       keys->thoff += iph->ihl << 2;
> +       if (data + keys->thoff > data_end)
>                 return BPF_DROP;
>
>         if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
> @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb)
>         keys->addr_proto = ETH_P_IPV6;
>         memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
>
> -       keys->nhoff += sizeof(struct ipv6hdr);
> +       keys->thoff += sizeof(struct ipv6hdr);
>
>         return parse_ipv6_proto(skb, ip6h->nexthdr);
>  }
> @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb)
>         /* hlen is in 8-octets and does not include the first 8 bytes
>          * of the header
>          */
> -       skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
> +       skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
>
>         return parse_ipv6_proto(skb, ip6h->nexthdr);
>  }
> @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
>         if (!fragh)
>                 return BPF_DROP;
>
> -       keys->nhoff += sizeof(*fragh);
> +       keys->thoff += sizeof(*fragh);
>         keys->is_frag = true;
>         if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
>                 keys->is_first_frag = true;
> @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb)
>         __be16 proto;
>
>         /* Peek back to see if single or double-tagging */
> -       if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
> +       if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
>                                sizeof(proto)))
>                 return BPF_DROP;
>
> @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb)
>                 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
>                         return BPF_DROP;
>
> -               keys->nhoff += sizeof(*vlan);
> +               keys->thoff += sizeof(*vlan);
>         }
>
>         vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
>         if (!vlan)
>                 return BPF_DROP;
>
> -       keys->nhoff += sizeof(*vlan);
> +       keys->thoff += sizeof(*vlan);
>         /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
>         if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
>             vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
> --
> 2.20.0.rc1.387.gf8505762e3-goog
>
Stanislav Fomichev Dec. 4, 2018, 11:26 p.m. UTC | #2
On 12/04, Song Liu wrote:
> On Mon, Dec 3, 2018 at 8:01 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > We are returning thoff from the flow dissector, not the nhoff. Pass
> > thoff along with nhoff to the bpf program (initially thoff == nhoff)
> > and expect flow dissector amend/return thoff, not nhoff.
> >
> > This avoids confusion, when by the time bpf flow dissector exits,
> > nhoff == thoff, which doesn't make much sense (this is relevant
> > in the context of the next patch, where I add simple selftest
> > and manually construct expected flow_keys).
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  net/core/flow_dissector.c              |  1 +
> >  tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++--------------
> >  2 files changed, 18 insertions(+), 19 deletions(-)
> >
> > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> > index 3c8a78decbc0..ac19da6f390b 100644
> > --- a/net/core/flow_dissector.c
> > +++ b/net/core/flow_dissector.c
> > @@ -708,6 +708,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
> >         memset(flow_keys, 0, sizeof(*flow_keys));
> >         cb->qdisc_cb.flow_keys = flow_keys;
> >         flow_keys->nhoff = skb_network_offset(skb);
> > +       flow_keys->thoff = flow_keys->nhoff;
> 
> Do we need this fix without this set? If yes, do we need it for bpf
> tree as well?
No, I don't think so. This just changes input to the flow dissector
slightly (going forward).
It used to be nhoff in, thoff out. Now it's thoff in (with nhoff for
backwards compatibility) and thoff out.

> 
> Thanks,
> Song
> 
> >
> >         bpf_compute_data_pointers((struct sk_buff *)skb);
> >         result = BPF_PROG_RUN(prog, skb);
> > diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
> > index b9798f558ca7..284660f5aa95 100644
> > --- a/tools/testing/selftests/bpf/bpf_flow.c
> > +++ b/tools/testing/selftests/bpf/bpf_flow.c
> > @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
> >  {
> >         void *data_end = (void *)(long)skb->data_end;
> >         void *data = (void *)(long)skb->data;
> > -       __u16 nhoff = skb->flow_keys->nhoff;
> > +       __u16 thoff = skb->flow_keys->thoff;
> >         __u8 *hdr;
> >
> >         /* Verifies this variable offset does not overflow */
> > -       if (nhoff > (USHRT_MAX - hdr_size))
> > +       if (thoff > (USHRT_MAX - hdr_size))
> >                 return NULL;
> >
> > -       hdr = data + nhoff;
> > +       hdr = data + thoff;
> >         if (hdr + hdr_size <= data_end)
> >                 return hdr;
> >
> > -       if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
> > +       if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
> >                 return NULL;
> >
> >         return buffer;
> > @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> >                         /* Only inspect standard GRE packets with version 0 */
> >                         return BPF_OK;
> >
> > -               keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
> > +               keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
> >                 if (GRE_IS_CSUM(gre->flags))
> > -                       keys->nhoff += 4; /* Step over chksum and Padding */
> > +                       keys->thoff += 4; /* Step over chksum and Padding */
> >                 if (GRE_IS_KEY(gre->flags))
> > -                       keys->nhoff += 4; /* Step over key */
> > +                       keys->thoff += 4; /* Step over key */
> >                 if (GRE_IS_SEQ(gre->flags))
> > -                       keys->nhoff += 4; /* Step over sequence number */
> > +                       keys->thoff += 4; /* Step over sequence number */
> >
> >                 keys->is_encap = true;
> >
> > @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> >                         if (!eth)
> >                                 return BPF_DROP;
> >
> > -                       keys->nhoff += sizeof(*eth);
> > +                       keys->thoff += sizeof(*eth);
> >
> >                         return parse_eth_proto(skb, eth->h_proto);
> >                 } else {
> > @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> >                 if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
> >                         return BPF_DROP;
> >
> > -               keys->thoff = keys->nhoff;
> >                 keys->sport = tcp->source;
> >                 keys->dport = tcp->dest;
> >                 return BPF_OK;
> > @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> >                 if (!udp)
> >                         return BPF_DROP;
> >
> > -               keys->thoff = keys->nhoff;
> >                 keys->sport = udp->source;
> >                 keys->dport = udp->dest;
> >                 return BPF_OK;
> > @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb)
> >         keys->ipv4_src = iph->saddr;
> >         keys->ipv4_dst = iph->daddr;
> >
> > -       keys->nhoff += iph->ihl << 2;
> > -       if (data + keys->nhoff > data_end)
> > +       keys->thoff += iph->ihl << 2;
> > +       if (data + keys->thoff > data_end)
> >                 return BPF_DROP;
> >
> >         if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
> > @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb)
> >         keys->addr_proto = ETH_P_IPV6;
> >         memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
> >
> > -       keys->nhoff += sizeof(struct ipv6hdr);
> > +       keys->thoff += sizeof(struct ipv6hdr);
> >
> >         return parse_ipv6_proto(skb, ip6h->nexthdr);
> >  }
> > @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb)
> >         /* hlen is in 8-octets and does not include the first 8 bytes
> >          * of the header
> >          */
> > -       skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
> > +       skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
> >
> >         return parse_ipv6_proto(skb, ip6h->nexthdr);
> >  }
> > @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
> >         if (!fragh)
> >                 return BPF_DROP;
> >
> > -       keys->nhoff += sizeof(*fragh);
> > +       keys->thoff += sizeof(*fragh);
> >         keys->is_frag = true;
> >         if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
> >                 keys->is_first_frag = true;
> > @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb)
> >         __be16 proto;
> >
> >         /* Peek back to see if single or double-tagging */
> > -       if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
> > +       if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
> >                                sizeof(proto)))
> >                 return BPF_DROP;
> >
> > @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb)
> >                 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
> >                         return BPF_DROP;
> >
> > -               keys->nhoff += sizeof(*vlan);
> > +               keys->thoff += sizeof(*vlan);
> >         }
> >
> >         vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
> >         if (!vlan)
> >                 return BPF_DROP;
> >
> > -       keys->nhoff += sizeof(*vlan);
> > +       keys->thoff += sizeof(*vlan);
> >         /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
> >         if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
> >             vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
> > --
> > 2.20.0.rc1.387.gf8505762e3-goog
> >
Song Liu Dec. 5, 2018, 5:53 p.m. UTC | #3
On Tue, Dec 4, 2018 at 3:26 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 12/04, Song Liu wrote:
> > On Mon, Dec 3, 2018 at 8:01 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > We are returning thoff from the flow dissector, not the nhoff. Pass
> > > thoff along with nhoff to the bpf program (initially thoff == nhoff)
> > > and expect flow dissector amend/return thoff, not nhoff.
> > >
> > > This avoids confusion, when by the time bpf flow dissector exits,
> > > nhoff == thoff, which doesn't make much sense (this is relevant
> > > in the context of the next patch, where I add simple selftest
> > > and manually construct expected flow_keys).
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  net/core/flow_dissector.c              |  1 +
> > >  tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++--------------
> > >  2 files changed, 18 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> > > index 3c8a78decbc0..ac19da6f390b 100644
> > > --- a/net/core/flow_dissector.c
> > > +++ b/net/core/flow_dissector.c
> > > @@ -708,6 +708,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
> > >         memset(flow_keys, 0, sizeof(*flow_keys));
> > >         cb->qdisc_cb.flow_keys = flow_keys;
> > >         flow_keys->nhoff = skb_network_offset(skb);
> > > +       flow_keys->thoff = flow_keys->nhoff;
> >
> > Do we need this fix without this set? If yes, do we need it for bpf
> > tree as well?
> No, I don't think so. This just changes input to the flow dissector
> slightly (going forward).
> It used to be nhoff in, thoff out. Now it's thoff in (with nhoff for
> backwards compatibility) and thoff out.

Thanks for the explanation.

Acked-by: Song Liu <songliubraving@fb.com>

>
> >
> > Thanks,
> > Song
> >
> > >
> > >         bpf_compute_data_pointers((struct sk_buff *)skb);
> > >         result = BPF_PROG_RUN(prog, skb);
> > > diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
> > > index b9798f558ca7..284660f5aa95 100644
> > > --- a/tools/testing/selftests/bpf/bpf_flow.c
> > > +++ b/tools/testing/selftests/bpf/bpf_flow.c
> > > @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
> > >  {
> > >         void *data_end = (void *)(long)skb->data_end;
> > >         void *data = (void *)(long)skb->data;
> > > -       __u16 nhoff = skb->flow_keys->nhoff;
> > > +       __u16 thoff = skb->flow_keys->thoff;
> > >         __u8 *hdr;
> > >
> > >         /* Verifies this variable offset does not overflow */
> > > -       if (nhoff > (USHRT_MAX - hdr_size))
> > > +       if (thoff > (USHRT_MAX - hdr_size))
> > >                 return NULL;
> > >
> > > -       hdr = data + nhoff;
> > > +       hdr = data + thoff;
> > >         if (hdr + hdr_size <= data_end)
> > >                 return hdr;
> > >
> > > -       if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
> > > +       if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
> > >                 return NULL;
> > >
> > >         return buffer;
> > > @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> > >                         /* Only inspect standard GRE packets with version 0 */
> > >                         return BPF_OK;
> > >
> > > -               keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
> > > +               keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
> > >                 if (GRE_IS_CSUM(gre->flags))
> > > -                       keys->nhoff += 4; /* Step over chksum and Padding */
> > > +                       keys->thoff += 4; /* Step over chksum and Padding */
> > >                 if (GRE_IS_KEY(gre->flags))
> > > -                       keys->nhoff += 4; /* Step over key */
> > > +                       keys->thoff += 4; /* Step over key */
> > >                 if (GRE_IS_SEQ(gre->flags))
> > > -                       keys->nhoff += 4; /* Step over sequence number */
> > > +                       keys->thoff += 4; /* Step over sequence number */
> > >
> > >                 keys->is_encap = true;
> > >
> > > @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> > >                         if (!eth)
> > >                                 return BPF_DROP;
> > >
> > > -                       keys->nhoff += sizeof(*eth);
> > > +                       keys->thoff += sizeof(*eth);
> > >
> > >                         return parse_eth_proto(skb, eth->h_proto);
> > >                 } else {
> > > @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> > >                 if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
> > >                         return BPF_DROP;
> > >
> > > -               keys->thoff = keys->nhoff;
> > >                 keys->sport = tcp->source;
> > >                 keys->dport = tcp->dest;
> > >                 return BPF_OK;
> > > @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
> > >                 if (!udp)
> > >                         return BPF_DROP;
> > >
> > > -               keys->thoff = keys->nhoff;
> > >                 keys->sport = udp->source;
> > >                 keys->dport = udp->dest;
> > >                 return BPF_OK;
> > > @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb)
> > >         keys->ipv4_src = iph->saddr;
> > >         keys->ipv4_dst = iph->daddr;
> > >
> > > -       keys->nhoff += iph->ihl << 2;
> > > -       if (data + keys->nhoff > data_end)
> > > +       keys->thoff += iph->ihl << 2;
> > > +       if (data + keys->thoff > data_end)
> > >                 return BPF_DROP;
> > >
> > >         if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
> > > @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb)
> > >         keys->addr_proto = ETH_P_IPV6;
> > >         memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
> > >
> > > -       keys->nhoff += sizeof(struct ipv6hdr);
> > > +       keys->thoff += sizeof(struct ipv6hdr);
> > >
> > >         return parse_ipv6_proto(skb, ip6h->nexthdr);
> > >  }
> > > @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb)
> > >         /* hlen is in 8-octets and does not include the first 8 bytes
> > >          * of the header
> > >          */
> > > -       skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
> > > +       skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
> > >
> > >         return parse_ipv6_proto(skb, ip6h->nexthdr);
> > >  }
> > > @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
> > >         if (!fragh)
> > >                 return BPF_DROP;
> > >
> > > -       keys->nhoff += sizeof(*fragh);
> > > +       keys->thoff += sizeof(*fragh);
> > >         keys->is_frag = true;
> > >         if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
> > >                 keys->is_first_frag = true;
> > > @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb)
> > >         __be16 proto;
> > >
> > >         /* Peek back to see if single or double-tagging */
> > > -       if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
> > > +       if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
> > >                                sizeof(proto)))
> > >                 return BPF_DROP;
> > >
> > > @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb)
> > >                 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
> > >                         return BPF_DROP;
> > >
> > > -               keys->nhoff += sizeof(*vlan);
> > > +               keys->thoff += sizeof(*vlan);
> > >         }
> > >
> > >         vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
> > >         if (!vlan)
> > >                 return BPF_DROP;
> > >
> > > -       keys->nhoff += sizeof(*vlan);
> > > +       keys->thoff += sizeof(*vlan);
> > >         /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
> > >         if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
> > >             vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
> > > --
> > > 2.20.0.rc1.387.gf8505762e3-goog
> > >
Alexei Starovoitov Dec. 6, 2018, 2:47 a.m. UTC | #4
On Tue, Dec 04, 2018 at 03:26:15PM -0800, Stanislav Fomichev wrote:
> On 12/04, Song Liu wrote:
> > On Mon, Dec 3, 2018 at 8:01 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > We are returning thoff from the flow dissector, not the nhoff. Pass
> > > thoff along with nhoff to the bpf program (initially thoff == nhoff)
> > > and expect flow dissector amend/return thoff, not nhoff.
> > >
> > > This avoids confusion, when by the time bpf flow dissector exits,
> > > nhoff == thoff, which doesn't make much sense (this is relevant
> > > in the context of the next patch, where I add simple selftest
> > > and manually construct expected flow_keys).
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  net/core/flow_dissector.c              |  1 +
> > >  tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++--------------
> > >  2 files changed, 18 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> > > index 3c8a78decbc0..ac19da6f390b 100644
> > > --- a/net/core/flow_dissector.c
> > > +++ b/net/core/flow_dissector.c
> > > @@ -708,6 +708,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
> > >         memset(flow_keys, 0, sizeof(*flow_keys));
> > >         cb->qdisc_cb.flow_keys = flow_keys;
> > >         flow_keys->nhoff = skb_network_offset(skb);
> > > +       flow_keys->thoff = flow_keys->nhoff;
> > 
> > Do we need this fix without this set? If yes, do we need it for bpf
> > tree as well?
> No, I don't think so. This just changes input to the flow dissector
> slightly (going forward).
> It used to be nhoff in, thoff out. Now it's thoff in (with nhoff for
> backwards compatibility) and thoff out.

That is still an api change.
Also patch 4 is a fix.
I think patches 3 and 4 need to go into bpf tree first.
Then wait for them to merge into bpf-next and resubmit the rest.
diff mbox series

Patch

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3c8a78decbc0..ac19da6f390b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -708,6 +708,7 @@  bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 	memset(flow_keys, 0, sizeof(*flow_keys));
 	cb->qdisc_cb.flow_keys = flow_keys;
 	flow_keys->nhoff = skb_network_offset(skb);
+	flow_keys->thoff = flow_keys->nhoff;
 
 	bpf_compute_data_pointers((struct sk_buff *)skb);
 	result = BPF_PROG_RUN(prog, skb);
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
index b9798f558ca7..284660f5aa95 100644
--- a/tools/testing/selftests/bpf/bpf_flow.c
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -70,18 +70,18 @@  static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
 {
 	void *data_end = (void *)(long)skb->data_end;
 	void *data = (void *)(long)skb->data;
-	__u16 nhoff = skb->flow_keys->nhoff;
+	__u16 thoff = skb->flow_keys->thoff;
 	__u8 *hdr;
 
 	/* Verifies this variable offset does not overflow */
-	if (nhoff > (USHRT_MAX - hdr_size))
+	if (thoff > (USHRT_MAX - hdr_size))
 		return NULL;
 
-	hdr = data + nhoff;
+	hdr = data + thoff;
 	if (hdr + hdr_size <= data_end)
 		return hdr;
 
-	if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
+	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
 		return NULL;
 
 	return buffer;
@@ -158,13 +158,13 @@  static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 			/* Only inspect standard GRE packets with version 0 */
 			return BPF_OK;
 
-		keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
 		if (GRE_IS_CSUM(gre->flags))
-			keys->nhoff += 4; /* Step over chksum and Padding */
+			keys->thoff += 4; /* Step over chksum and Padding */
 		if (GRE_IS_KEY(gre->flags))
-			keys->nhoff += 4; /* Step over key */
+			keys->thoff += 4; /* Step over key */
 		if (GRE_IS_SEQ(gre->flags))
-			keys->nhoff += 4; /* Step over sequence number */
+			keys->thoff += 4; /* Step over sequence number */
 
 		keys->is_encap = true;
 
@@ -174,7 +174,7 @@  static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 			if (!eth)
 				return BPF_DROP;
 
-			keys->nhoff += sizeof(*eth);
+			keys->thoff += sizeof(*eth);
 
 			return parse_eth_proto(skb, eth->h_proto);
 		} else {
@@ -191,7 +191,6 @@  static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
 			return BPF_DROP;
 
-		keys->thoff = keys->nhoff;
 		keys->sport = tcp->source;
 		keys->dport = tcp->dest;
 		return BPF_OK;
@@ -201,7 +200,6 @@  static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 		if (!udp)
 			return BPF_DROP;
 
-		keys->thoff = keys->nhoff;
 		keys->sport = udp->source;
 		keys->dport = udp->dest;
 		return BPF_OK;
@@ -252,8 +250,8 @@  PROG(IP)(struct __sk_buff *skb)
 	keys->ipv4_src = iph->saddr;
 	keys->ipv4_dst = iph->daddr;
 
-	keys->nhoff += iph->ihl << 2;
-	if (data + keys->nhoff > data_end)
+	keys->thoff += iph->ihl << 2;
+	if (data + keys->thoff > data_end)
 		return BPF_DROP;
 
 	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
@@ -285,7 +283,7 @@  PROG(IPV6)(struct __sk_buff *skb)
 	keys->addr_proto = ETH_P_IPV6;
 	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
 
-	keys->nhoff += sizeof(struct ipv6hdr);
+	keys->thoff += sizeof(struct ipv6hdr);
 
 	return parse_ipv6_proto(skb, ip6h->nexthdr);
 }
@@ -301,7 +299,7 @@  PROG(IPV6OP)(struct __sk_buff *skb)
 	/* hlen is in 8-octets and does not include the first 8 bytes
 	 * of the header
 	 */
-	skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
+	skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
 
 	return parse_ipv6_proto(skb, ip6h->nexthdr);
 }
@@ -315,7 +313,7 @@  PROG(IPV6FR)(struct __sk_buff *skb)
 	if (!fragh)
 		return BPF_DROP;
 
-	keys->nhoff += sizeof(*fragh);
+	keys->thoff += sizeof(*fragh);
 	keys->is_frag = true;
 	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
 		keys->is_first_frag = true;
@@ -341,7 +339,7 @@  PROG(VLAN)(struct __sk_buff *skb)
 	__be16 proto;
 
 	/* Peek back to see if single or double-tagging */
-	if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
+	if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
 			       sizeof(proto)))
 		return BPF_DROP;
 
@@ -354,14 +352,14 @@  PROG(VLAN)(struct __sk_buff *skb)
 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
 			return BPF_DROP;
 
-		keys->nhoff += sizeof(*vlan);
+		keys->thoff += sizeof(*vlan);
 	}
 
 	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 	if (!vlan)
 		return BPF_DROP;
 
-	keys->nhoff += sizeof(*vlan);
+	keys->thoff += sizeof(*vlan);
 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))