Message ID | 156042464155.25684.9001494922674130772.stgit@alrua-x1 |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | xdp: Allow lookup into devmaps before redirect | expand |
On Thu, Jun 13, 2019 at 8:31 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > From: Toke Høiland-Jørgensen <toke@redhat.com> > > The bpf_redirect_map() helper used by XDP programs doesn't return any > indication of whether it can successfully redirect to the map index it was > given. Instead, BPF programs have to track this themselves, leading to > programs using duplicate maps to track which entries are populated in the > devmap. > > This patch fixes this by moving the map lookup into the bpf_redirect_map() > helper, which makes it possible to return failure to the eBPF program. The > lower bits of the flags argument is used as the return code, which means > that existing users who pass a '0' flag argument will get XDP_ABORTED. I see that we have absolutely no documentation for bpf_xdp_redirect_map. Can you please add it to include/uapi/linux/bpf.h? Don't forget to mention this handling of lower bits of flags. Thanks! > > With this, a BPF program can check the return code from the helper call and > react by, for instance, substituting a different redirect. This works for > any type of map used for redirect. > > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> > --- > include/linux/filter.h | 1 + > net/core/filter.c | 27 +++++++++++++-------------- > 2 files changed, 14 insertions(+), 14 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 43b45d6db36d..f31ae8b9035a 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -580,6 +580,7 @@ struct bpf_skb_data_end { > struct bpf_redirect_info { > u32 ifindex; > u32 flags; > + void *item; This is so generic name that some short comment describing what that item is would help a lot. > struct bpf_map *map; > struct bpf_map *map_to_flush; > u32 kern_flags; > diff --git a/net/core/filter.c b/net/core/filter.c > index 7a996887c500..7d742ea61e2d 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, > struct bpf_redirect_info *ri) > { > u32 index = ri->ifindex; > - void *fwd = NULL; > + void *fwd = ri->item; > int err; > > ri->ifindex = 0; > + ri->item = NULL; > WRITE_ONCE(ri->map, NULL); > > - fwd = __xdp_map_lookup_elem(map, index); > - if (unlikely(!fwd)) { > - err = -EINVAL; > - goto err; > - } > if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) > xdp_do_flush_map(); > > @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > u32 index = ri->ifindex; > - void *fwd = NULL; > + void *fwd = ri->item; > int err = 0; > > ri->ifindex = 0; > + ri->item = NULL; > WRITE_ONCE(ri->map, NULL); > > - fwd = __xdp_map_lookup_elem(map, index); > - if (unlikely(!fwd)) { > - err = -EINVAL; > - goto err; > - } > - > if (map->map_type == BPF_MAP_TYPE_DEVMAP) { > struct bpf_dtab_netdev *dst = fwd; > > @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) > > ri->ifindex = ifindex; > ri->flags = flags; > + ri->item = NULL; > WRITE_ONCE(ri->map, NULL); > > return XDP_REDIRECT; > @@ -3753,9 +3745,16 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > > - if (unlikely(flags)) > + /* Lower bits of the flags are used as return code on lookup failure */ > + if (unlikely(flags > XDP_TX)) > return XDP_ABORTED; > > + ri->item = __xdp_map_lookup_elem(map, ifindex); > + if (unlikely(!ri->item)) { > + WRITE_ONCE(ri->map, NULL); > + return flags; > + } > + > ri->ifindex = ifindex; > ri->flags = flags; > WRITE_ONCE(ri->map, map); >
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: > On Thu, Jun 13, 2019 at 8:31 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> From: Toke Høiland-Jørgensen <toke@redhat.com> >> >> The bpf_redirect_map() helper used by XDP programs doesn't return any >> indication of whether it can successfully redirect to the map index it was >> given. Instead, BPF programs have to track this themselves, leading to >> programs using duplicate maps to track which entries are populated in the >> devmap. >> >> This patch fixes this by moving the map lookup into the bpf_redirect_map() >> helper, which makes it possible to return failure to the eBPF program. The >> lower bits of the flags argument is used as the return code, which means >> that existing users who pass a '0' flag argument will get XDP_ABORTED. > > I see that we have absolutely no documentation for > bpf_xdp_redirect_map. Can you please add it to > include/uapi/linux/bpf.h? Don't forget to mention this handling of > lower bits of flags. Thanks! Can do. >> With this, a BPF program can check the return code from the helper call and >> react by, for instance, substituting a different redirect. This works for >> any type of map used for redirect. >> >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> >> --- >> include/linux/filter.h | 1 + >> net/core/filter.c | 27 +++++++++++++-------------- >> 2 files changed, 14 insertions(+), 14 deletions(-) >> >> diff --git a/include/linux/filter.h b/include/linux/filter.h >> index 43b45d6db36d..f31ae8b9035a 100644 >> --- a/include/linux/filter.h >> +++ b/include/linux/filter.h >> @@ -580,6 +580,7 @@ struct bpf_skb_data_end { >> struct bpf_redirect_info { >> u32 ifindex; >> u32 flags; >> + void *item; > > This is so generic name that some short comment describing what that > item is would help a lot. Can also just rename it to something more descriptive? Like 'map_entry? > >> struct bpf_map *map; >> struct bpf_map *map_to_flush; >> u32 kern_flags; >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 7a996887c500..7d742ea61e2d 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, >> struct bpf_redirect_info *ri) >> { >> u32 index = ri->ifindex; >> - void *fwd = NULL; >> + void *fwd = ri->item; >> int err; >> >> ri->ifindex = 0; >> + ri->item = NULL; >> WRITE_ONCE(ri->map, NULL); >> >> - fwd = __xdp_map_lookup_elem(map, index); >> - if (unlikely(!fwd)) { >> - err = -EINVAL; >> - goto err; >> - } >> if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) >> xdp_do_flush_map(); >> >> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, >> { >> struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); >> u32 index = ri->ifindex; >> - void *fwd = NULL; >> + void *fwd = ri->item; >> int err = 0; >> >> ri->ifindex = 0; >> + ri->item = NULL; >> WRITE_ONCE(ri->map, NULL); >> >> - fwd = __xdp_map_lookup_elem(map, index); >> - if (unlikely(!fwd)) { >> - err = -EINVAL; >> - goto err; >> - } >> - >> if (map->map_type == BPF_MAP_TYPE_DEVMAP) { >> struct bpf_dtab_netdev *dst = fwd; >> >> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) >> >> ri->ifindex = ifindex; >> ri->flags = flags; >> + ri->item = NULL; >> WRITE_ONCE(ri->map, NULL); >> >> return XDP_REDIRECT; >> @@ -3753,9 +3745,16 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, >> { >> struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); >> >> - if (unlikely(flags)) >> + /* Lower bits of the flags are used as return code on lookup failure */ >> + if (unlikely(flags > XDP_TX)) >> return XDP_ABORTED; >> >> + ri->item = __xdp_map_lookup_elem(map, ifindex); >> + if (unlikely(!ri->item)) { >> + WRITE_ONCE(ri->map, NULL); >> + return flags; >> + } >> + >> ri->ifindex = ifindex; >> ri->flags = flags; >> WRITE_ONCE(ri->map, map); >>
diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b45d6db36d..f31ae8b9035a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -580,6 +580,7 @@ struct bpf_skb_data_end { struct bpf_redirect_info { u32 ifindex; u32 flags; + void *item; struct bpf_map *map; struct bpf_map *map_to_flush; u32 kern_flags; diff --git a/net/core/filter.c b/net/core/filter.c index 7a996887c500..7d742ea61e2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct bpf_redirect_info *ri) { u32 index = ri->ifindex; - void *fwd = NULL; + void *fwd = ri->item; int err; ri->ifindex = 0; + ri->item = NULL; WRITE_ONCE(ri->map, NULL); - fwd = __xdp_map_lookup_elem(map, index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) xdp_do_flush_map(); @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); u32 index = ri->ifindex; - void *fwd = NULL; + void *fwd = ri->item; int err = 0; ri->ifindex = 0; + ri->item = NULL; WRITE_ONCE(ri->map, NULL); - fwd = __xdp_map_lookup_elem(map, index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { struct bpf_dtab_netdev *dst = fwd; @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; + ri->item = NULL; WRITE_ONCE(ri->map, NULL); return XDP_REDIRECT; @@ -3753,9 +3745,16 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + /* Lower bits of the flags are used as return code on lookup failure */ + if (unlikely(flags > XDP_TX)) return XDP_ABORTED; + ri->item = __xdp_map_lookup_elem(map, ifindex); + if (unlikely(!ri->item)) { + WRITE_ONCE(ri->map, NULL); + return flags; + } + ri->ifindex = ifindex; ri->flags = flags; WRITE_ONCE(ri->map, map);