diff mbox series

[bpf-next,v4,2/3] bpf_xdp_redirect_map: Perform map lookup in eBPF helper

Message ID 156042464155.25684.9001494922674130772.stgit@alrua-x1
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series xdp: Allow lookup into devmaps before redirect | expand

Commit Message

Toke Høiland-Jørgensen June 13, 2019, 11:17 a.m. UTC
From: Toke Høiland-Jørgensen <toke@redhat.com>

The bpf_redirect_map() helper used by XDP programs doesn't return any
indication of whether it can successfully redirect to the map index it was
given. Instead, BPF programs have to track this themselves, leading to
programs using duplicate maps to track which entries are populated in the
devmap.

This patch fixes this by moving the map lookup into the bpf_redirect_map()
helper, which makes it possible to return failure to the eBPF program. The
lower bits of the flags argument is used as the return code, which means
that existing users who pass a '0' flag argument will get XDP_ABORTED.

With this, a BPF program can check the return code from the helper call and
react by, for instance, substituting a different redirect. This works for
any type of map used for redirect.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/linux/filter.h |    1 +
 net/core/filter.c      |   27 +++++++++++++--------------
 2 files changed, 14 insertions(+), 14 deletions(-)

Comments

Andrii Nakryiko June 14, 2019, 11:07 p.m. UTC | #1
On Thu, Jun 13, 2019 at 8:31 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> The bpf_redirect_map() helper used by XDP programs doesn't return any
> indication of whether it can successfully redirect to the map index it was
> given. Instead, BPF programs have to track this themselves, leading to
> programs using duplicate maps to track which entries are populated in the
> devmap.
>
> This patch fixes this by moving the map lookup into the bpf_redirect_map()
> helper, which makes it possible to return failure to the eBPF program. The
> lower bits of the flags argument is used as the return code, which means
> that existing users who pass a '0' flag argument will get XDP_ABORTED.

I see that we have absolutely no documentation for
bpf_xdp_redirect_map. Can you please add it to
include/uapi/linux/bpf.h? Don't forget to mention this handling of
lower bits of flags. Thanks!

>
> With this, a BPF program can check the return code from the helper call and
> react by, for instance, substituting a different redirect. This works for
> any type of map used for redirect.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> ---
>  include/linux/filter.h |    1 +
>  net/core/filter.c      |   27 +++++++++++++--------------
>  2 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 43b45d6db36d..f31ae8b9035a 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
>  struct bpf_redirect_info {
>         u32 ifindex;
>         u32 flags;
> +       void *item;

This is so generic name that some short comment describing what that
item is would help a lot.

>         struct bpf_map *map;
>         struct bpf_map *map_to_flush;
>         u32 kern_flags;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 7a996887c500..7d742ea61e2d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>                                struct bpf_redirect_info *ri)
>  {
>         u32 index = ri->ifindex;
> -       void *fwd = NULL;
> +       void *fwd = ri->item;
>         int err;
>
>         ri->ifindex = 0;
> +       ri->item = NULL;
>         WRITE_ONCE(ri->map, NULL);
>
> -       fwd = __xdp_map_lookup_elem(map, index);
> -       if (unlikely(!fwd)) {
> -               err = -EINVAL;
> -               goto err;
> -       }
>         if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
>                 xdp_do_flush_map();
>
> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>  {
>         struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>         u32 index = ri->ifindex;
> -       void *fwd = NULL;
> +       void *fwd = ri->item;
>         int err = 0;
>
>         ri->ifindex = 0;
> +       ri->item = NULL;
>         WRITE_ONCE(ri->map, NULL);
>
> -       fwd = __xdp_map_lookup_elem(map, index);
> -       if (unlikely(!fwd)) {
> -               err = -EINVAL;
> -               goto err;
> -       }
> -
>         if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
>                 struct bpf_dtab_netdev *dst = fwd;
>
> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
>
>         ri->ifindex = ifindex;
>         ri->flags = flags;
> +       ri->item = NULL;
>         WRITE_ONCE(ri->map, NULL);
>
>         return XDP_REDIRECT;
> @@ -3753,9 +3745,16 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>  {
>         struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>
> -       if (unlikely(flags))
> +       /* Lower bits of the flags are used as return code on lookup failure */
> +       if (unlikely(flags > XDP_TX))
>                 return XDP_ABORTED;
>
> +       ri->item = __xdp_map_lookup_elem(map, ifindex);
> +       if (unlikely(!ri->item)) {
> +               WRITE_ONCE(ri->map, NULL);
> +               return flags;
> +       }
> +
>         ri->ifindex = ifindex;
>         ri->flags = flags;
>         WRITE_ONCE(ri->map, map);
>
Toke Høiland-Jørgensen June 15, 2019, 10:11 a.m. UTC | #2
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes:

> On Thu, Jun 13, 2019 at 8:31 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>>
>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>>
>> The bpf_redirect_map() helper used by XDP programs doesn't return any
>> indication of whether it can successfully redirect to the map index it was
>> given. Instead, BPF programs have to track this themselves, leading to
>> programs using duplicate maps to track which entries are populated in the
>> devmap.
>>
>> This patch fixes this by moving the map lookup into the bpf_redirect_map()
>> helper, which makes it possible to return failure to the eBPF program. The
>> lower bits of the flags argument is used as the return code, which means
>> that existing users who pass a '0' flag argument will get XDP_ABORTED.
>
> I see that we have absolutely no documentation for
> bpf_xdp_redirect_map. Can you please add it to
> include/uapi/linux/bpf.h? Don't forget to mention this handling of
> lower bits of flags. Thanks!

Can do.

>> With this, a BPF program can check the return code from the helper call and
>> react by, for instance, substituting a different redirect. This works for
>> any type of map used for redirect.
>>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> ---
>>  include/linux/filter.h |    1 +
>>  net/core/filter.c      |   27 +++++++++++++--------------
>>  2 files changed, 14 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 43b45d6db36d..f31ae8b9035a 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
>>  struct bpf_redirect_info {
>>         u32 ifindex;
>>         u32 flags;
>> +       void *item;
>
> This is so generic name that some short comment describing what that
> item is would help a lot.

Can also just rename it to something more descriptive? Like 'map_entry?

>
>>         struct bpf_map *map;
>>         struct bpf_map *map_to_flush;
>>         u32 kern_flags;
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 7a996887c500..7d742ea61e2d 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>>                                struct bpf_redirect_info *ri)
>>  {
>>         u32 index = ri->ifindex;
>> -       void *fwd = NULL;
>> +       void *fwd = ri->item;
>>         int err;
>>
>>         ri->ifindex = 0;
>> +       ri->item = NULL;
>>         WRITE_ONCE(ri->map, NULL);
>>
>> -       fwd = __xdp_map_lookup_elem(map, index);
>> -       if (unlikely(!fwd)) {
>> -               err = -EINVAL;
>> -               goto err;
>> -       }
>>         if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
>>                 xdp_do_flush_map();
>>
>> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>>  {
>>         struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>         u32 index = ri->ifindex;
>> -       void *fwd = NULL;
>> +       void *fwd = ri->item;
>>         int err = 0;
>>
>>         ri->ifindex = 0;
>> +       ri->item = NULL;
>>         WRITE_ONCE(ri->map, NULL);
>>
>> -       fwd = __xdp_map_lookup_elem(map, index);
>> -       if (unlikely(!fwd)) {
>> -               err = -EINVAL;
>> -               goto err;
>> -       }
>> -
>>         if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
>>                 struct bpf_dtab_netdev *dst = fwd;
>>
>> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
>>
>>         ri->ifindex = ifindex;
>>         ri->flags = flags;
>> +       ri->item = NULL;
>>         WRITE_ONCE(ri->map, NULL);
>>
>>         return XDP_REDIRECT;
>> @@ -3753,9 +3745,16 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>>  {
>>         struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>
>> -       if (unlikely(flags))
>> +       /* Lower bits of the flags are used as return code on lookup failure */
>> +       if (unlikely(flags > XDP_TX))
>>                 return XDP_ABORTED;
>>
>> +       ri->item = __xdp_map_lookup_elem(map, ifindex);
>> +       if (unlikely(!ri->item)) {
>> +               WRITE_ONCE(ri->map, NULL);
>> +               return flags;
>> +       }
>> +
>>         ri->ifindex = ifindex;
>>         ri->flags = flags;
>>         WRITE_ONCE(ri->map, map);
>>
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43b45d6db36d..f31ae8b9035a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -580,6 +580,7 @@  struct bpf_skb_data_end {
 struct bpf_redirect_info {
 	u32 ifindex;
 	u32 flags;
+	void *item;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
 	u32 kern_flags;
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a996887c500..7d742ea61e2d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3608,17 +3608,13 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_redirect_info *ri)
 {
 	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	void *fwd = ri->item;
 	int err;
 
 	ri->ifindex = 0;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
 	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
 		xdp_do_flush_map();
 
@@ -3655,18 +3651,13 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	void *fwd = ri->item;
 	int err = 0;
 
 	ri->ifindex = 0;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
-
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		struct bpf_dtab_netdev *dst = fwd;
 
@@ -3735,6 +3726,7 @@  BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
@@ -3753,9 +3745,16 @@  BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
-	if (unlikely(flags))
+	/* Lower bits of the flags are used as return code on lookup failure */
+	if (unlikely(flags > XDP_TX))
 		return XDP_ABORTED;
 
+	ri->item = __xdp_map_lookup_elem(map, ifindex);
+	if (unlikely(!ri->item)) {
+		WRITE_ONCE(ri->map, NULL);
+		return flags;
+	}
+
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	WRITE_ONCE(ri->map, map);