[bpf-next,v3,2/3] bpf_xdp_redirect_map: Perform map lookup in eBPF helper
diff mbox series

Message ID 156026784011.26748.7290735899755011809.stgit@alrua-x1
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series
  • xdp: Allow lookup into devmaps before redirect
Related show

Commit Message

Toke Høiland-Jørgensen June 11, 2019, 3:44 p.m. UTC
From: Toke Høiland-Jørgensen <toke@redhat.com>

The bpf_redirect_map() helper used by XDP programs doesn't return any
indication of whether it can successfully redirect to the map index it was
given. Instead, BPF programs have to track this themselves, leading to
programs using duplicate maps to track which entries are populated in the
devmap.

This patch fixes this by moving the map lookup into the bpf_redirect_map()
helper, which makes it possible to return failure to the eBPF program. The
lower bits of the flags argument is used as the return code, which means
that existing users who pass a '0' flag argument will get XDP_ABORTED.

With this, a BPF program can check the return code from the helper call and
react by, for instance, substituting a different redirect. This works for
any type of map used for redirect.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/linux/filter.h   |    1 +
 include/uapi/linux/bpf.h |    8 ++++++++
 net/core/filter.c        |   26 ++++++++++++--------------
 3 files changed, 21 insertions(+), 14 deletions(-)

Comments

Jesper Dangaard Brouer June 11, 2019, 6 p.m. UTC | #1
On Tue, 11 Jun 2019 17:44:00 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
> 
> The bpf_redirect_map() helper used by XDP programs doesn't return any
> indication of whether it can successfully redirect to the map index it was
> given. Instead, BPF programs have to track this themselves, leading to
> programs using duplicate maps to track which entries are populated in the
> devmap.
> 
> This patch fixes this by moving the map lookup into the bpf_redirect_map()
> helper, which makes it possible to return failure to the eBPF program. The
> lower bits of the flags argument is used as the return code, which means
> that existing users who pass a '0' flag argument will get XDP_ABORTED.
> 
> With this, a BPF program can check the return code from the helper call and
> react by, for instance, substituting a different redirect. This works for
> any type of map used for redirect.
> 
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> ---
>  include/linux/filter.h   |    1 +
>  include/uapi/linux/bpf.h |    8 ++++++++
>  net/core/filter.c        |   26 ++++++++++++--------------
>  3 files changed, 21 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 43b45d6db36d..f31ae8b9035a 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
>  struct bpf_redirect_info {
>  	u32 ifindex;
>  	u32 flags;
> +	void *item;
>  	struct bpf_map *map;
>  	struct bpf_map *map_to_flush;
>  	u32 kern_flags;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 7c6aef253173..9931cf02de19 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3098,6 +3098,14 @@ enum xdp_action {
>  	XDP_REDIRECT,
>  };
>  
> +/* Flags for bpf_xdp_redirect_map helper */
> +
> +/* The lower flag bits will be the return code of bpf_xdp_redirect_map() helper
> + * if the map lookup fails.
> + */
> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
> +#define XDP_REDIRECT_ALL_FLAGS XDP_REDIRECT_INVALID_MASK
> +

Slightly confused about the naming of the define, see later.

>  /* user accessible metadata for XDP packet hook
>   * new fields must be added to the end of this structure
>   */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 7a996887c500..dd43be497480 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>  			       struct bpf_redirect_info *ri)
>  {
>  	u32 index = ri->ifindex;
> -	void *fwd = NULL;
> +	void *fwd = ri->item;
>  	int err;
>  
>  	ri->ifindex = 0;
> +	ri->item = NULL;
>  	WRITE_ONCE(ri->map, NULL);
>  
> -	fwd = __xdp_map_lookup_elem(map, index);
> -	if (unlikely(!fwd)) {
> -		err = -EINVAL;
> -		goto err;
> -	}
>  	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
>  		xdp_do_flush_map();
>  
> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>  {
>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>  	u32 index = ri->ifindex;
> -	void *fwd = NULL;
> +	void *fwd = ri->item;
>  	int err = 0;
>  
>  	ri->ifindex = 0;
> +	ri->item = NULL;
>  	WRITE_ONCE(ri->map, NULL);
>  
> -	fwd = __xdp_map_lookup_elem(map, index);
> -	if (unlikely(!fwd)) {
> -		err = -EINVAL;
> -		goto err;
> -	}
> -
>  	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
>  		struct bpf_dtab_netdev *dst = fwd;
>  
> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
>  
>  	ri->ifindex = ifindex;
>  	ri->flags = flags;
> +	ri->item = NULL;
>  	WRITE_ONCE(ri->map, NULL);
>  
>  	return XDP_REDIRECT;
> @@ -3753,9 +3745,15 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>  {
>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>  
> -	if (unlikely(flags))
> +	if (unlikely(flags & ~XDP_REDIRECT_ALL_FLAGS))
>  		return XDP_ABORTED;
>  
> +	ri->item = __xdp_map_lookup_elem(map, ifindex);
> +	if (unlikely(!ri->item)) {
> +		WRITE_ONCE(ri->map, NULL);
> +		return (flags & XDP_REDIRECT_INVALID_MASK);

Maybe I'm reading it wrong, but shouldn't the mask be called the "valid" mask?

> +	}
> +
>  	ri->ifindex = ifindex;
>  	ri->flags = flags;
>  	WRITE_ONCE(ri->map, map);
>
Toke Høiland-Jørgensen June 11, 2019, 6:17 p.m. UTC | #2
Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Tue, 11 Jun 2019 17:44:00 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>> 
>> The bpf_redirect_map() helper used by XDP programs doesn't return any
>> indication of whether it can successfully redirect to the map index it was
>> given. Instead, BPF programs have to track this themselves, leading to
>> programs using duplicate maps to track which entries are populated in the
>> devmap.
>> 
>> This patch fixes this by moving the map lookup into the bpf_redirect_map()
>> helper, which makes it possible to return failure to the eBPF program. The
>> lower bits of the flags argument is used as the return code, which means
>> that existing users who pass a '0' flag argument will get XDP_ABORTED.
>> 
>> With this, a BPF program can check the return code from the helper call and
>> react by, for instance, substituting a different redirect. This works for
>> any type of map used for redirect.
>> 
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> ---
>>  include/linux/filter.h   |    1 +
>>  include/uapi/linux/bpf.h |    8 ++++++++
>>  net/core/filter.c        |   26 ++++++++++++--------------
>>  3 files changed, 21 insertions(+), 14 deletions(-)
>> 
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 43b45d6db36d..f31ae8b9035a 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
>>  struct bpf_redirect_info {
>>  	u32 ifindex;
>>  	u32 flags;
>> +	void *item;
>>  	struct bpf_map *map;
>>  	struct bpf_map *map_to_flush;
>>  	u32 kern_flags;
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 7c6aef253173..9931cf02de19 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3098,6 +3098,14 @@ enum xdp_action {
>>  	XDP_REDIRECT,
>>  };
>>  
>> +/* Flags for bpf_xdp_redirect_map helper */
>> +
>> +/* The lower flag bits will be the return code of bpf_xdp_redirect_map() helper
>> + * if the map lookup fails.
>> + */
>> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
>> +#define XDP_REDIRECT_ALL_FLAGS XDP_REDIRECT_INVALID_MASK
>> +
>
> Slightly confused about the naming of the define, see later.
>
>>  /* user accessible metadata for XDP packet hook
>>   * new fields must be added to the end of this structure
>>   */
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 7a996887c500..dd43be497480 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>>  			       struct bpf_redirect_info *ri)
>>  {
>>  	u32 index = ri->ifindex;
>> -	void *fwd = NULL;
>> +	void *fwd = ri->item;
>>  	int err;
>>  
>>  	ri->ifindex = 0;
>> +	ri->item = NULL;
>>  	WRITE_ONCE(ri->map, NULL);
>>  
>> -	fwd = __xdp_map_lookup_elem(map, index);
>> -	if (unlikely(!fwd)) {
>> -		err = -EINVAL;
>> -		goto err;
>> -	}
>>  	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
>>  		xdp_do_flush_map();
>>  
>> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>>  {
>>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>  	u32 index = ri->ifindex;
>> -	void *fwd = NULL;
>> +	void *fwd = ri->item;
>>  	int err = 0;
>>  
>>  	ri->ifindex = 0;
>> +	ri->item = NULL;
>>  	WRITE_ONCE(ri->map, NULL);
>>  
>> -	fwd = __xdp_map_lookup_elem(map, index);
>> -	if (unlikely(!fwd)) {
>> -		err = -EINVAL;
>> -		goto err;
>> -	}
>> -
>>  	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
>>  		struct bpf_dtab_netdev *dst = fwd;
>>  
>> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
>>  
>>  	ri->ifindex = ifindex;
>>  	ri->flags = flags;
>> +	ri->item = NULL;
>>  	WRITE_ONCE(ri->map, NULL);
>>  
>>  	return XDP_REDIRECT;
>> @@ -3753,9 +3745,15 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>>  {
>>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>  
>> -	if (unlikely(flags))
>> +	if (unlikely(flags & ~XDP_REDIRECT_ALL_FLAGS))
>>  		return XDP_ABORTED;
>>  
>> +	ri->item = __xdp_map_lookup_elem(map, ifindex);
>> +	if (unlikely(!ri->item)) {
>> +		WRITE_ONCE(ri->map, NULL);
>> +		return (flags & XDP_REDIRECT_INVALID_MASK);
>
> Maybe I'm reading it wrong, but shouldn't the mask be called the "valid" mask?

It's the mask that is applied when the index looked up is invalid (i.e.,
the entry doesn't exist)? But yeah, can see how the name can be
confusing; maybe it should just be "RETURN_MASK" or something like that?

-Toke
Jakub Kicinski June 11, 2019, 9:48 p.m. UTC | #3
On Tue, 11 Jun 2019 17:44:00 +0200, Toke Høiland-Jørgensen wrote:
> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)

It feels a little strange to OR in values which are not bits, even if
it happens to work today (since those are values of 0, 1, 2, 3)...
Toke Høiland-Jørgensen June 12, 2019, 9:49 a.m. UTC | #4
Jakub Kicinski <jakub.kicinski@netronome.com> writes:

> On Tue, 11 Jun 2019 17:44:00 +0200, Toke Høiland-Jørgensen wrote:
>> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
>
> It feels a little strange to OR in values which are not bits, even if
> it happens to work today (since those are values of 0, 1, 2, 3)...

Yeah, I agree. But it also nicely expresses the extent in code.
Otherwise that would need to be in a comment, like

// we allow return codes of ABORTED/DROP/PASS/TX
#define XDP_REDIRECT_INVALID_MASK 3


Or do you have a better idea?

-Toke
Jakub Kicinski June 12, 2019, 7:45 p.m. UTC | #5
On Wed, 12 Jun 2019 11:49:17 +0200, Toke Høiland-Jørgensen wrote:
> Jakub Kicinski <jakub.kicinski@netronome.com> writes:
> 
> > On Tue, 11 Jun 2019 17:44:00 +0200, Toke Høiland-Jørgensen wrote:  
> >> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)  
> >
> > It feels a little strange to OR in values which are not bits, even if
> > it happens to work today (since those are values of 0, 1, 2, 3)...  
> 
> Yeah, I agree. But it also nicely expresses the extent in code.
> Otherwise that would need to be in a comment, like
> 
> // we allow return codes of ABORTED/DROP/PASS/TX
> #define XDP_REDIRECT_INVALID_MASK 3
> 
> 
> Or do you have a better idea?

flags > XDP_TX

In the future when we add more fields in flags:

if (flags & ~XDP_REDIRECT_FLAGS_MASK)
	return -EBLA;
if ((flags & XDP_REDIRECT_RETCODE_MASK) > XDP_TX))
	return -EFOO;

?
Maciej Fijalkowski June 12, 2019, 8:01 p.m. UTC | #6
On Tue, 11 Jun 2019 20:17:02 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> 
> > On Tue, 11 Jun 2019 17:44:00 +0200
> > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> >  
> >> From: Toke Høiland-Jørgensen <toke@redhat.com>
> >> 
> >> The bpf_redirect_map() helper used by XDP programs doesn't return any
> >> indication of whether it can successfully redirect to the map index it was
> >> given. Instead, BPF programs have to track this themselves, leading to
> >> programs using duplicate maps to track which entries are populated in the
> >> devmap.
> >> 
> >> This patch fixes this by moving the map lookup into the bpf_redirect_map()
> >> helper, which makes it possible to return failure to the eBPF program. The
> >> lower bits of the flags argument is used as the return code, which means
> >> that existing users who pass a '0' flag argument will get XDP_ABORTED.
> >> 
> >> With this, a BPF program can check the return code from the helper call and
> >> react by, for instance, substituting a different redirect. This works for
> >> any type of map used for redirect.
> >> 
> >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> >> ---
> >>  include/linux/filter.h   |    1 +
> >>  include/uapi/linux/bpf.h |    8 ++++++++
> >>  net/core/filter.c        |   26 ++++++++++++--------------
> >>  3 files changed, 21 insertions(+), 14 deletions(-)
> >> 
> >> diff --git a/include/linux/filter.h b/include/linux/filter.h
> >> index 43b45d6db36d..f31ae8b9035a 100644
> >> --- a/include/linux/filter.h
> >> +++ b/include/linux/filter.h
> >> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
> >>  struct bpf_redirect_info {
> >>  	u32 ifindex;
> >>  	u32 flags;
> >> +	void *item;
> >>  	struct bpf_map *map;
> >>  	struct bpf_map *map_to_flush;
> >>  	u32 kern_flags;
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index 7c6aef253173..9931cf02de19 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -3098,6 +3098,14 @@ enum xdp_action {
> >>  	XDP_REDIRECT,
> >>  };
> >>  
> >> +/* Flags for bpf_xdp_redirect_map helper */
> >> +
> >> +/* The lower flag bits will be the return code of bpf_xdp_redirect_map() helper
> >> + * if the map lookup fails.
> >> + */
> >> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
> >> +#define XDP_REDIRECT_ALL_FLAGS XDP_REDIRECT_INVALID_MASK
> >> +  
> >
> > Slightly confused about the naming of the define, see later.
> >  
> >>  /* user accessible metadata for XDP packet hook
> >>   * new fields must be added to the end of this structure
> >>   */
> >> diff --git a/net/core/filter.c b/net/core/filter.c
> >> index 7a996887c500..dd43be497480 100644
> >> --- a/net/core/filter.c
> >> +++ b/net/core/filter.c
> >> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
> >>  			       struct bpf_redirect_info *ri)
> >>  {
> >>  	u32 index = ri->ifindex;
> >> -	void *fwd = NULL;
> >> +	void *fwd = ri->item;
> >>  	int err;
> >>  
> >>  	ri->ifindex = 0;
> >> +	ri->item = NULL;
> >>  	WRITE_ONCE(ri->map, NULL);
> >>  
> >> -	fwd = __xdp_map_lookup_elem(map, index);
> >> -	if (unlikely(!fwd)) {
> >> -		err = -EINVAL;
> >> -		goto err;
> >> -	}
> >>  	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
> >>  		xdp_do_flush_map();
> >>  
> >> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
> >>  {
> >>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
> >>  	u32 index = ri->ifindex;
> >> -	void *fwd = NULL;
> >> +	void *fwd = ri->item;
> >>  	int err = 0;
> >>  
> >>  	ri->ifindex = 0;
> >> +	ri->item = NULL;
> >>  	WRITE_ONCE(ri->map, NULL);
> >>  
> >> -	fwd = __xdp_map_lookup_elem(map, index);
> >> -	if (unlikely(!fwd)) {
> >> -		err = -EINVAL;
> >> -		goto err;
> >> -	}
> >> -
> >>  	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> >>  		struct bpf_dtab_netdev *dst = fwd;
> >>  
> >> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
> >>  
> >>  	ri->ifindex = ifindex;
> >>  	ri->flags = flags;
> >> +	ri->item = NULL;
> >>  	WRITE_ONCE(ri->map, NULL);
> >>  
> >>  	return XDP_REDIRECT;
> >> @@ -3753,9 +3745,15 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
> >>  {
> >>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
> >>  
> >> -	if (unlikely(flags))
> >> +	if (unlikely(flags & ~XDP_REDIRECT_ALL_FLAGS))
> >>  		return XDP_ABORTED;
> >>  

Here you don't allow the flags to get different value than
XDP_REDIRECT_ALL_FLAGS.

> >> +	ri->item = __xdp_map_lookup_elem(map, ifindex);
> >> +	if (unlikely(!ri->item)) {
> >> +		WRITE_ONCE(ri->map, NULL);
> >> +		return (flags & XDP_REDIRECT_INVALID_MASK);  

So here you could just return flags? Don't we know that the flags value is
legit here? Am I missing something? TBH the v2 was more clear to me.

> >
> > Maybe I'm reading it wrong, but shouldn't the mask be called the "valid" mask?  
> 
> It's the mask that is applied when the index looked up is invalid (i.e.,
> the entry doesn't exist)? But yeah, can see how the name can be
> confusing; maybe it should just be "RETURN_MASK" or something like that?

Maybe something along ALLOWED_RETVAL_MASK?

> 
> -Toke
Toke Høiland-Jørgensen June 12, 2019, 9:33 p.m. UTC | #7
Maciej Fijalkowski <maciejromanfijalkowski@gmail.com> writes:

> On Tue, 11 Jun 2019 20:17:02 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>> 
>> > On Tue, 11 Jun 2019 17:44:00 +0200
>> > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>> >  
>> >> From: Toke Høiland-Jørgensen <toke@redhat.com>
>> >> 
>> >> The bpf_redirect_map() helper used by XDP programs doesn't return any
>> >> indication of whether it can successfully redirect to the map index it was
>> >> given. Instead, BPF programs have to track this themselves, leading to
>> >> programs using duplicate maps to track which entries are populated in the
>> >> devmap.
>> >> 
>> >> This patch fixes this by moving the map lookup into the bpf_redirect_map()
>> >> helper, which makes it possible to return failure to the eBPF program. The
>> >> lower bits of the flags argument is used as the return code, which means
>> >> that existing users who pass a '0' flag argument will get XDP_ABORTED.
>> >> 
>> >> With this, a BPF program can check the return code from the helper call and
>> >> react by, for instance, substituting a different redirect. This works for
>> >> any type of map used for redirect.
>> >> 
>> >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> >> ---
>> >>  include/linux/filter.h   |    1 +
>> >>  include/uapi/linux/bpf.h |    8 ++++++++
>> >>  net/core/filter.c        |   26 ++++++++++++--------------
>> >>  3 files changed, 21 insertions(+), 14 deletions(-)
>> >> 
>> >> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> >> index 43b45d6db36d..f31ae8b9035a 100644
>> >> --- a/include/linux/filter.h
>> >> +++ b/include/linux/filter.h
>> >> @@ -580,6 +580,7 @@ struct bpf_skb_data_end {
>> >>  struct bpf_redirect_info {
>> >>  	u32 ifindex;
>> >>  	u32 flags;
>> >> +	void *item;
>> >>  	struct bpf_map *map;
>> >>  	struct bpf_map *map_to_flush;
>> >>  	u32 kern_flags;
>> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> >> index 7c6aef253173..9931cf02de19 100644
>> >> --- a/include/uapi/linux/bpf.h
>> >> +++ b/include/uapi/linux/bpf.h
>> >> @@ -3098,6 +3098,14 @@ enum xdp_action {
>> >>  	XDP_REDIRECT,
>> >>  };
>> >>  
>> >> +/* Flags for bpf_xdp_redirect_map helper */
>> >> +
>> >> +/* The lower flag bits will be the return code of bpf_xdp_redirect_map() helper
>> >> + * if the map lookup fails.
>> >> + */
>> >> +#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
>> >> +#define XDP_REDIRECT_ALL_FLAGS XDP_REDIRECT_INVALID_MASK
>> >> +  
>> >
>> > Slightly confused about the naming of the define, see later.
>> >  
>> >>  /* user accessible metadata for XDP packet hook
>> >>   * new fields must be added to the end of this structure
>> >>   */
>> >> diff --git a/net/core/filter.c b/net/core/filter.c
>> >> index 7a996887c500..dd43be497480 100644
>> >> --- a/net/core/filter.c
>> >> +++ b/net/core/filter.c
>> >> @@ -3608,17 +3608,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>> >>  			       struct bpf_redirect_info *ri)
>> >>  {
>> >>  	u32 index = ri->ifindex;
>> >> -	void *fwd = NULL;
>> >> +	void *fwd = ri->item;
>> >>  	int err;
>> >>  
>> >>  	ri->ifindex = 0;
>> >> +	ri->item = NULL;
>> >>  	WRITE_ONCE(ri->map, NULL);
>> >>  
>> >> -	fwd = __xdp_map_lookup_elem(map, index);
>> >> -	if (unlikely(!fwd)) {
>> >> -		err = -EINVAL;
>> >> -		goto err;
>> >> -	}
>> >>  	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
>> >>  		xdp_do_flush_map();
>> >>  
>> >> @@ -3655,18 +3651,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>> >>  {
>> >>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>> >>  	u32 index = ri->ifindex;
>> >> -	void *fwd = NULL;
>> >> +	void *fwd = ri->item;
>> >>  	int err = 0;
>> >>  
>> >>  	ri->ifindex = 0;
>> >> +	ri->item = NULL;
>> >>  	WRITE_ONCE(ri->map, NULL);
>> >>  
>> >> -	fwd = __xdp_map_lookup_elem(map, index);
>> >> -	if (unlikely(!fwd)) {
>> >> -		err = -EINVAL;
>> >> -		goto err;
>> >> -	}
>> >> -
>> >>  	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
>> >>  		struct bpf_dtab_netdev *dst = fwd;
>> >>  
>> >> @@ -3735,6 +3726,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
>> >>  
>> >>  	ri->ifindex = ifindex;
>> >>  	ri->flags = flags;
>> >> +	ri->item = NULL;
>> >>  	WRITE_ONCE(ri->map, NULL);
>> >>  
>> >>  	return XDP_REDIRECT;
>> >> @@ -3753,9 +3745,15 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>> >>  {
>> >>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>> >>  
>> >> -	if (unlikely(flags))
>> >> +	if (unlikely(flags & ~XDP_REDIRECT_ALL_FLAGS))
>> >>  		return XDP_ABORTED;
>> >>  
>
> Here you don't allow the flags to get different value than
> XDP_REDIRECT_ALL_FLAGS.
>
>> >> +	ri->item = __xdp_map_lookup_elem(map, ifindex);
>> >> +	if (unlikely(!ri->item)) {
>> >> +		WRITE_ONCE(ri->map, NULL);
>> >> +		return (flags & XDP_REDIRECT_INVALID_MASK);  
>
> So here you could just return flags? Don't we know that the flags
> value is legit here? Am I missing something?

No, you're right. I was just worried that we would forget to change this
when we add another flag later on, so I thought I'd mask it from the
get-go. Can't the compiler figure out that the second mask is unnecessary?

> TBH the v2 was more clear to me.

Clearer how? As in, you prefer just always returning XDP_PASS on error?

-Toke

>> > Maybe I'm reading it wrong, but shouldn't the mask be called the "valid" mask?  
>> 
>> It's the mask that is applied when the index looked up is invalid (i.e.,
>> the entry doesn't exist)? But yeah, can see how the name can be
>> confusing; maybe it should just be "RETURN_MASK" or something like that?
>
> Maybe something along ALLOWED_RETVAL_MASK?

Yeah, good idea! :)

-Toke

Patch
diff mbox series

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43b45d6db36d..f31ae8b9035a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -580,6 +580,7 @@  struct bpf_skb_data_end {
 struct bpf_redirect_info {
 	u32 ifindex;
 	u32 flags;
+	void *item;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
 	u32 kern_flags;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7c6aef253173..9931cf02de19 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3098,6 +3098,14 @@  enum xdp_action {
 	XDP_REDIRECT,
 };
 
+/* Flags for bpf_xdp_redirect_map helper */
+
+/* The lower flag bits will be the return code of bpf_xdp_redirect_map() helper
+ * if the map lookup fails.
+ */
+#define XDP_REDIRECT_INVALID_MASK (XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX)
+#define XDP_REDIRECT_ALL_FLAGS XDP_REDIRECT_INVALID_MASK
+
 /* user accessible metadata for XDP packet hook
  * new fields must be added to the end of this structure
  */
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a996887c500..dd43be497480 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3608,17 +3608,13 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_redirect_info *ri)
 {
 	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	void *fwd = ri->item;
 	int err;
 
 	ri->ifindex = 0;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
 	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
 		xdp_do_flush_map();
 
@@ -3655,18 +3651,13 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	void *fwd = ri->item;
 	int err = 0;
 
 	ri->ifindex = 0;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
-
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		struct bpf_dtab_netdev *dst = fwd;
 
@@ -3735,6 +3726,7 @@  BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->item = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
@@ -3753,9 +3745,15 @@  BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
-	if (unlikely(flags))
+	if (unlikely(flags & ~XDP_REDIRECT_ALL_FLAGS))
 		return XDP_ABORTED;
 
+	ri->item = __xdp_map_lookup_elem(map, ifindex);
+	if (unlikely(!ri->item)) {
+		WRITE_ONCE(ri->map, NULL);
+		return (flags & XDP_REDIRECT_INVALID_MASK);
+	}
+
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	WRITE_ONCE(ri->map, map);