diff mbox series

[RFC,bpf-next,2/3] bpf: Add helper to do forwarding lookups in kernel FDB table

Message ID 1596170660-5582-3-git-send-email-komachi.yoshiki@gmail.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series Add a new bpf helper for FDB lookup | expand

Commit Message

Yoshiki Komachi July 31, 2020, 4:44 a.m. UTC
This patch adds a new bpf helper to access FDB in the kernel tables
from XDP programs. The helper enables us to find the destination port
of master bridge in XDP layer with high speed. If an entry in the
tables is successfully found, egress device index will be returned.

In cases of failure, packets will be dropped or forwarded to upper
networking stack in the kernel by XDP programs. Multicast and broadcast
packets are currently not supported. Thus, these will need to be
passed to upper layer on the basis of XDP_PASS action.

The API uses destination MAC and VLAN ID as keys, so XDP programs
need to extract these from forwarded packets.

Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
---
 include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++
 net/core/filter.c              | 45 ++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  1 +
 tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++
 4 files changed, 102 insertions(+)

Comments

Maciej Fijalkowski July 31, 2020, 11:52 a.m. UTC | #1
On Fri, Jul 31, 2020 at 01:44:19PM +0900, Yoshiki Komachi wrote:
> This patch adds a new bpf helper to access FDB in the kernel tables
> from XDP programs. The helper enables us to find the destination port
> of master bridge in XDP layer with high speed. If an entry in the
> tables is successfully found, egress device index will be returned.
> 
> In cases of failure, packets will be dropped or forwarded to upper
> networking stack in the kernel by XDP programs. Multicast and broadcast
> packets are currently not supported. Thus, these will need to be
> passed to upper layer on the basis of XDP_PASS action.
> 
> The API uses destination MAC and VLAN ID as keys, so XDP programs
> need to extract these from forwarded packets.
> 
> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++
>  net/core/filter.c              | 45 ++++++++++++++++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  1 +
>  tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++
>  4 files changed, 102 insertions(+)
> 

[...]

> diff --git a/net/core/filter.c b/net/core/filter.c
> index 654c346b7d91..68800d1b8cd5 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -45,6 +45,7 @@
>  #include <linux/filter.h>
>  #include <linux/ratelimit.h>
>  #include <linux/seccomp.h>
> +#include <linux/if_bridge.h>
>  #include <linux/if_vlan.h>
>  #include <linux/bpf.h>
>  #include <linux/btf.h>
> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>  	.arg4_type	= ARG_ANYTHING,
>  };
>  
> +#if IS_ENABLED(CONFIG_BRIDGE)
> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
> +{
> +	struct net_device *src, *dst;
> +	struct net *net;
> +
> +	if (plen < sizeof(*params))
> +		return -EINVAL;
> +
> +	net = dev_net(ctx->rxq->dev);
> +
> +	if (is_multicast_ether_addr(params->addr) ||
> +	    is_broadcast_ether_addr(params->addr))
> +		return BPF_FDB_LKUP_RET_NOENT;

small nit: you could move that validation before dev_net() call.

> +
> +	src = dev_get_by_index_rcu(net, params->ifindex);
> +	if (unlikely(!src))
> +		return -ENODEV;
> +
> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
> +	if (dst) {
> +		params->ifindex = dst->ifindex;
> +		return BPF_FDB_LKUP_RET_SUCCESS;
> +	}
> +
> +	return BPF_FDB_LKUP_RET_NOENT;
> +}
David Ahern July 31, 2020, 5:15 p.m. UTC | #2
On 7/30/20 10:44 PM, Yoshiki Komachi wrote:
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 654c346b7d91..68800d1b8cd5 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>  	.arg4_type	= ARG_ANYTHING,
>  };
>  
> +#if IS_ENABLED(CONFIG_BRIDGE)
> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
> +{
> +	struct net_device *src, *dst;
> +	struct net *net;
> +
> +	if (plen < sizeof(*params))
> +		return -EINVAL;

I need to look at the details more closely, but on first reading 2
things caught me eye:
1. you need to make sure flags is 0 since there are no supported flags
at the moment, and

> +
> +	net = dev_net(ctx->rxq->dev);
> +
> +	if (is_multicast_ether_addr(params->addr) ||
> +	    is_broadcast_ether_addr(params->addr))
> +		return BPF_FDB_LKUP_RET_NOENT;
> +
> +	src = dev_get_by_index_rcu(net, params->ifindex);
> +	if (unlikely(!src))
> +		return -ENODEV;
> +
> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);

2. this needs to be done via netdev ops to avoid referencing bridge code
which can be compiled as a module. I suspect the build robots will id
this part soon.
Daniel Borkmann July 31, 2020, 9:12 p.m. UTC | #3
On 7/31/20 6:44 AM, Yoshiki Komachi wrote:
> This patch adds a new bpf helper to access FDB in the kernel tables
> from XDP programs. The helper enables us to find the destination port
> of master bridge in XDP layer with high speed. If an entry in the
> tables is successfully found, egress device index will be returned.
> 
> In cases of failure, packets will be dropped or forwarded to upper
> networking stack in the kernel by XDP programs. Multicast and broadcast
> packets are currently not supported. Thus, these will need to be
> passed to upper layer on the basis of XDP_PASS action.
> 
> The API uses destination MAC and VLAN ID as keys, so XDP programs
> need to extract these from forwarded packets.
> 
> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>

Few initial comments below:

> ---
>   include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++
>   net/core/filter.c              | 45 ++++++++++++++++++++++++++++++++++
>   scripts/bpf_helpers_doc.py     |  1 +
>   tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++
>   4 files changed, 102 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 54d0c886e3ba..f2e729dd1721 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2149,6 +2149,22 @@ union bpf_attr {
>    *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
>    *		  packet is not forwarded or needs assist from full stack
>    *
> + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags)
> + *	Description
> + *		Do FDB lookup in kernel tables using parameters in *params*.
> + *		If lookup is successful (ie., FDB lookup finds a destination entry),
> + *		ifindex is set to the egress device index from the FDB lookup.
> + *		Both multicast and broadcast packets are currently unsupported
> + *		in XDP layer.
> + *
> + *		*plen* argument is the size of the passed **struct bpf_fdb_lookup**.
> + *		*ctx* is only **struct xdp_md** for XDP programs.
> + *
> + *     Return
> + *		* < 0 if any input argument is invalid
> + *		*   0 on success (destination port is found)
> + *		* > 0 on failure (there is no entry)
> + *
>    * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
>    *	Description
>    *		Add an entry to, or update a sockhash *map* referencing sockets.
> @@ -3449,6 +3465,7 @@ union bpf_attr {
>   	FN(get_stack),			\
>   	FN(skb_load_bytes_relative),	\
>   	FN(fib_lookup),			\
> +	FN(fdb_lookup),			\

This breaks UAPI. Needs to be added to the very end of the list.

>   	FN(sock_hash_update),		\
>   	FN(msg_redirect_hash),		\
>   	FN(sk_redirect_hash),		\
> @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup {
>   	__u8	dmac[6];     /* ETH_ALEN */
>   };
>   
> +enum {
> +	BPF_FDB_LKUP_RET_SUCCESS,      /* lookup successful */
> +	BPF_FDB_LKUP_RET_NOENT,        /* entry is not found */
> +};
> +
> +struct bpf_fdb_lookup {
> +	unsigned char addr[6];     /* ETH_ALEN */
> +	__u16 vlan_id;
> +	__u32 ifindex;
> +};
> +
>   enum bpf_task_fd_type {
>   	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
>   	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 654c346b7d91..68800d1b8cd5 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -45,6 +45,7 @@
>   #include <linux/filter.h>
>   #include <linux/ratelimit.h>
>   #include <linux/seccomp.h>
> +#include <linux/if_bridge.h>
>   #include <linux/if_vlan.h>
>   #include <linux/bpf.h>
>   #include <linux/btf.h>
> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>   	.arg4_type	= ARG_ANYTHING,
>   };
>   
> +#if IS_ENABLED(CONFIG_BRIDGE)
> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
> +{
> +	struct net_device *src, *dst;
> +	struct net *net;
> +
> +	if (plen < sizeof(*params))
> +		return -EINVAL;

Given flags are not used, this needs to reject anything invalid otherwise
you're not able to extend it in future.

> +	net = dev_net(ctx->rxq->dev);
> +
> +	if (is_multicast_ether_addr(params->addr) ||
> +	    is_broadcast_ether_addr(params->addr))
> +		return BPF_FDB_LKUP_RET_NOENT;
> +
> +	src = dev_get_by_index_rcu(net, params->ifindex);
> +	if (unlikely(!src))
> +		return -ENODEV;
> +
> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
> +	if (dst) {
> +		params->ifindex = dst->ifindex;
> +		return BPF_FDB_LKUP_RET_SUCCESS;
> +	}

Currently the helper description says nothing that this is /only/ limited to
bridges. I think it would be better to also name the helper bpf_br_fdb_lookup()
as well if so to avoid any confusion.

> +	return BPF_FDB_LKUP_RET_NOENT;
> +}
> +
> +static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = {
> +	.func		= bpf_xdp_fdb_lookup,
> +	.gpl_only	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type      = ARG_PTR_TO_CTX,
> +	.arg2_type      = ARG_PTR_TO_MEM,
> +	.arg3_type      = ARG_CONST_SIZE,
> +	.arg4_type	= ARG_ANYTHING,
> +};
> +#endif

This should also have a tc pendant (similar as done in routing lookup helper)
in case native XDP is not available. This will be useful for those that have
the same code compilable for both tc/XDP.

>   #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
>   static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
>   {
> @@ -6477,6 +6518,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   		return &bpf_xdp_adjust_tail_proto;
>   	case BPF_FUNC_fib_lookup:
>   		return &bpf_xdp_fib_lookup_proto;
> +#if IS_ENABLED(CONFIG_BRIDGE)
> +	case BPF_FUNC_fdb_lookup:
> +		return &bpf_xdp_fdb_lookup_proto;
> +#endif
>   #ifdef CONFIG_INET
>   	case BPF_FUNC_sk_lookup_udp:
>   		return &bpf_xdp_sk_lookup_udp_proto;
Yoshiki Komachi Aug. 4, 2020, 8:44 a.m. UTC | #4
> 2020/07/31 20:52、Maciej Fijalkowski <maciej.fijalkowski@intel.com>のメール:
> 
> On Fri, Jul 31, 2020 at 01:44:19PM +0900, Yoshiki Komachi wrote:
>> This patch adds a new bpf helper to access FDB in the kernel tables
>> from XDP programs. The helper enables us to find the destination port
>> of master bridge in XDP layer with high speed. If an entry in the
>> tables is successfully found, egress device index will be returned.
>> 
>> In cases of failure, packets will be dropped or forwarded to upper
>> networking stack in the kernel by XDP programs. Multicast and broadcast
>> packets are currently not supported. Thus, these will need to be
>> passed to upper layer on the basis of XDP_PASS action.
>> 
>> The API uses destination MAC and VLAN ID as keys, so XDP programs
>> need to extract these from forwarded packets.
>> 
>> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
>> ---
>> include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++
>> net/core/filter.c              | 45 ++++++++++++++++++++++++++++++++++
>> scripts/bpf_helpers_doc.py     |  1 +
>> tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++
>> 4 files changed, 102 insertions(+)
>> 
> 
> [...]
> 
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 654c346b7d91..68800d1b8cd5 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -45,6 +45,7 @@
>> #include <linux/filter.h>
>> #include <linux/ratelimit.h>
>> #include <linux/seccomp.h>
>> +#include <linux/if_bridge.h>
>> #include <linux/if_vlan.h>
>> #include <linux/bpf.h>
>> #include <linux/btf.h>
>> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>> 	.arg4_type	= ARG_ANYTHING,
>> };
>> 
>> +#if IS_ENABLED(CONFIG_BRIDGE)
>> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
>> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
>> +{
>> +	struct net_device *src, *dst;
>> +	struct net *net;
>> +
>> +	if (plen < sizeof(*params))
>> +		return -EINVAL;
>> +
>> +	net = dev_net(ctx->rxq->dev);
>> +
>> +	if (is_multicast_ether_addr(params->addr) ||
>> +	    is_broadcast_ether_addr(params->addr))
>> +		return BPF_FDB_LKUP_RET_NOENT;
> 
> small nit: you could move that validation before dev_net() call.

Thanks for your quick response.
I will try to fix it in the next version.

Best regards,

>> +
>> +	src = dev_get_by_index_rcu(net, params->ifindex);
>> +	if (unlikely(!src))
>> +		return -ENODEV;
>> +
>> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
>> +	if (dst) {
>> +		params->ifindex = dst->ifindex;
>> +		return BPF_FDB_LKUP_RET_SUCCESS;
>> +	}
>> +
>> +	return BPF_FDB_LKUP_RET_NOENT;
>> +}

—
Yoshiki Komachi
komachi.yoshiki@gmail.com
Yoshiki Komachi Aug. 4, 2020, 11:27 a.m. UTC | #5
> 2020/08/01 2:15、David Ahern <dsahern@gmail.com>のメール:
> 
> On 7/30/20 10:44 PM, Yoshiki Komachi wrote:
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 654c346b7d91..68800d1b8cd5 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>> 	.arg4_type	= ARG_ANYTHING,
>> };
>> 
>> +#if IS_ENABLED(CONFIG_BRIDGE)
>> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
>> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
>> +{
>> +	struct net_device *src, *dst;
>> +	struct net *net;
>> +
>> +	if (plen < sizeof(*params))
>> +		return -EINVAL;
> 
> I need to look at the details more closely, but on first reading 2
> things caught me eye:
> 1. you need to make sure flags is 0 since there are no supported flags
> at the moment, and

Thanks for your initial comments!

I will make sure whether this flag is required or not.

>> +
>> +	net = dev_net(ctx->rxq->dev);
>> +
>> +	if (is_multicast_ether_addr(params->addr) ||
>> +	    is_broadcast_ether_addr(params->addr))
>> +		return BPF_FDB_LKUP_RET_NOENT;
>> +
>> +	src = dev_get_by_index_rcu(net, params->ifindex);
>> +	if (unlikely(!src))
>> +		return -ENODEV;
>> +
>> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
> 
> 2. this needs to be done via netdev ops to avoid referencing bridge code
> which can be compiled as a module. I suspect the build robots will id
> this part soon.

I guess that no build errors will occur because the API is allowed when
CONFIG_BRIDGE is enabled.

I successfully build my kernel applying this patch, and I don’t receive any
messages from build robots for now.

Thanks & Best regards,


—
Yoshiki Komachi
komachi.yoshiki@gmail.com
Yoshiki Komachi Aug. 5, 2020, 4:45 a.m. UTC | #6
> 2020/08/01 6:12、Daniel Borkmann <daniel@iogearbox.net>のメール:
> 
> On 7/31/20 6:44 AM, Yoshiki Komachi wrote:
>> This patch adds a new bpf helper to access FDB in the kernel tables
>> from XDP programs. The helper enables us to find the destination port
>> of master bridge in XDP layer with high speed. If an entry in the
>> tables is successfully found, egress device index will be returned.
>> In cases of failure, packets will be dropped or forwarded to upper
>> networking stack in the kernel by XDP programs. Multicast and broadcast
>> packets are currently not supported. Thus, these will need to be
>> passed to upper layer on the basis of XDP_PASS action.
>> The API uses destination MAC and VLAN ID as keys, so XDP programs
>> need to extract these from forwarded packets.
>> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
> 
> Few initial comments below:
> 
>> ---
>>  include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++
>>  net/core/filter.c              | 45 ++++++++++++++++++++++++++++++++++
>>  scripts/bpf_helpers_doc.py     |  1 +
>>  tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++
>>  4 files changed, 102 insertions(+)
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 54d0c886e3ba..f2e729dd1721 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2149,6 +2149,22 @@ union bpf_attr {
>>   *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
>>   *		  packet is not forwarded or needs assist from full stack
>>   *
>> + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags)
>> + *	Description
>> + *		Do FDB lookup in kernel tables using parameters in *params*.
>> + *		If lookup is successful (ie., FDB lookup finds a destination entry),
>> + *		ifindex is set to the egress device index from the FDB lookup.
>> + *		Both multicast and broadcast packets are currently unsupported
>> + *		in XDP layer.
>> + *
>> + *		*plen* argument is the size of the passed **struct bpf_fdb_lookup**.
>> + *		*ctx* is only **struct xdp_md** for XDP programs.
>> + *
>> + *     Return
>> + *		* < 0 if any input argument is invalid
>> + *		*   0 on success (destination port is found)
>> + *		* > 0 on failure (there is no entry)
>> + *
>>   * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
>>   *	Description
>>   *		Add an entry to, or update a sockhash *map* referencing sockets.
>> @@ -3449,6 +3465,7 @@ union bpf_attr {
>>  	FN(get_stack),			\
>>  	FN(skb_load_bytes_relative),	\
>>  	FN(fib_lookup),			\
>> +	FN(fdb_lookup),			\
> 
> This breaks UAPI. Needs to be added to the very end of the list.

I figured it out and will move it to the very end of the list. Thanks!

>>  	FN(sock_hash_update),		\
>>  	FN(msg_redirect_hash),		\
>>  	FN(sk_redirect_hash),		\
>> @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup {
>>  	__u8	dmac[6];     /* ETH_ALEN */
>>  };
>>  +enum {
>> +	BPF_FDB_LKUP_RET_SUCCESS,      /* lookup successful */
>> +	BPF_FDB_LKUP_RET_NOENT,        /* entry is not found */
>> +};
>> +
>> +struct bpf_fdb_lookup {
>> +	unsigned char addr[6];     /* ETH_ALEN */
>> +	__u16 vlan_id;
>> +	__u32 ifindex;
>> +};
>> +
>>  enum bpf_task_fd_type {
>>  	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
>>  	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 654c346b7d91..68800d1b8cd5 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -45,6 +45,7 @@
>>  #include <linux/filter.h>
>>  #include <linux/ratelimit.h>
>>  #include <linux/seccomp.h>
>> +#include <linux/if_bridge.h>
>>  #include <linux/if_vlan.h>
>>  #include <linux/bpf.h>
>>  #include <linux/btf.h>
>> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
>>  	.arg4_type	= ARG_ANYTHING,
>>  };
>>  +#if IS_ENABLED(CONFIG_BRIDGE)
>> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
>> +	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
>> +{
>> +	struct net_device *src, *dst;
>> +	struct net *net;
>> +
>> +	if (plen < sizeof(*params))
>> +		return -EINVAL;
> 
> Given flags are not used, this needs to reject anything invalid otherwise
> you're not able to extend it in future.

I added this flags based on the bpf_fib_lookup() helper, but these are not
used at this point.

I will make sure whether this flags are required or not.

>> +	net = dev_net(ctx->rxq->dev);
>> +
>> +	if (is_multicast_ether_addr(params->addr) ||
>> +	    is_broadcast_ether_addr(params->addr))
>> +		return BPF_FDB_LKUP_RET_NOENT;
>> +
>> +	src = dev_get_by_index_rcu(net, params->ifindex);
>> +	if (unlikely(!src))
>> +		return -ENODEV;
>> +
>> +	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
>> +	if (dst) {
>> +		params->ifindex = dst->ifindex;
>> +		return BPF_FDB_LKUP_RET_SUCCESS;
>> +	}
> 
> Currently the helper description says nothing that this is /only/ limited to
> bridges. I think it would be better to also name the helper bpf_br_fdb_lookup()
> as well if so to avoid any confusion.

For now, the helper enables only bridges to access FDB table in the kernel
as you understand, so I will try to rename the helper in the next version.

>> +	return BPF_FDB_LKUP_RET_NOENT;
>> +}
>> +
>> +static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = {
>> +	.func		= bpf_xdp_fdb_lookup,
>> +	.gpl_only	= true,
>> +	.ret_type	= RET_INTEGER,
>> +	.arg1_type      = ARG_PTR_TO_CTX,
>> +	.arg2_type      = ARG_PTR_TO_MEM,
>> +	.arg3_type      = ARG_CONST_SIZE,
>> +	.arg4_type	= ARG_ANYTHING,
>> +};
>> +#endif
> 
> This should also have a tc pendant (similar as done in routing lookup helper)
> in case native XDP is not available. This will be useful for those that have
> the same code compilable for both tc/XDP.

Thanks, I agree with your idea. On the basis of the bpf_fib_lookup() helper,
I will try to add the feature so that the helper can also be used in the TC hook.

Best regards,

>>  #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
>>  static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
>>  {
>> @@ -6477,6 +6518,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>  		return &bpf_xdp_adjust_tail_proto;
>>  	case BPF_FUNC_fib_lookup:
>>  		return &bpf_xdp_fib_lookup_proto;
>> +#if IS_ENABLED(CONFIG_BRIDGE)
>> +	case BPF_FUNC_fdb_lookup:
>> +		return &bpf_xdp_fdb_lookup_proto;
>> +#endif
>>  #ifdef CONFIG_INET
>>  	case BPF_FUNC_sk_lookup_udp:
>>  		return &bpf_xdp_sk_lookup_udp_proto;

—
Yoshiki Komachi
komachi.yoshiki@gmail.com
David Ahern Aug. 5, 2020, 4:38 p.m. UTC | #7
On 8/4/20 5:27 AM, Yoshiki Komachi wrote:
> 
> I guess that no build errors will occur because the API is allowed when
> CONFIG_BRIDGE is enabled.
> 
> I successfully build my kernel applying this patch, and I don’t receive any
> messages from build robots for now.

If CONFIG_BRIDGE is a module, build should fail: filter.c is built-in
trying to access a symbol from module.
Yoshiki Komachi Aug. 7, 2020, 8:06 a.m. UTC | #8
> 2020/08/06 1:38、David Ahern <dsahern@gmail.com>のメール:
> 
> On 8/4/20 5:27 AM, Yoshiki Komachi wrote:
>> 
>> I guess that no build errors will occur because the API is allowed when
>> CONFIG_BRIDGE is enabled.
>> 
>> I successfully build my kernel applying this patch, and I don’t receive any
>> messages from build robots for now.
> 
> If CONFIG_BRIDGE is a module, build should fail: filter.c is built-in
> trying to access a symbol from module.

When I tried building my kernel with CONFIG_BRIDGE set as a module, I got
the following error as you pointed out:

    ld: net/core/filter.o: in function `____bpf_xdp_fdb_lookup':
    /root/bpf-next/net/core/filter.c:5108: undefined reference to `br_fdb_find_port_xdp'

It may be necessary to fix it to support kernels built with CONFIG_BRIDGE set
as a mfodule, so let me make sure if it should be called via netdev ops to get
destination port in a bridge again.

Thanks & Best regards,


—
Yoshiki Komachi
komachi.yoshiki@gmail.com
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54d0c886e3ba..f2e729dd1721 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2149,6 +2149,22 @@  union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
+ * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags)
+ *	Description
+ *		Do FDB lookup in kernel tables using parameters in *params*.
+ *		If lookup is successful (ie., FDB lookup finds a destination entry),
+ *		ifindex is set to the egress device index from the FDB lookup.
+ *		Both multicast and broadcast packets are currently unsupported
+ *		in XDP layer.
+ *
+ *		*plen* argument is the size of the passed **struct bpf_fdb_lookup**.
+ *		*ctx* is only **struct xdp_md** for XDP programs.
+ *
+ *     Return
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (destination port is found)
+ *		* > 0 on failure (there is no entry)
+ *
  * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
@@ -3449,6 +3465,7 @@  union bpf_attr {
 	FN(get_stack),			\
 	FN(skb_load_bytes_relative),	\
 	FN(fib_lookup),			\
+	FN(fdb_lookup),			\
 	FN(sock_hash_update),		\
 	FN(msg_redirect_hash),		\
 	FN(sk_redirect_hash),		\
@@ -4328,6 +4345,17 @@  struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+enum {
+	BPF_FDB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FDB_LKUP_RET_NOENT,        /* entry is not found */
+};
+
+struct bpf_fdb_lookup {
+	unsigned char addr[6];     /* ETH_ALEN */
+	__u16 vlan_id;
+	__u32 ifindex;
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
diff --git a/net/core/filter.c b/net/core/filter.c
index 654c346b7d91..68800d1b8cd5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,7 @@ 
 #include <linux/filter.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
+#include <linux/if_bridge.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
@@ -5084,6 +5085,46 @@  static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+#if IS_ENABLED(CONFIG_BRIDGE)
+BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx,
+	   struct bpf_fdb_lookup *, params, int, plen, u32, flags)
+{
+	struct net_device *src, *dst;
+	struct net *net;
+
+	if (plen < sizeof(*params))
+		return -EINVAL;
+
+	net = dev_net(ctx->rxq->dev);
+
+	if (is_multicast_ether_addr(params->addr) ||
+	    is_broadcast_ether_addr(params->addr))
+		return BPF_FDB_LKUP_RET_NOENT;
+
+	src = dev_get_by_index_rcu(net, params->ifindex);
+	if (unlikely(!src))
+		return -ENODEV;
+
+	dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id);
+	if (dst) {
+		params->ifindex = dst->ifindex;
+		return BPF_FDB_LKUP_RET_SUCCESS;
+	}
+
+	return BPF_FDB_LKUP_RET_NOENT;
+}
+
+static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = {
+	.func		= bpf_xdp_fdb_lookup,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+#endif
+
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
 {
@@ -6477,6 +6518,10 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+#if IS_ENABLED(CONFIG_BRIDGE)
+	case BPF_FUNC_fdb_lookup:
+		return &bpf_xdp_fdb_lookup_proto;
+#endif
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 5bfa448b4704..49ebd2273614 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -448,6 +448,7 @@  class PrinterHelpers(Printer):
             '__wsum',
 
             'struct bpf_fib_lookup',
+            'struct bpf_fdb_lookup',
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..f2e729dd1721 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2149,6 +2149,22 @@  union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
+ * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags)
+ *	Description
+ *		Do FDB lookup in kernel tables using parameters in *params*.
+ *		If lookup is successful (ie., FDB lookup finds a destination entry),
+ *		ifindex is set to the egress device index from the FDB lookup.
+ *		Both multicast and broadcast packets are currently unsupported
+ *		in XDP layer.
+ *
+ *		*plen* argument is the size of the passed **struct bpf_fdb_lookup**.
+ *		*ctx* is only **struct xdp_md** for XDP programs.
+ *
+ *     Return
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (destination port is found)
+ *		* > 0 on failure (there is no entry)
+ *
  * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
@@ -3449,6 +3465,7 @@  union bpf_attr {
 	FN(get_stack),			\
 	FN(skb_load_bytes_relative),	\
 	FN(fib_lookup),			\
+	FN(fdb_lookup),			\
 	FN(sock_hash_update),		\
 	FN(msg_redirect_hash),		\
 	FN(sk_redirect_hash),		\
@@ -4328,6 +4345,17 @@  struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+enum {
+	BPF_FDB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FDB_LKUP_RET_NOENT,        /* entry is not found */
+};
+
+struct bpf_fdb_lookup {
+	unsigned char addr[6];     /* ETH_ALEN */
+	__u16 vlan_id;
+	__u32 ifindex;
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */