diff mbox series

[bpf-next,3/3] bpf: Add mtu checking to FIB forwarding helper

Message ID 20180517160930.25076-4-dsahern@gmail.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: Add MTU check to fib lookup helper | expand

Commit Message

David Ahern May 17, 2018, 4:09 p.m. UTC
Add check that egress MTU can handle packet to be forwarded. If
the MTU is less than the packet lenght, return 0 meaning the
packet is expected to continue up the stack for help - eg.,
fragmenting the packet or sending an ICMP.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/core/filter.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

Daniel Borkmann May 17, 2018, 10:22 p.m. UTC | #1
On 05/17/2018 06:09 PM, David Ahern wrote:
> Add check that egress MTU can handle packet to be forwarded. If
> the MTU is less than the packet lenght, return 0 meaning the
> packet is expected to continue up the stack for help - eg.,
> fragmenting the packet or sending an ICMP.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/core/filter.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 6d0d1560bd70..c47c47a75d4b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  	struct fib_nh *nh;
>  	struct flowi4 fl4;
>  	int err;
> +	u32 mtu;
>  
>  	dev = dev_get_by_index_rcu(net, params->ifindex);
>  	if (unlikely(!dev))
> @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  	if (res.fi->fib_nhs > 1)
>  		fib_select_path(net, &res, &fl4, NULL);
>  
> +	mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
> +	if (params->tot_len > mtu)
> +		return 0;
> +
>  	nh = &res.fi->fib_nh[res.nh_sel];
>  
>  	/* do not handle lwt encaps right now */
> @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  	struct flowi6 fl6;
>  	int strict = 0;
>  	int oif;
> +	u32 mtu;
>  
>  	/* link local addresses are never forwarded */
>  	if (rt6_need_strict(dst) || rt6_need_strict(src))
> @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  						       fl6.flowi6_oif, NULL,
>  						       strict);
>  
> +	mtu = ip6_mtu_from_fib6(f6i, dst, src);
> +	if (params->tot_len > mtu)
> +		return 0;
> +
>  	if (f6i->fib6_nh.nh_lwtstate)
>  		return 0;

Could you elaborate how this interacts in tc BPF use case where you have e.g.
GSO packets and tot_len from aggregated packets would definitely be larger
than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should
this be an opt-in via a new flag for the helper?

Thanks,
Daniel
David Ahern May 18, 2018, 12:34 a.m. UTC | #2
On 5/17/18 4:22 PM, Daniel Borkmann wrote:
> On 05/17/2018 06:09 PM, David Ahern wrote:
>> Add check that egress MTU can handle packet to be forwarded. If
>> the MTU is less than the packet lenght, return 0 meaning the
>> packet is expected to continue up the stack for help - eg.,
>> fragmenting the packet or sending an ICMP.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>> ---
>>  net/core/filter.c | 10 ++++++++++
>>  1 file changed, 10 insertions(+)
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 6d0d1560bd70..c47c47a75d4b 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>  	struct fib_nh *nh;
>>  	struct flowi4 fl4;
>>  	int err;
>> +	u32 mtu;
>>  
>>  	dev = dev_get_by_index_rcu(net, params->ifindex);
>>  	if (unlikely(!dev))
>> @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>  	if (res.fi->fib_nhs > 1)
>>  		fib_select_path(net, &res, &fl4, NULL);
>>  
>> +	mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
>> +	if (params->tot_len > mtu)
>> +		return 0;
>> +
>>  	nh = &res.fi->fib_nh[res.nh_sel];
>>  
>>  	/* do not handle lwt encaps right now */
>> @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>  	struct flowi6 fl6;
>>  	int strict = 0;
>>  	int oif;
>> +	u32 mtu;
>>  
>>  	/* link local addresses are never forwarded */
>>  	if (rt6_need_strict(dst) || rt6_need_strict(src))
>> @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>  						       fl6.flowi6_oif, NULL,
>>  						       strict);
>>  
>> +	mtu = ip6_mtu_from_fib6(f6i, dst, src);
>> +	if (params->tot_len > mtu)
>> +		return 0;
>> +
>>  	if (f6i->fib6_nh.nh_lwtstate)
>>  		return 0;
> 
> Could you elaborate how this interacts in tc BPF use case where you have e.g.
> GSO packets and tot_len from aggregated packets would definitely be larger
> than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should
> this be an opt-in via a new flag for the helper?

It should not be opt-in for XDP.

I could add a flag to the internal call -- bpf_skb_fib_lookup sets the
flag to skip the MTU check in bpf_ipv4_fib_lookup and bpf_ipv6_fib_lookup.

For the skb case do you want bpf_skb_fib_lookup call is_skb_forwardable
or leave that to the BPF program?
Daniel Borkmann May 18, 2018, 2:01 p.m. UTC | #3
On 05/18/2018 02:34 AM, David Ahern wrote:
> On 5/17/18 4:22 PM, Daniel Borkmann wrote:
>> On 05/17/2018 06:09 PM, David Ahern wrote:
>>> Add check that egress MTU can handle packet to be forwarded. If
>>> the MTU is less than the packet lenght, return 0 meaning the
>>> packet is expected to continue up the stack for help - eg.,
>>> fragmenting the packet or sending an ICMP.
>>>
>>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>> ---
>>>  net/core/filter.c | 10 ++++++++++
>>>  1 file changed, 10 insertions(+)
>>>
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 6d0d1560bd70..c47c47a75d4b 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>>  	struct fib_nh *nh;
>>>  	struct flowi4 fl4;
>>>  	int err;
>>> +	u32 mtu;
>>>  
>>>  	dev = dev_get_by_index_rcu(net, params->ifindex);
>>>  	if (unlikely(!dev))
>>> @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>>  	if (res.fi->fib_nhs > 1)
>>>  		fib_select_path(net, &res, &fl4, NULL);
>>>  
>>> +	mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
>>> +	if (params->tot_len > mtu)
>>> +		return 0;
>>> +
>>>  	nh = &res.fi->fib_nh[res.nh_sel];
>>>  
>>>  	/* do not handle lwt encaps right now */
>>> @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>>  	struct flowi6 fl6;
>>>  	int strict = 0;
>>>  	int oif;
>>> +	u32 mtu;
>>>  
>>>  	/* link local addresses are never forwarded */
>>>  	if (rt6_need_strict(dst) || rt6_need_strict(src))
>>> @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>>>  						       fl6.flowi6_oif, NULL,
>>>  						       strict);
>>>  
>>> +	mtu = ip6_mtu_from_fib6(f6i, dst, src);
>>> +	if (params->tot_len > mtu)
>>> +		return 0;
>>> +
>>>  	if (f6i->fib6_nh.nh_lwtstate)
>>>  		return 0;
>>
>> Could you elaborate how this interacts in tc BPF use case where you have e.g.
>> GSO packets and tot_len from aggregated packets would definitely be larger
>> than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should
>> this be an opt-in via a new flag for the helper?
> 
> It should not be opt-in for XDP.

Yes, correct, for XDP it should not.

> I could add a flag to the internal call -- bpf_skb_fib_lookup sets the
> flag to skip the MTU check in bpf_ipv4_fib_lookup and bpf_ipv6_fib_lookup.
> 
> For the skb case do you want bpf_skb_fib_lookup call is_skb_forwardable
> or leave that to the BPF program?

I think it probably makes sense to add an internal (unexposed) flag or bool
where we propagate skb_is_gso(skb) from bpf_skb_fib_lookup() call-site and
have similar logic where we first check this bool and if false do the MTU
check (so it still can get enforced for control packets). Thus probably nothing
of that implementation detail needs to be exposed to the program author.
kernel test robot May 20, 2018, 6:41 a.m. UTC | #4
Hi David,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/David-Ahern/bpf-Add-MTU-check-to-fib-lookup-helper/20180520-103417
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   net/core/filter.o: In function `bpf_ipv6_fib_lookup':
>> filter.c:(.text+0x12072): undefined reference to `ip6_mtu_from_fib6'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot May 20, 2018, 11:14 a.m. UTC | #5
Hi David,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/David-Ahern/bpf-Add-MTU-check-to-fib-lookup-helper/20180520-103417
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-ws0-05200859 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   net/core/filter.o: In function `bpf_ipv6_fib_lookup':
>> net/core/filter.c:4259: undefined reference to `ip6_mtu_from_fib6'

vim +4259 net/core/filter.c

  4182	
  4183	#if IS_ENABLED(CONFIG_IPV6)
  4184	static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
  4185				       u32 flags)
  4186	{
  4187		struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
  4188		struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
  4189		struct neighbour *neigh;
  4190		struct net_device *dev;
  4191		struct inet6_dev *idev;
  4192		struct fib6_info *f6i;
  4193		struct flowi6 fl6;
  4194		int strict = 0;
  4195		int oif;
  4196		u32 mtu;
  4197	
  4198		/* link local addresses are never forwarded */
  4199		if (rt6_need_strict(dst) || rt6_need_strict(src))
  4200			return 0;
  4201	
  4202		dev = dev_get_by_index_rcu(net, params->ifindex);
  4203		if (unlikely(!dev))
  4204			return -ENODEV;
  4205	
  4206		idev = __in6_dev_get_safely(dev);
  4207		if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
  4208			return 0;
  4209	
  4210		if (flags & BPF_FIB_LOOKUP_OUTPUT) {
  4211			fl6.flowi6_iif = 1;
  4212			oif = fl6.flowi6_oif = params->ifindex;
  4213		} else {
  4214			oif = fl6.flowi6_iif = params->ifindex;
  4215			fl6.flowi6_oif = 0;
  4216			strict = RT6_LOOKUP_F_HAS_SADDR;
  4217		}
  4218		fl6.flowlabel = params->flowlabel;
  4219		fl6.flowi6_scope = 0;
  4220		fl6.flowi6_flags = 0;
  4221		fl6.mp_hash = 0;
  4222	
  4223		fl6.flowi6_proto = params->l4_protocol;
  4224		fl6.daddr = *dst;
  4225		fl6.saddr = *src;
  4226		fl6.fl6_sport = params->sport;
  4227		fl6.fl6_dport = params->dport;
  4228	
  4229		if (flags & BPF_FIB_LOOKUP_DIRECT) {
  4230			u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
  4231			struct fib6_table *tb;
  4232	
  4233			tb = ipv6_stub->fib6_get_table(net, tbid);
  4234			if (unlikely(!tb))
  4235				return 0;
  4236	
  4237			f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
  4238		} else {
  4239			fl6.flowi6_mark = 0;
  4240			fl6.flowi6_secid = 0;
  4241			fl6.flowi6_tun_key.tun_id = 0;
  4242			fl6.flowi6_uid = sock_net_uid(net, NULL);
  4243	
  4244			f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
  4245		}
  4246	
  4247		if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
  4248			return 0;
  4249	
  4250		if (unlikely(f6i->fib6_flags & RTF_REJECT ||
  4251		    f6i->fib6_type != RTN_UNICAST))
  4252			return 0;
  4253	
  4254		if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
  4255			f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
  4256							       fl6.flowi6_oif, NULL,
  4257							       strict);
  4258	
> 4259		mtu = ip6_mtu_from_fib6(f6i, dst, src);
  4260		if (params->tot_len > mtu)
  4261			return 0;
  4262	
  4263		if (f6i->fib6_nh.nh_lwtstate)
  4264			return 0;
  4265	
  4266		if (f6i->fib6_flags & RTF_GATEWAY)
  4267			*dst = f6i->fib6_nh.nh_gw;
  4268	
  4269		dev = f6i->fib6_nh.nh_dev;
  4270		params->rt_metric = f6i->fib6_metric;
  4271	
  4272		/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
  4273		 * not needed here. Can not use __ipv6_neigh_lookup_noref here
  4274		 * because we need to get nd_tbl via the stub
  4275		 */
  4276		neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
  4277					      ndisc_hashfn, dst, dev);
  4278		if (neigh)
  4279			return bpf_fib_set_fwd_params(params, neigh, dev);
  4280	
  4281		return 0;
  4282	}
  4283	#endif
  4284	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/net/core/filter.c b/net/core/filter.c
index 6d0d1560bd70..c47c47a75d4b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4098,6 +4098,7 @@  static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	struct fib_nh *nh;
 	struct flowi4 fl4;
 	int err;
+	u32 mtu;
 
 	dev = dev_get_by_index_rcu(net, params->ifindex);
 	if (unlikely(!dev))
@@ -4149,6 +4150,10 @@  static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (res.fi->fib_nhs > 1)
 		fib_select_path(net, &res, &fl4, NULL);
 
+	mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
+	if (params->tot_len > mtu)
+		return 0;
+
 	nh = &res.fi->fib_nh[res.nh_sel];
 
 	/* do not handle lwt encaps right now */
@@ -4188,6 +4193,7 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	struct flowi6 fl6;
 	int strict = 0;
 	int oif;
+	u32 mtu;
 
 	/* link local addresses are never forwarded */
 	if (rt6_need_strict(dst) || rt6_need_strict(src))
@@ -4250,6 +4256,10 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 						       fl6.flowi6_oif, NULL,
 						       strict);
 
+	mtu = ip6_mtu_from_fib6(f6i, dst, src);
+	if (params->tot_len > mtu)
+		return 0;
+
 	if (f6i->fib6_nh.nh_lwtstate)
 		return 0;