Message ID | 20180517160930.25076-4-dsahern@gmail.com |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: Add MTU check to fib lookup helper | expand |
On 05/17/2018 06:09 PM, David Ahern wrote: > Add check that egress MTU can handle packet to be forwarded. If > the MTU is less than the packet lenght, return 0 meaning the > packet is expected to continue up the stack for help - eg., > fragmenting the packet or sending an ICMP. > > Signed-off-by: David Ahern <dsahern@gmail.com> > --- > net/core/filter.c | 10 ++++++++++ > 1 file changed, 10 insertions(+) > > diff --git a/net/core/filter.c b/net/core/filter.c > index 6d0d1560bd70..c47c47a75d4b 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > struct fib_nh *nh; > struct flowi4 fl4; > int err; > + u32 mtu; > > dev = dev_get_by_index_rcu(net, params->ifindex); > if (unlikely(!dev)) > @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > if (res.fi->fib_nhs > 1) > fib_select_path(net, &res, &fl4, NULL); > > + mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); > + if (params->tot_len > mtu) > + return 0; > + > nh = &res.fi->fib_nh[res.nh_sel]; > > /* do not handle lwt encaps right now */ > @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > struct flowi6 fl6; > int strict = 0; > int oif; > + u32 mtu; > > /* link local addresses are never forwarded */ > if (rt6_need_strict(dst) || rt6_need_strict(src)) > @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, > fl6.flowi6_oif, NULL, > strict); > > + mtu = ip6_mtu_from_fib6(f6i, dst, src); > + if (params->tot_len > mtu) > + return 0; > + > if (f6i->fib6_nh.nh_lwtstate) > return 0; Could you elaborate how this interacts in tc BPF use case where you have e.g. GSO packets and tot_len from aggregated packets would definitely be larger than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should this be an opt-in via a new flag for the helper? Thanks, Daniel
On 5/17/18 4:22 PM, Daniel Borkmann wrote: > On 05/17/2018 06:09 PM, David Ahern wrote: >> Add check that egress MTU can handle packet to be forwarded. If >> the MTU is less than the packet lenght, return 0 meaning the >> packet is expected to continue up the stack for help - eg., >> fragmenting the packet or sending an ICMP. >> >> Signed-off-by: David Ahern <dsahern@gmail.com> >> --- >> net/core/filter.c | 10 ++++++++++ >> 1 file changed, 10 insertions(+) >> >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 6d0d1560bd70..c47c47a75d4b 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >> struct fib_nh *nh; >> struct flowi4 fl4; >> int err; >> + u32 mtu; >> >> dev = dev_get_by_index_rcu(net, params->ifindex); >> if (unlikely(!dev)) >> @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >> if (res.fi->fib_nhs > 1) >> fib_select_path(net, &res, &fl4, NULL); >> >> + mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); >> + if (params->tot_len > mtu) >> + return 0; >> + >> nh = &res.fi->fib_nh[res.nh_sel]; >> >> /* do not handle lwt encaps right now */ >> @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >> struct flowi6 fl6; >> int strict = 0; >> int oif; >> + u32 mtu; >> >> /* link local addresses are never forwarded */ >> if (rt6_need_strict(dst) || rt6_need_strict(src)) >> @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >> fl6.flowi6_oif, NULL, >> strict); >> >> + mtu = ip6_mtu_from_fib6(f6i, dst, src); >> + if (params->tot_len > mtu) >> + return 0; >> + >> if (f6i->fib6_nh.nh_lwtstate) >> return 0; > > Could you elaborate how this interacts in tc BPF use case where you have e.g. > GSO packets and tot_len from aggregated packets would definitely be larger > than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should > this be an opt-in via a new flag for the helper? It should not be opt-in for XDP. I could add a flag to the internal call -- bpf_skb_fib_lookup sets the flag to skip the MTU check in bpf_ipv4_fib_lookup and bpf_ipv6_fib_lookup. For the skb case do you want bpf_skb_fib_lookup call is_skb_forwardable or leave that to the BPF program?
On 05/18/2018 02:34 AM, David Ahern wrote: > On 5/17/18 4:22 PM, Daniel Borkmann wrote: >> On 05/17/2018 06:09 PM, David Ahern wrote: >>> Add check that egress MTU can handle packet to be forwarded. If >>> the MTU is less than the packet lenght, return 0 meaning the >>> packet is expected to continue up the stack for help - eg., >>> fragmenting the packet or sending an ICMP. >>> >>> Signed-off-by: David Ahern <dsahern@gmail.com> >>> --- >>> net/core/filter.c | 10 ++++++++++ >>> 1 file changed, 10 insertions(+) >>> >>> diff --git a/net/core/filter.c b/net/core/filter.c >>> index 6d0d1560bd70..c47c47a75d4b 100644 >>> --- a/net/core/filter.c >>> +++ b/net/core/filter.c >>> @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >>> struct fib_nh *nh; >>> struct flowi4 fl4; >>> int err; >>> + u32 mtu; >>> >>> dev = dev_get_by_index_rcu(net, params->ifindex); >>> if (unlikely(!dev)) >>> @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >>> if (res.fi->fib_nhs > 1) >>> fib_select_path(net, &res, &fl4, NULL); >>> >>> + mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); >>> + if (params->tot_len > mtu) >>> + return 0; >>> + >>> nh = &res.fi->fib_nh[res.nh_sel]; >>> >>> /* do not handle lwt encaps right now */ >>> @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >>> struct flowi6 fl6; >>> int strict = 0; >>> int oif; >>> + u32 mtu; >>> >>> /* link local addresses are never forwarded */ >>> if (rt6_need_strict(dst) || rt6_need_strict(src)) >>> @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, >>> fl6.flowi6_oif, NULL, >>> strict); >>> >>> + mtu = ip6_mtu_from_fib6(f6i, dst, src); >>> + if (params->tot_len > mtu) >>> + return 0; >>> + >>> if (f6i->fib6_nh.nh_lwtstate) >>> return 0; >> >> Could you elaborate how this interacts in tc BPF use case where you have e.g. >> GSO packets and tot_len from aggregated packets would definitely be larger >> than MTU (e.g. see is_skb_forwardable() as one example on such checks)? Should >> this be an opt-in via a new flag for the helper? > > It should not be opt-in for XDP. Yes, correct, for XDP it should not. > I could add a flag to the internal call -- bpf_skb_fib_lookup sets the > flag to skip the MTU check in bpf_ipv4_fib_lookup and bpf_ipv6_fib_lookup. > > For the skb case do you want bpf_skb_fib_lookup call is_skb_forwardable > or leave that to the BPF program? I think it probably makes sense to add an internal (unexposed) flag or bool where we propagate skb_is_gso(skb) from bpf_skb_fib_lookup() call-site and have similar logic where we first check this bool and if false do the MTU check (so it still can get enforced for control packets). Thus probably nothing of that implementation detail needs to be exposed to the program author.
Hi David,
I love your patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/0day-ci/linux/commits/David-Ahern/bpf-Add-MTU-check-to-fib-lookup-helper/20180520-103417
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=ia64
All errors (new ones prefixed by >>):
net/core/filter.o: In function `bpf_ipv6_fib_lookup':
>> filter.c:(.text+0x12072): undefined reference to `ip6_mtu_from_fib6'
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi David, I love your patch! Yet something to improve: [auto build test ERROR on bpf-next/master] url: https://github.com/0day-ci/linux/commits/David-Ahern/bpf-Add-MTU-check-to-fib-lookup-helper/20180520-103417 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: x86_64-randconfig-ws0-05200859 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All errors (new ones prefixed by >>): net/core/filter.o: In function `bpf_ipv6_fib_lookup': >> net/core/filter.c:4259: undefined reference to `ip6_mtu_from_fib6' vim +4259 net/core/filter.c 4182 4183 #if IS_ENABLED(CONFIG_IPV6) 4184 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 4185 u32 flags) 4186 { 4187 struct in6_addr *src = (struct in6_addr *) params->ipv6_src; 4188 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst; 4189 struct neighbour *neigh; 4190 struct net_device *dev; 4191 struct inet6_dev *idev; 4192 struct fib6_info *f6i; 4193 struct flowi6 fl6; 4194 int strict = 0; 4195 int oif; 4196 u32 mtu; 4197 4198 /* link local addresses are never forwarded */ 4199 if (rt6_need_strict(dst) || rt6_need_strict(src)) 4200 return 0; 4201 4202 dev = dev_get_by_index_rcu(net, params->ifindex); 4203 if (unlikely(!dev)) 4204 return -ENODEV; 4205 4206 idev = __in6_dev_get_safely(dev); 4207 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) 4208 return 0; 4209 4210 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4211 fl6.flowi6_iif = 1; 4212 oif = fl6.flowi6_oif = params->ifindex; 4213 } else { 4214 oif = fl6.flowi6_iif = params->ifindex; 4215 fl6.flowi6_oif = 0; 4216 strict = RT6_LOOKUP_F_HAS_SADDR; 4217 } 4218 fl6.flowlabel = params->flowlabel; 4219 fl6.flowi6_scope = 0; 4220 fl6.flowi6_flags = 0; 4221 fl6.mp_hash = 0; 4222 4223 fl6.flowi6_proto = params->l4_protocol; 4224 fl6.daddr = *dst; 4225 fl6.saddr = *src; 4226 fl6.fl6_sport = params->sport; 4227 fl6.fl6_dport = params->dport; 4228 4229 if (flags & BPF_FIB_LOOKUP_DIRECT) { 4230 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 4231 struct fib6_table *tb; 4232 4233 tb = ipv6_stub->fib6_get_table(net, tbid); 4234 if (unlikely(!tb)) 4235 return 0; 4236 4237 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); 4238 } else { 4239 fl6.flowi6_mark = 0; 4240 fl6.flowi6_secid = 0; 4241 fl6.flowi6_tun_key.tun_id = 0; 4242 fl6.flowi6_uid = sock_net_uid(net, NULL); 4243 4244 f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict); 4245 } 4246 4247 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) 4248 return 0; 4249 4250 if (unlikely(f6i->fib6_flags & RTF_REJECT || 4251 f6i->fib6_type != RTN_UNICAST)) 4252 return 0; 4253 4254 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) 4255 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, 4256 fl6.flowi6_oif, NULL, 4257 strict); 4258 > 4259 mtu = ip6_mtu_from_fib6(f6i, dst, src); 4260 if (params->tot_len > mtu) 4261 return 0; 4262 4263 if (f6i->fib6_nh.nh_lwtstate) 4264 return 0; 4265 4266 if (f6i->fib6_flags & RTF_GATEWAY) 4267 *dst = f6i->fib6_nh.nh_gw; 4268 4269 dev = f6i->fib6_nh.nh_dev; 4270 params->rt_metric = f6i->fib6_metric; 4271 4272 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is 4273 * not needed here. Can not use __ipv6_neigh_lookup_noref here 4274 * because we need to get nd_tbl via the stub 4275 */ 4276 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, 4277 ndisc_hashfn, dst, dev); 4278 if (neigh) 4279 return bpf_fib_set_fwd_params(params, neigh, dev); 4280 4281 return 0; 4282 } 4283 #endif 4284 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/net/core/filter.c b/net/core/filter.c index 6d0d1560bd70..c47c47a75d4b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, struct fib_nh *nh; struct flowi4 fl4; int err; + u32 mtu; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) @@ -4149,6 +4150,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (res.fi->fib_nhs > 1) fib_select_path(net, &res, &fl4, NULL); + mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); + if (params->tot_len > mtu) + return 0; + nh = &res.fi->fib_nh[res.nh_sel]; /* do not handle lwt encaps right now */ @@ -4188,6 +4193,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, struct flowi6 fl6; int strict = 0; int oif; + u32 mtu; /* link local addresses are never forwarded */ if (rt6_need_strict(dst) || rt6_need_strict(src)) @@ -4250,6 +4256,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl6.flowi6_oif, NULL, strict); + mtu = ip6_mtu_from_fib6(f6i, dst, src); + if (params->tot_len > mtu) + return 0; + if (f6i->fib6_nh.nh_lwtstate) return 0;
Add check that egress MTU can handle packet to be forwarded. If the MTU is less than the packet lenght, return 0 meaning the packet is expected to continue up the stack for help - eg., fragmenting the packet or sending an ICMP. Signed-off-by: David Ahern <dsahern@gmail.com> --- net/core/filter.c | 10 ++++++++++ 1 file changed, 10 insertions(+)