diff mbox series

[bpf-next,V4,2/5] bpf: bpf_fib_lookup return MTU value as output when looked up

Message ID 160381601522.1435097.11103677488984953095.stgit@firesoul
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series bpf: New approach for BPF MTU handling | expand

Checks

Context Check Description
jkicinski/cover_letter success Link
jkicinski/fixes_present success Link
jkicinski/patch_count success Link
jkicinski/tree_selection success Clearly marked for bpf-next
jkicinski/subject_prefix success Link
jkicinski/source_inline success Was 0 now: 0
jkicinski/verify_signedoff success Link
jkicinski/module_param success Was 0 now: 0
jkicinski/build_32bit fail Errors and warnings before: 15999 this patch: 15999
jkicinski/kdoc success Errors and warnings before: 0 this patch: 0
jkicinski/verify_fixes success Link
jkicinski/checkpatch fail Link
jkicinski/build_allmodconfig_warn success Errors and warnings before: 16031 this patch: 16031
jkicinski/header_inline success Link
jkicinski/stable success Stable not CCed

Commit Message

Jesper Dangaard Brouer Oct. 27, 2020, 4:26 p.m. UTC
The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup)
can perform an MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED.  The BPF-prog
doesn't know the MTU value that caused this rejection.

If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191), it
needs to know this MTU value for the ICMP packet.

This patch changes struct bpf_fib_lookup (used for both lookup input and
result) to contain this MTU value as output, via a union with 'tot_len', as
that is the value used for the MTU lookup.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/uapi/linux/bpf.h       |   11 +++++++++--
 net/core/filter.c              |   17 ++++++++++++-----
 tools/include/uapi/linux/bpf.h |   11 +++++++++--
 3 files changed, 30 insertions(+), 9 deletions(-)

Comments

David Ahern Oct. 27, 2020, 5:15 p.m. UTC | #1
On 10/27/20 10:26 AM, Jesper Dangaard Brouer wrote:
> The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup)
> can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED.  The BPF-prog
> don't know the MTU value that caused this rejection.
> 
> If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191) it
> need to know this MTU value for the ICMP packet.
> 
> Patch change lookup and result struct bpf_fib_lookup, to contain this MTU
> value as output via a union with 'tot_len' as this is the value used for
> the MTU lookup.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>  include/uapi/linux/bpf.h       |   11 +++++++++--
>  net/core/filter.c              |   17 ++++++++++++-----
>  tools/include/uapi/linux/bpf.h |   11 +++++++++--
>  3 files changed, 30 insertions(+), 9 deletions(-)
> 


Reviewed-by: David Ahern <dsahern@kernel.org>
Dan Carpenter Oct. 28, 2020, 12:49 p.m. UTC | #2
Hi Jesper,

url:    https://github.com/0day-ci/linux/commits/Jesper-Dangaard-Brouer/bpf-New-approach-for-BPF-MTU-handling/20201028-002919
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
config: i386-randconfig-m021-20201026 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

New smatch warnings:
net/core/filter.c:5395 bpf_ipv4_fib_lookup() error: uninitialized symbol 'mtu'.

vim +/mtu +5395 net/core/filter.c

87f5fc7e48dd317 David Ahern            2018-05-09  5281  static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede40df8db David Ahern            2018-05-21  5282  			       u32 flags, bool check_mtu)
87f5fc7e48dd317 David Ahern            2018-05-09  5283  {
eba618abacade71 David Ahern            2019-04-02  5284  	struct fib_nh_common *nhc;
87f5fc7e48dd317 David Ahern            2018-05-09  5285  	struct in_device *in_dev;
87f5fc7e48dd317 David Ahern            2018-05-09  5286  	struct neighbour *neigh;
87f5fc7e48dd317 David Ahern            2018-05-09  5287  	struct net_device *dev;
87f5fc7e48dd317 David Ahern            2018-05-09  5288  	struct fib_result res;
87f5fc7e48dd317 David Ahern            2018-05-09  5289  	struct flowi4 fl4;
87f5fc7e48dd317 David Ahern            2018-05-09  5290  	int err;
4f74fede40df8db David Ahern            2018-05-21  5291  	u32 mtu;
                                                                ^^^^^^^^

87f5fc7e48dd317 David Ahern            2018-05-09  5292  
87f5fc7e48dd317 David Ahern            2018-05-09  5293  	dev = dev_get_by_index_rcu(net, params->ifindex);
87f5fc7e48dd317 David Ahern            2018-05-09  5294  	if (unlikely(!dev))
87f5fc7e48dd317 David Ahern            2018-05-09  5295  		return -ENODEV;
87f5fc7e48dd317 David Ahern            2018-05-09  5296  
87f5fc7e48dd317 David Ahern            2018-05-09  5297  	/* verify forwarding is enabled on this interface */
87f5fc7e48dd317 David Ahern            2018-05-09  5298  	in_dev = __in_dev_get_rcu(dev);
87f5fc7e48dd317 David Ahern            2018-05-09  5299  	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4c79579b44b1876 David Ahern            2018-06-26  5300  		return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e48dd317 David Ahern            2018-05-09  5301  
87f5fc7e48dd317 David Ahern            2018-05-09  5302  	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
87f5fc7e48dd317 David Ahern            2018-05-09  5303  		fl4.flowi4_iif = 1;
87f5fc7e48dd317 David Ahern            2018-05-09  5304  		fl4.flowi4_oif = params->ifindex;
87f5fc7e48dd317 David Ahern            2018-05-09  5305  	} else {
87f5fc7e48dd317 David Ahern            2018-05-09  5306  		fl4.flowi4_iif = params->ifindex;
87f5fc7e48dd317 David Ahern            2018-05-09  5307  		fl4.flowi4_oif = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5308  	}
87f5fc7e48dd317 David Ahern            2018-05-09  5309  	fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
87f5fc7e48dd317 David Ahern            2018-05-09  5310  	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
87f5fc7e48dd317 David Ahern            2018-05-09  5311  	fl4.flowi4_flags = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5312  
87f5fc7e48dd317 David Ahern            2018-05-09  5313  	fl4.flowi4_proto = params->l4_protocol;
87f5fc7e48dd317 David Ahern            2018-05-09  5314  	fl4.daddr = params->ipv4_dst;
87f5fc7e48dd317 David Ahern            2018-05-09  5315  	fl4.saddr = params->ipv4_src;
87f5fc7e48dd317 David Ahern            2018-05-09  5316  	fl4.fl4_sport = params->sport;
87f5fc7e48dd317 David Ahern            2018-05-09  5317  	fl4.fl4_dport = params->dport;
1869e226a7b3ef7 David Ahern            2020-09-13  5318  	fl4.flowi4_multipath_hash = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5319  
87f5fc7e48dd317 David Ahern            2018-05-09  5320  	if (flags & BPF_FIB_LOOKUP_DIRECT) {
87f5fc7e48dd317 David Ahern            2018-05-09  5321  		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
87f5fc7e48dd317 David Ahern            2018-05-09  5322  		struct fib_table *tb;
87f5fc7e48dd317 David Ahern            2018-05-09  5323  
87f5fc7e48dd317 David Ahern            2018-05-09  5324  		tb = fib_get_table(net, tbid);
87f5fc7e48dd317 David Ahern            2018-05-09  5325  		if (unlikely(!tb))
4c79579b44b1876 David Ahern            2018-06-26  5326  			return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e48dd317 David Ahern            2018-05-09  5327  
87f5fc7e48dd317 David Ahern            2018-05-09  5328  		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
87f5fc7e48dd317 David Ahern            2018-05-09  5329  	} else {
87f5fc7e48dd317 David Ahern            2018-05-09  5330  		fl4.flowi4_mark = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5331  		fl4.flowi4_secid = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5332  		fl4.flowi4_tun_key.tun_id = 0;
87f5fc7e48dd317 David Ahern            2018-05-09  5333  		fl4.flowi4_uid = sock_net_uid(net, NULL);
87f5fc7e48dd317 David Ahern            2018-05-09  5334  
87f5fc7e48dd317 David Ahern            2018-05-09  5335  		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
87f5fc7e48dd317 David Ahern            2018-05-09  5336  	}
87f5fc7e48dd317 David Ahern            2018-05-09  5337  
4c79579b44b1876 David Ahern            2018-06-26  5338  	if (err) {
4c79579b44b1876 David Ahern            2018-06-26  5339  		/* map fib lookup errors to RTN_ type */
4c79579b44b1876 David Ahern            2018-06-26  5340  		if (err == -EINVAL)
4c79579b44b1876 David Ahern            2018-06-26  5341  			return BPF_FIB_LKUP_RET_BLACKHOLE;
4c79579b44b1876 David Ahern            2018-06-26  5342  		if (err == -EHOSTUNREACH)
4c79579b44b1876 David Ahern            2018-06-26  5343  			return BPF_FIB_LKUP_RET_UNREACHABLE;
4c79579b44b1876 David Ahern            2018-06-26  5344  		if (err == -EACCES)
4c79579b44b1876 David Ahern            2018-06-26  5345  			return BPF_FIB_LKUP_RET_PROHIBIT;
4c79579b44b1876 David Ahern            2018-06-26  5346  
4c79579b44b1876 David Ahern            2018-06-26  5347  		return BPF_FIB_LKUP_RET_NOT_FWDED;
4c79579b44b1876 David Ahern            2018-06-26  5348  	}
4c79579b44b1876 David Ahern            2018-06-26  5349  
4c79579b44b1876 David Ahern            2018-06-26  5350  	if (res.type != RTN_UNICAST)
4c79579b44b1876 David Ahern            2018-06-26  5351  		return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e48dd317 David Ahern            2018-05-09  5352  
5481d73f81549e2 David Ahern            2019-06-03  5353  	if (fib_info_num_path(res.fi) > 1)
87f5fc7e48dd317 David Ahern            2018-05-09  5354  		fib_select_path(net, &res, &fl4, NULL);
87f5fc7e48dd317 David Ahern            2018-05-09  5355  
4f74fede40df8db David Ahern            2018-05-21  5356  	if (check_mtu) {
4f74fede40df8db David Ahern            2018-05-21  5357  		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
88ffc2c2e37ebb3 Jesper Dangaard Brouer 2020-10-27  5358  		if (params->tot_len > mtu) {
88ffc2c2e37ebb3 Jesper Dangaard Brouer 2020-10-27  5359  			params->mtu = mtu; /* union with tot_len */
4c79579b44b1876 David Ahern            2018-06-26  5360  			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede40df8db David Ahern            2018-05-21  5361  		}
88ffc2c2e37ebb3 Jesper Dangaard Brouer 2020-10-27  5362  	}

"mtu" not initialized on else path.

4f74fede40df8db David Ahern            2018-05-21  5363  
eba618abacade71 David Ahern            2019-04-02  5364  	nhc = res.nhc;
87f5fc7e48dd317 David Ahern            2018-05-09  5365  
87f5fc7e48dd317 David Ahern            2018-05-09  5366  	/* do not handle lwt encaps right now */
eba618abacade71 David Ahern            2019-04-02  5367  	if (nhc->nhc_lwtstate)
4c79579b44b1876 David Ahern            2018-06-26  5368  		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e48dd317 David Ahern            2018-05-09  5369  
eba618abacade71 David Ahern            2019-04-02  5370  	dev = nhc->nhc_dev;
87f5fc7e48dd317 David Ahern            2018-05-09  5371  
87f5fc7e48dd317 David Ahern            2018-05-09  5372  	params->rt_metric = res.fi->fib_priority;
d1c362e1dd68a42 Toke Høiland-Jørgensen 2020-10-09  5373  	params->ifindex = dev->ifindex;
87f5fc7e48dd317 David Ahern            2018-05-09  5374  
87f5fc7e48dd317 David Ahern            2018-05-09  5375  	/* xdp and cls_bpf programs are run in RCU-bh so
87f5fc7e48dd317 David Ahern            2018-05-09  5376  	 * rcu_read_lock_bh is not needed here
87f5fc7e48dd317 David Ahern            2018-05-09  5377  	 */
6f5f68d05ec0f64 David Ahern            2019-04-05  5378  	if (likely(nhc->nhc_gw_family != AF_INET6)) {
6f5f68d05ec0f64 David Ahern            2019-04-05  5379  		if (nhc->nhc_gw_family)
6f5f68d05ec0f64 David Ahern            2019-04-05  5380  			params->ipv4_dst = nhc->nhc_gw.ipv4;
6f5f68d05ec0f64 David Ahern            2019-04-05  5381  
6f5f68d05ec0f64 David Ahern            2019-04-05  5382  		neigh = __ipv4_neigh_lookup_noref(dev,
6f5f68d05ec0f64 David Ahern            2019-04-05  5383  						 (__force u32)params->ipv4_dst);
6f5f68d05ec0f64 David Ahern            2019-04-05  5384  	} else {
6f5f68d05ec0f64 David Ahern            2019-04-05  5385  		struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
6f5f68d05ec0f64 David Ahern            2019-04-05  5386  
6f5f68d05ec0f64 David Ahern            2019-04-05  5387  		params->family = AF_INET6;
6f5f68d05ec0f64 David Ahern            2019-04-05  5388  		*dst = nhc->nhc_gw.ipv6;
6f5f68d05ec0f64 David Ahern            2019-04-05  5389  		neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
6f5f68d05ec0f64 David Ahern            2019-04-05  5390  	}
6f5f68d05ec0f64 David Ahern            2019-04-05  5391  
4c79579b44b1876 David Ahern            2018-06-26  5392  	if (!neigh)
4c79579b44b1876 David Ahern            2018-06-26  5393  		return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e48dd317 David Ahern            2018-05-09  5394  
88ffc2c2e37ebb3 Jesper Dangaard Brouer 2020-10-27 @5395  	return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
                                                                                                                  ^^^
Uninitialized variable warning.

87f5fc7e48dd317 David Ahern            2018-05-09  5396  }

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Jesper Dangaard Brouer Oct. 30, 2020, 2:35 p.m. UTC | #3
On Wed, 28 Oct 2020 15:49:42 +0300
Dan Carpenter <dan.carpenter@oracle.com> wrote:

> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> 
> New smatch warnings:
> net/core/filter.c:5395 bpf_ipv4_fib_lookup() error: uninitialized symbol 'mtu'.

I will fix and send V5.
Jesper Dangaard Brouer Oct. 30, 2020, 5:01 p.m. UTC | #4
On Tue, 27 Oct 2020 11:15:31 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 10/27/20 10:26 AM, Jesper Dangaard Brouer wrote:
> > The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup)
> > can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED.  The BPF-prog
> > don't know the MTU value that caused this rejection.
> > 
> > If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191) it
> > need to know this MTU value for the ICMP packet.
> > 
> > Patch change lookup and result struct bpf_fib_lookup, to contain this MTU
> > value as output via a union with 'tot_len' as this is the value used for
> > the MTU lookup.
> > 
> > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> > ---
> >  include/uapi/linux/bpf.h       |   11 +++++++++--
> >  net/core/filter.c              |   17 ++++++++++++-----
> >  tools/include/uapi/linux/bpf.h |   11 +++++++++--
> >  3 files changed, 30 insertions(+), 9 deletions(-)
> 
> Reviewed-by: David Ahern <dsahern@kernel.org>

Thanks a lot for the review.  I didn't carry your Reviewed-by tag over to V5
of this patch, as I changed the name of the output member from mtu to
mtu_result in V5.  Please review V5 and give your review consent.
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..03c042e3a34c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2219,6 +2219,9 @@  union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
+ *		If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
+ *		was exceeded and result params->mtu contains the MTU.
+ *
  * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
@@ -4872,9 +4875,13 @@  struct bpf_fib_lookup {
 	__be16	sport;
 	__be16	dport;
 
-	/* total length of packet from network header - used for MTU check */
-	__u16	tot_len;
+	union {	/* used for MTU check */
+		/* input to lookup */
+		__u16	tot_len; /* total length of packet from network hdr */
 
+		/* output: MTU value (if requested check_mtu) */
+		__u16	mtu;
+	};
 	/* input: L3 device index for lookup
 	 * output: device index from FIB lookup
 	 */
diff --git a/net/core/filter.c b/net/core/filter.c
index 1ee97fdeea64..caa427edc563 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5265,12 +5265,13 @@  static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
 #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
 static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
 				  const struct neighbour *neigh,
-				  const struct net_device *dev)
+				  const struct net_device *dev, u32 mtu)
 {
 	memcpy(params->dmac, neigh->ha, ETH_ALEN);
 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
 	params->h_vlan_TCI = 0;
 	params->h_vlan_proto = 0;
+	params->mtu = mtu;
 
 	return 0;
 }
@@ -5354,8 +5355,10 @@  static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	if (check_mtu) {
 		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
-		if (params->tot_len > mtu)
+		if (params->tot_len > mtu) {
+			params->mtu = mtu; /* union with tot_len */
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
+		}
 	}
 
 	nhc = res.nhc;
@@ -5389,7 +5392,7 @@  static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (!neigh)
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return bpf_fib_set_fwd_params(params, neigh, dev);
+	return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
 }
 #endif
 
@@ -5481,8 +5484,10 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	if (check_mtu) {
 		mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
-		if (params->tot_len > mtu)
+		if (params->tot_len > mtu) {
+			params->mtu = mtu; /* union with tot_len */
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
+		}
 	}
 
 	if (res.nh->fib_nh_lws)
@@ -5502,7 +5507,7 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (!neigh)
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return bpf_fib_set_fwd_params(params, neigh, dev);
+	return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
 }
 #endif
 
@@ -5571,6 +5576,8 @@  BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 		dev = dev_get_by_index_rcu(net, params->ifindex);
 		if (!is_skb_forwardable(dev, skb))
 			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
+
+		params->mtu = dev->mtu; /* union with tot_len */
 	}
 
 	return rc;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..03c042e3a34c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2219,6 +2219,9 @@  union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
+ *		If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
+ *		was exceeded and result params->mtu contains the MTU.
+ *
  * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
@@ -4872,9 +4875,13 @@  struct bpf_fib_lookup {
 	__be16	sport;
 	__be16	dport;
 
-	/* total length of packet from network header - used for MTU check */
-	__u16	tot_len;
+	union {	/* used for MTU check */
+		/* input to lookup */
+		__u16	tot_len; /* total length of packet from network hdr */
 
+		/* output: MTU value (if requested check_mtu) */
+		__u16	mtu;
+	};
 	/* input: L3 device index for lookup
 	 * output: device index from FIB lookup
 	 */