diff mbox series

[bpf-next,v6,1/6] net: move netfilter nf_reject_fill_skb_dst to core ipv4

Message ID 20260518122842.218522-2-mahe.tardy@gmail.com
State Handled Elsewhere, archived
Headers show
Series bpf: add icmp_send kfunc | expand

Commit Message

Mahe Tardy May 18, 2026, 12:28 p.m. UTC
Move and rename nf_reject_fill_skb_dst from
ipv4/netfilter/nf_reject_ipv4 to ip_route_reply_fill_dst in ipv4/route.c
so that it can be reused in the following patches by BPF kfuncs.

Netfilter uses nf_ip_route that is almost a transparent wrapper around
ip_route_output_key so this patch inlines it.

Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
 include/net/route.h                 |  1 +
 net/ipv4/netfilter/nf_reject_ipv4.c | 19 ++-----------------
 net/ipv4/route.c                    | 15 +++++++++++++++
 3 files changed, 18 insertions(+), 17 deletions(-)

--
2.34.1

Comments

bot+bpf-ci@kernel.org May 18, 2026, 1:07 p.m. UTC | #1
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index bc1296f0ea69..1f031c5ef554 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2945,6 +2945,21 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
>  }
>  EXPORT_SYMBOL_GPL(ip_route_output_flow);
>
> +int ip_route_reply_fill_dst(struct sk_buff *skb)
> +{
> +	struct rtable *rt;
> +	struct flowi4 fl4 = {
> +		.daddr = ip_hdr(skb)->saddr
> +	};

Can this lead to an out-of-bounds read?

Now that this is exported for BPF kfuncs, it can be reached from early
hooks like TC ingress where the IPv4 header might not be linearized yet.

If a malformed packet contains less than sizeof(struct iphdr) bytes of
linear data, does accessing ->saddr require validation via pskb_may_pull()
or similar?

This concern was raised by sashiko-bot@kernel.org in v4 review:
https://lore.kernel.org/bpf/20260421111320.D97D9C2BCB0@smtp.kernel.org/

> +
> +	rt = ip_route_output_key(dev_net(skb->dev), &fl4);

Can skb->dev be NULL here?

While safe in its original netfilter context, a BPF_PROG_TYPE_NETFILTER
program attached to the NF_INET_LOCAL_OUT hook will receive locally
generated SKBs before skb->dev is assigned (e.g., in __ip_local_out before
ip_output).

Does dev_net(skb->dev) need a check to prevent a kernel panic when called
from such contexts?

This concern was raised by sashiko-bot@kernel.org in v4 review:
https://lore.kernel.org/bpf/20260421111320.D97D9C2BCB0@smtp.kernel.org/

> +	if (IS_ERR(rt))
> +		return PTR_ERR(rt);
> +	skb_dst_set(skb, &rt->dst);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ip_route_reply_fill_dst);


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26034287312
Mahe Tardy May 18, 2026, 2:21 p.m. UTC | #2
On Mon, May 18, 2026 at 01:07:54PM +0000, bot+bpf-ci@kernel.org wrote:
> > diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> > index bc1296f0ea69..1f031c5ef554 100644
> > --- a/net/ipv4/route.c
> > +++ b/net/ipv4/route.c
> > @@ -2945,6 +2945,21 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
> >  }
> >  EXPORT_SYMBOL_GPL(ip_route_output_flow);
> >
> > +int ip_route_reply_fill_dst(struct sk_buff *skb)
> > +{
> > +	struct rtable *rt;
> > +	struct flowi4 fl4 = {
> > +		.daddr = ip_hdr(skb)->saddr
> > +	};
> 
> Can this lead to an out-of-bounds read?
> 
> Now that this is exported for BPF kfuncs, it can be reached from early
> hooks like TC ingress where the IPv4 header might not be linearized yet.
> 
> If a malformed packet contains less than sizeof(struct iphdr) bytes of
> linear data, does accessing ->saddr require validation via pskb_may_pull()
> or similar?
> 
> This concern was raised by sashiko-bot@kernel.org in v4 review:
> https://lore.kernel.org/bpf/20260421111320.D97D9C2BCB0@smtp.kernel.org/

Yes but it should be fine since it's the caller responsability to make
sure the header has been linearized, and we are calling
pskb_network_may_pull.

> > +
> > +	rt = ip_route_output_key(dev_net(skb->dev), &fl4);
> 
> Can skb->dev be NULL here?
> 
> While safe in its original netfilter context, a BPF_PROG_TYPE_NETFILTER
> program attached to the NF_INET_LOCAL_OUT hook will receive locally
> generated SKBs before skb->dev is assigned (e.g., in __ip_local_out before
> ip_output).
> 
> Does dev_net(skb->dev) need a check to prevent a kernel panic when called
> from such contexts?
> 
> This concern was raised by sashiko-bot@kernel.org in v4 review:
> https://lore.kernel.org/bpf/20260421111320.D97D9C2BCB0@smtp.kernel.org/

We can't attach such program as now it's only cgroup_skb and tc,
outdated.

> > +	if (IS_ERR(rt))
> > +		return PTR_ERR(rt);
> > +	skb_dst_set(skb, &rt->dst);
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(ip_route_reply_fill_dst);
> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26034287312
diff mbox series

Patch

diff --git a/include/net/route.h b/include/net/route.h
index f90106f383c5..300d292cd9a1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -173,6 +173,7 @@  struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
 				    const struct sock *sk);
 struct dst_entry *ipv4_blackhole_route(struct net *net,
 				       struct dst_entry *dst_orig);
+int ip_route_reply_fill_dst(struct sk_buff *skb);

 static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp)
 {
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fecf6621f679..c1c0724e4d4d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -252,21 +252,6 @@  static void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *
 	nskb->csum_offset = offsetof(struct tcphdr, check);
 }

-static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
-{
-	struct dst_entry *dst = NULL;
-	struct flowi fl;
-
-	memset(&fl, 0, sizeof(struct flowi));
-	fl.u.ip4.daddr = ip_hdr(skb_in)->saddr;
-	nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false);
-	if (!dst)
-		return -1;
-
-	skb_dst_set(skb_in, dst);
-	return 0;
-}
-
 /* Send RST reply */
 void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
 		   int hook)
@@ -279,7 +264,7 @@  void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
 	if (!oth)
 		return;

-	if (!skb_dst(oldskb) && nf_reject_fill_skb_dst(oldskb) < 0)
+	if (!skb_dst(oldskb) && ip_route_reply_fill_dst(oldskb) < 0)
 		return;

 	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -352,7 +337,7 @@  void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 	if (iph->frag_off & htons(IP_OFFSET))
 		return;

-	if (!skb_dst(skb_in) && nf_reject_fill_skb_dst(skb_in) < 0)
+	if (!skb_dst(skb_in) && ip_route_reply_fill_dst(skb_in) < 0)
 		return;

 	if (skb_csum_unnecessary(skb_in) ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bc1296f0ea69..1f031c5ef554 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2945,6 +2945,21 @@  struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);

+int ip_route_reply_fill_dst(struct sk_buff *skb)
+{
+	struct rtable *rt;
+	struct flowi4 fl4 = {
+		.daddr = ip_hdr(skb)->saddr
+	};
+
+	rt = ip_route_output_key(dev_net(skb->dev), &fl4);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+	skb_dst_set(skb, &rt->dst);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_route_reply_fill_dst);
+
 /* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 			struct rtable *rt, u32 table_id, dscp_t dscp,