diff mbox series

netfilter: fix dangling pointer access of fake_rtable

Message ID 20190409065612.32652-1-rdong.ge@gmail.com
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series netfilter: fix dangling pointer access of fake_rtable | expand

Commit Message

Rundong Ge April 9, 2019, 6:56 a.m. UTC
With bridge-nf-call-iptables enabled, skbs that go through the bridge
and are enqueued between <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF> and
<NF_BR_FORWARD,NF_BR_PRI_BRNF - 1> won't be flushed when the bridge goes
down. The _skb_refdst of the skbs in the nfqueue then becomes a dangling
pointer.

Reproduce steps:
1.Create a bridge on the box.
2.echo 1 >/proc/sys/net/bridge/bridge-nf-call-iptables
3.Add a netfilter hook function to queue the packets to nfqueuenum 0.
  The hook point must be between <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF> and
  <NF_BR_FORWARD,NF_BR_PRI_BRNF - 1>.
4.Add a userspace process "nfqueue_rcv" to continuously read and
  set_verdict "NF_ACCEPT" to packets from queue 0.
5.Continuously ping client1 from client0
6.Send "Ctrl + Z" to pause the "nfqueue_rcv" to simulate the queue
  congestion.
7.Using "ifconfig br0 down&&brctl delbr br0" to delete the bridge.
8.At this time the _skb_refdst of the skbs in the nfqueue becomes a
  dangling pointer. If we send "fg" to resume the "nfqueue_rcv", the
  kernel may try to access the freed memory.

Debug log:
Here I added debug logs in "netdev_freemem" and "dst_release" to prove
the freed memory access. As the log shows, "dst_release" accessed the
bridge's fake_rtable after the bridge was freed.

Apr  8 22:25:14 raydon kernel: [62139.005062] netdev_freemem name:br0,
fake_rtable:000000009d76cef0

Apr  8 22:25:21 raydon kernel: [62145.967133] dst_release
dst:000000009d76cef0 dst->dev->name: řKU¡TH

Apr  8 22:25:21 raydon kernel: [62145.967154] dst_release
dst:000000009d76cef0 dst->dev->name: řKU¡TH

Apr  8 22:25:21 raydon kernel: [62145.967180] dst_release
dst:000000009d76cef0 dst->dev->name: řKU¡TH

Apr  8 22:25:21 raydon kernel: [62145.967197] dst_release
dst:000000009d76cef0 dst->dev->name: řKU¡TH

The reason why the hook point should be after <NF_BR_PRE_ROUTING,
NF_BR_PRI_BRNF> is that skbs reference the bridge's fake_rtable in
"br_nf_pre_routing_finish" hooked at <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF>.

And the reason why the hook point should be before <NF_BR_FORWARD,
NF_BR_PRI_BRNF - 1> is that "br_nf_forward_ip" will set the state.out to
the bridge dev. After this hook point, the "nfqnl_dev_drop" triggered by
the bridge's NETDEV_DOWN event can flush the queued skbs before the
bridge's memory is freed, because the state.out now matches the
bridge's dev.

Signed-off-by: Rundong Ge <rdong.ge@gmail.com>
---
 net/netfilter/nfnetlink_queue.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

Comments

Rundong Ge April 18, 2019, 9:58 a.m. UTC | #1
friendly ping

Rundong Ge <rdong.ge@gmail.com> 于2019年4月9日周二 下午2:56写道:
>
> With  bridge-nf-call-iptables enabeled, Skbs go through the bridge
> and enqueued between  <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF> and
> <NF_BR_FORWARD,NF_BR_PRI_BRNF - 1> won't be flushed when bridge is
> down. Then _skb_refdst of skbs in the nfqueue become dangling pointer.
>
> Reproduce steps:
> 1.Create a bridge on the box.
> 2.echo 1 >/proc/sys/net/bridge/bridge-nf-call-iptables
> 3.Add a netfilter hook function to queue the packets to nfqueuenum 0.
>   The hook point must between <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF> and
>   <NF_BR_FORWARD,NF_BR_PRI_BRNF - 1>.
> 4.Add a userspace process "nfqueue_rcv" to continuously read and
>   set_verdict "NF_ACCEPT" to packets from queue 0.
> 5.Continuosly ping client1 from client0
> 6.Send "Ctrl + Z" to pause the "nfqueue_rcv" to simulate the queue
>   congestion.
> 7.Using "ifconfig br0 down&&brctl delbr br0" to delete the bridge.
> 8.At this time the _skb_refdst of skbs in the nfqueue become dangling
>   pointer. If we send "fg" to resume the "nfqueue_rcv", the kernel
>   may try to access the freed memory.
>
> Debug log:
> Here I add debug logs in "netdev_freemem" and "dst_release" to prove
> the freed memory access. As the log shows, the "dst_release" accessed
> bridge's fake_rtable after the bridge was freed.
>
> Apr  8 22:25:14 raydon kernel: [62139.005062] netdev_freemem name:br0,
> fake_rtable:000000009d76cef0
>
> Apr  8 22:25:21 raydon kernel: [62145.967133] dst_release
> dst:000000009d76cef0 dst->dev->name: řKU¡TH
>
> Apr  8 22:25:21 raydon kernel: [62145.967154] dst_release
> dst:000000009d76cef0 dst->dev->name: řKU¡TH
>
> Apr  8 22:25:21 raydon kernel: [62145.967180] dst_release
> dst:000000009d76cef0 dst->dev->name: řKU¡TH
>
> Apr  8 22:25:21 raydon kernel: [62145.967197] dst_release
> dst:000000009d76cef0 dst->dev->name: řKU¡TH
>
> The reason why the hook point should be after <NF_BR_PRE_ROUTING,
> NF_BR_PRI_BRNF> is skbs reference bridge's fake_rtable in
> "br_nf_pre_routing_finish" hooked at <NF_BR_PRE_ROUTING,NF_BR_PRI_BRNF>.
>
> And the reason why the hook point should be before <NF_BR_FORWARD,
> NF_BR_PRI_BRNF - 1> is "br_nf_forward_ip" will set the state.out to
> bridge dev. After this hook point, the "nfqnl_dev_drop" triggered by
> the bridge's NETDEV_DOWN event can flush the queued skbs before
> bridge's memory is freed, because the state.out now matches the
> bridge's dev.
>
> Signed-off-by: Rundong Ge <rdong.ge@gmail.com>
> ---
>  net/netfilter/nfnetlink_queue.c | 24 ++++++++++++++++++------
>  1 file changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 0dcc359..57eb02d 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -905,13 +905,25 @@ static void free_entry(struct nf_queue_entry *entry)
>  dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
>  {
>  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> -       int physinif, physoutif;
> +       struct net_device *physindev, *physoutdev;
> +       struct net_bridge_port *port;
>
> -       physinif = nf_bridge_get_physinif(entry->skb);
> -       physoutif = nf_bridge_get_physoutif(entry->skb);
> -
> -       if (physinif == ifindex || physoutif == ifindex)
> -               return 1;
> +       physindev = nf_bridge_get_physindev(entry->skb);
> +       physoutdev = nf_bridge_get_physoutdev(entry->skb);
> +       if (physindev) {
> +               if (physindev->ifindex == ifindex)
> +                       return 1;
> +               port = br_port_get_rcu(physindev);
> +               if (port && port->br->dev->ifindex == ifindex)
> +                       return 1;
> +       }
> +       if (physoutdev) {
> +               if (physoutdev->ifindex == ifindex)
> +                       return 1;
> +               port = br_port_get_rcu(physoutdev);
> +               if (port && port->br->dev->ifindex == ifindex)
> +                       return 1;
> +       }
>  #endif
>         if (entry->state.in)
>                 if (entry->state.in->ifindex == ifindex)
> --
> 1.8.3.1
>
Pablo Neira Ayuso April 22, 2019, 8:33 a.m. UTC | #2
On Tue, Apr 09, 2019 at 06:56:12AM +0000, Rundong Ge wrote:
[...]
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 0dcc359..57eb02d 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -905,13 +905,25 @@ static void free_entry(struct nf_queue_entry *entry)
>  dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
>  {
>  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> -	int physinif, physoutif;
> +	struct net_device *physindev, *physoutdev;
> +	struct net_bridge_port *port;
>  
> -	physinif = nf_bridge_get_physinif(entry->skb);
> -	physoutif = nf_bridge_get_physoutif(entry->skb);
> -
> -	if (physinif == ifindex || physoutif == ifindex)
> -		return 1;
> +	physindev = nf_bridge_get_physindev(entry->skb);
> +	physoutdev = nf_bridge_get_physoutdev(entry->skb);
> +	if (physindev) {
> +		if (physindev->ifindex == ifindex)
> +			return 1;
> +		port = br_port_get_rcu(physindev);
> +		if (port && port->br->dev->ifindex == ifindex)
> +			return 1;
> +	}
> +	if (physoutdev) {
> +		if (physoutdev->ifindex == ifindex)
> +			return 1;
> +		port = br_port_get_rcu(physoutdev);
> +		if (port && port->br->dev->ifindex == ifindex)
> +			return 1;
> +	}

Either entry->state.in or entry->state.out points to the bridge
device, after this #endif.

>  #endif
>  	if (entry->state.in)
>  		if (entry->state.in->ifindex == ifindex)
> -- 
> 1.8.3.1
>
Rundong Ge April 22, 2019, 9:16 a.m. UTC | #3
br_nf_pre_routing will call the NF_INET_PRE_ROUTING hooks; at this
time neither entry->state.in nor entry->state.out is the bridge device.

NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish);

Pablo Neira Ayuso <pablo@netfilter.org> 于2019年4月22日周一 下午4:34写道:
>
> On Tue, Apr 09, 2019 at 06:56:12AM +0000, Rundong Ge wrote:
> [...]
> > diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> > index 0dcc359..57eb02d 100644
> > --- a/net/netfilter/nfnetlink_queue.c
> > +++ b/net/netfilter/nfnetlink_queue.c
> > @@ -905,13 +905,25 @@ static void free_entry(struct nf_queue_entry *entry)
> >  dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
> >  {
> >  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> > -     int physinif, physoutif;
> > +     struct net_device *physindev, *physoutdev;
> > +     struct net_bridge_port *port;
> >
> > -     physinif = nf_bridge_get_physinif(entry->skb);
> > -     physoutif = nf_bridge_get_physoutif(entry->skb);
> > -
> > -     if (physinif == ifindex || physoutif == ifindex)
> > -             return 1;
> > +     physindev = nf_bridge_get_physindev(entry->skb);
> > +     physoutdev = nf_bridge_get_physoutdev(entry->skb);
> > +     if (physindev) {
> > +             if (physindev->ifindex == ifindex)
> > +                     return 1;
> > +             port = br_port_get_rcu(physindev);
> > +             if (port && port->br->dev->ifindex == ifindex)
> > +                     return 1;
> > +     }
> > +     if (physoutdev) {
> > +             if (physoutdev->ifindex == ifindex)
> > +                     return 1;
> > +             port = br_port_get_rcu(physoutdev);
> > +             if (port && port->br->dev->ifindex == ifindex)
> > +                     return 1;
> > +     }
>
> Either entry->state.in and entry->state.out point to the bridge
> device, after this #endif.
>
> >  #endif
> >       if (entry->state.in)
> >               if (entry->state.in->ifindex == ifindex)
> > --
> > 1.8.3.1
> >
Florian Westphal April 22, 2019, 9:35 a.m. UTC | #4
Rundong Ge <rdong.ge@gmail.com> wrote:
> br_nf_pre_routing will call the NF_INET_PRE_ROUTING hooks, at this
> time both entry->state.in and entry->state.out are not bridge device.
> 
> NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
> skb->dev, NULL,
> br_nf_pre_routing_finish);

skb->dev is munged in setup_prerouting() to be bridge or vlan device on
top of bridge.

That being said, I think we need this fix at least:

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -197,8 +197,15 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		.size	= sizeof(*entry) + route_key_size,
 	};
 
+	if (skb_dst(skb)) {
+		skb_dst_force(skb);
+		if (!skb_dst(skb)) {
+			status = -EHOSTUNREACH;
+			goto err;
+		}
+	}
+
 	nf_queue_entry_get_refs(entry);
-	skb_dst_force(skb);
 
 	switch (entry->state.pf) {
 	case AF_INET:


Then, why not add, in dev_cmp:

	dst = skb_dst(skb);
	if (dst && dst->dev->index == index ...

?
Rundong Ge April 22, 2019, 9:51 a.m. UTC | #5
skb->dev is munged in setup_prerouting() to be bridge or vlan device on
top of bridge.
--Yes, but br_nf_pre_routing_finish will set the skb->dev back to the physindev.

Florian Westphal <fw@strlen.de> 于2019年4月22日周一 下午5:35写道:
>
> Rundong Ge <rdong.ge@gmail.com> wrote:
> > br_nf_pre_routing will call the NF_INET_PRE_ROUTING hooks, at this
> > time both entry->state.in and entry->state.out are not bridge device.
> >
> > NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
> > skb->dev, NULL,
> > br_nf_pre_routing_finish);
>
> skb->dev is munged in setup_prerouting() to be bridge or vlan device on
> top of bridge.
>
> That being said, I think we need this fix at least:
>
> diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
> --- a/net/netfilter/nf_queue.c
> +++ b/net/netfilter/nf_queue.c
> @@ -197,8 +197,15 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
>                 .size   = sizeof(*entry) + route_key_size,
>         };
>
> +       if (skb_dst(skb)) {
> +               skb_dst_force(skb);
> +               if (!skb_dst(skb)) {
> +                       status = -EHOSTUNREACH;
> +                       goto err;
> +               }
> +       }
> +
>         nf_queue_entry_get_refs(entry);
> -       skb_dst_force(skb);
>
>         switch (entry->state.pf) {
>         case AF_INET:
>
>
> Then, why not add, in dev_cmp:
>
>         dst = skb_dst(skb);
>         if (dst && dst->dev->index == index ...
>
> ?
Rundong Ge April 22, 2019, 10:10 a.m. UTC | #6
The hook in my test case is at NF_BR_FORWARD, with priority -2.
At this hook point neither entry->state.out nor entry->state.in is the
bridge device, but the dst has been set to the bridge's fake_rtable.

Rundong Ge <rdong.ge@gmail.com> 于2019年4月22日周一 下午5:51写道:
>
> skb->dev is munged in setup_prerouting() to be bridge or vlan device on
> top of bridge.
> --Yes, but  br_nf_pre_routing_finish will set the skb->dev back to the phyindev.
>
> Florian Westphal <fw@strlen.de> 于2019年4月22日周一 下午5:35写道:
> >
> > Rundong Ge <rdong.ge@gmail.com> wrote:
> > > br_nf_pre_routing will call the NF_INET_PRE_ROUTING hooks, at this
> > > time both entry->state.in and entry->state.out are not bridge device.
> > >
> > > NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
> > > skb->dev, NULL,
> > > br_nf_pre_routing_finish);
> >
> > skb->dev is munged in setup_prerouting() to be bridge or vlan device on
> > top of bridge.
> >
> > That being said, I think we need this fix at least:
> >
> > diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
> > --- a/net/netfilter/nf_queue.c
> > +++ b/net/netfilter/nf_queue.c
> > @@ -197,8 +197,15 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
> >                 .size   = sizeof(*entry) + route_key_size,
> >         };
> >
> > +       if (skb_dst(skb)) {
> > +               skb_dst_force(skb);
> > +               if (!skb_dst(skb)) {
> > +                       status = -EHOSTUNREACH;
> > +                       goto err;
> > +               }
> > +       }
> > +
> >         nf_queue_entry_get_refs(entry);
> > -       skb_dst_force(skb);
> >
> >         switch (entry->state.pf) {
> >         case AF_INET:
> >
> >
> > Then, why not add, in dev_cmp:
> >
> >         dst = skb_dst(skb);
> >         if (dst && dst->dev->index == index ...
> >
> > ?
diff mbox series

Patch

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 0dcc359..57eb02d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -905,13 +905,25 @@  static void free_entry(struct nf_queue_entry *entry)
 dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	int physinif, physoutif;
+	struct net_device *physindev, *physoutdev;
+	struct net_bridge_port *port;
 
-	physinif = nf_bridge_get_physinif(entry->skb);
-	physoutif = nf_bridge_get_physoutif(entry->skb);
-
-	if (physinif == ifindex || physoutif == ifindex)
-		return 1;
+	physindev = nf_bridge_get_physindev(entry->skb);
+	physoutdev = nf_bridge_get_physoutdev(entry->skb);
+	if (physindev) {
+		if (physindev->ifindex == ifindex)
+			return 1;
+		port = br_port_get_rcu(physindev);
+		if (port && port->br->dev->ifindex == ifindex)
+			return 1;
+	}
+	if (physoutdev) {
+		if (physoutdev->ifindex == ifindex)
+			return 1;
+		port = br_port_get_rcu(physoutdev);
+		if (port && port->br->dev->ifindex == ifindex)
+			return 1;
+	}
 #endif
 	if (entry->state.in)
 		if (entry->state.in->ifindex == ifindex)