diff mbox

[1/2] ipv6: avoid blackhole and prohibited entries upon prefix purge [v2]

Message ID 2A507F9D-3D53-475F-8FA9-9E6CFEE9C97A@ipflavors.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Romain KUNTZ Jan. 5, 2013, 9:44 p.m. UTC
Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
del unreachable route when an addr is deleted on lo), because
ip6_route_lookup() may also return blackhole and prohibited
entry. However, these entries have a NULL rt6i_table argument,
which provokes an Oops in __ip6_del_rt() when trying to lock
rt6i_table->tb6_lock.

Beside, when purging a prefix, blakhole and prohibited entries
should not be selected because they are not what we are looking
for.

We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
are skipped during lookup and that the correct entry is returned.

[v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
oprations on rt (as suggested by Eric Dumazet).

Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
---
 include/net/ip6_route.h |    2 ++
 net/ipv6/addrconf.c     |    4 +++-
 net/ipv6/fib6_rules.c   |    4 ++++
 3 files changed, 9 insertions(+), 1 deletions(-)

Comments

Nicolas Dichtel Jan. 7, 2013, 10:25 a.m. UTC | #1
Le 05/01/2013 22:44, Romain KUNTZ a écrit :
> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
> del unreachable route when an addr is deleted on lo), because
> ip6_route_lookup() may also return blackhole and prohibited
> entry. However, these entries have a NULL rt6i_table argument,
> which provokes an Oops in __ip6_del_rt() when trying to lock
> rt6i_table->tb6_lock.
>
> Beside, when purging a prefix, blakhole and prohibited entries
> should not be selected because they are not what we are looking
> for.
>
> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
> are skipped during lookup and that the correct entry is returned.
>
> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
> oprations on rt (as suggested by Eric Dumazet).
>
> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
> ---
>   include/net/ip6_route.h |    2 ++
>   net/ipv6/addrconf.c     |    4 +++-
>   net/ipv6/fib6_rules.c   |    4 ++++
>   3 files changed, 9 insertions(+), 1 deletions(-)
>
> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
> index 27d8318..3c93743 100644
> --- a/include/net/ip6_route.h
> +++ b/include/net/ip6_route.h
> @@ -30,6 +30,8 @@ struct route_info {
>   #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
>   #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
>   #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
> +#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
> +#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
>
>   /*
>    * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 408cac4a..1891e23 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>   		fl6.flowi6_oif = ifp->idev->dev->ifindex;
>   		fl6.daddr = prefix;
>   		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
> -							 RT6_LOOKUP_F_IFACE);
> +						RT6_LOOKUP_F_IFACE |
> +						RT6_LOOKUP_F_NO_BLK_HOLE |
> +						RT6_LOOKUP_F_NO_PROHIBIT);
>
>   		if (rt != net->ipv6.ip6_null_entry &&
Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
net->ipv6.ip6_prohibit_entry) like for the null_entry?
It will also avoid adding more flags.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Romain KUNTZ Jan. 7, 2013, 11:30 a.m. UTC | #2
Hello Nicolas,

On Jan 7, 2013, at 11:25 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:

> Le 05/01/2013 22:44, Romain KUNTZ a écrit :
>> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
>> del unreachable route when an addr is deleted on lo), because
>> ip6_route_lookup() may also return blackhole and prohibited
>> entry. However, these entries have a NULL rt6i_table argument,
>> which provokes an Oops in __ip6_del_rt() when trying to lock
>> rt6i_table->tb6_lock.
>> 
>> Beside, when purging a prefix, blakhole and prohibited entries
>> should not be selected because they are not what we are looking
>> for.
>> 
>> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
>> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
>> are skipped during lookup and that the correct entry is returned.
>> 
>> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
>> oprations on rt (as suggested by Eric Dumazet).
>> 
>> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
>> ---
>>  include/net/ip6_route.h |    2 ++
>>  net/ipv6/addrconf.c     |    4 +++-
>>  net/ipv6/fib6_rules.c   |    4 ++++
>>  3 files changed, 9 insertions(+), 1 deletions(-)
>> 
>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>> index 27d8318..3c93743 100644
>> --- a/include/net/ip6_route.h
>> +++ b/include/net/ip6_route.h
>> @@ -30,6 +30,8 @@ struct route_info {
>>  #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
>>  #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
>>  #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
>> +#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
>> +#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
>> 
>>  /*
>>   * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>> index 408cac4a..1891e23 100644
>> --- a/net/ipv6/addrconf.c
>> +++ b/net/ipv6/addrconf.c
>> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>>  		fl6.flowi6_oif = ifp->idev->dev->ifindex;
>>  		fl6.daddr = prefix;
>>  		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
>> -							 RT6_LOOKUP_F_IFACE);
>> +						RT6_LOOKUP_F_IFACE |
>> +						RT6_LOOKUP_F_NO_BLK_HOLE |
>> +						RT6_LOOKUP_F_NO_PROHIBIT);
>> 
>>  		if (rt != net->ipv6.ip6_null_entry &&
> Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
> net->ipv6.ip6_prohibit_entry) like for the null_entry?
> It will also avoid adding more flags.

Your proposal would only solve part of the problem (the Oops in __ip6_del_rt()). Another problem here is that blackhole and prohibited rules should not be selected when trying to purge a prefix (correct me if I'm wrong) because they are not what we are looking for. This can prevent the targeted prefix from being purged. 

Thank you,
Romain


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Dichtel Jan. 7, 2013, 3:43 p.m. UTC | #3
Le 07/01/2013 12:30, Romain KUNTZ a écrit :
> Hello Nicolas,
>
> On Jan 7, 2013, at 11:25 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>
>> Le 05/01/2013 22:44, Romain KUNTZ a écrit :
>>> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
>>> del unreachable route when an addr is deleted on lo), because
>>> ip6_route_lookup() may also return blackhole and prohibited
>>> entry. However, these entries have a NULL rt6i_table argument,
>>> which provokes an Oops in __ip6_del_rt() when trying to lock
>>> rt6i_table->tb6_lock.
>>>
>>> Beside, when purging a prefix, blakhole and prohibited entries
>>> should not be selected because they are not what we are looking
>>> for.
>>>
>>> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
>>> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
>>> are skipped during lookup and that the correct entry is returned.
>>>
>>> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
>>> oprations on rt (as suggested by Eric Dumazet).
>>>
>>> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
>>> ---
>>>   include/net/ip6_route.h |    2 ++
>>>   net/ipv6/addrconf.c     |    4 +++-
>>>   net/ipv6/fib6_rules.c   |    4 ++++
>>>   3 files changed, 9 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>>> index 27d8318..3c93743 100644
>>> --- a/include/net/ip6_route.h
>>> +++ b/include/net/ip6_route.h
>>> @@ -30,6 +30,8 @@ struct route_info {
>>>   #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
>>>   #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
>>>   #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
>>> +#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
>>> +#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
>>>
>>>   /*
>>>    * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>> index 408cac4a..1891e23 100644
>>> --- a/net/ipv6/addrconf.c
>>> +++ b/net/ipv6/addrconf.c
>>> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>>>   		fl6.flowi6_oif = ifp->idev->dev->ifindex;
>>>   		fl6.daddr = prefix;
>>>   		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
>>> -							 RT6_LOOKUP_F_IFACE);
>>> +						RT6_LOOKUP_F_IFACE |
>>> +						RT6_LOOKUP_F_NO_BLK_HOLE |
>>> +						RT6_LOOKUP_F_NO_PROHIBIT);
>>>
>>>   		if (rt != net->ipv6.ip6_null_entry &&
>> Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
>> net->ipv6.ip6_prohibit_entry) like for the null_entry?
>> It will also avoid adding more flags.
>
> Your proposal would only solve part of the problem (the Oops in __ip6_del_rt()). Another problem here is that blackhole and prohibited rules should not be selected when trying to purge a prefix (correct me if I'm wrong) because they are not what we are looking for. This can prevent the targeted prefix from being purged.
In fact, I'm not sure to get the scenario. This part of the code just tries
to remove the connected prefix, added by the kernel when the address was added.
Can you describe your scenario?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Romain KUNTZ Jan. 8, 2013, 11:38 a.m. UTC | #4
On Jan 7, 2013, at 16:43 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
> Le 07/01/2013 12:30, Romain KUNTZ a écrit :
>> Hello Nicolas,
>> 
>> On Jan 7, 2013, at 11:25 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>> 
>>> Le 05/01/2013 22:44, Romain KUNTZ a écrit :
>>>> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
>>>> del unreachable route when an addr is deleted on lo), because
>>>> ip6_route_lookup() may also return blackhole and prohibited
>>>> entry. However, these entries have a NULL rt6i_table argument,
>>>> which provokes an Oops in __ip6_del_rt() when trying to lock
>>>> rt6i_table->tb6_lock.
>>>> 
>>>> Beside, when purging a prefix, blakhole and prohibited entries
>>>> should not be selected because they are not what we are looking
>>>> for.
>>>> 
>>>> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
>>>> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
>>>> are skipped during lookup and that the correct entry is returned.
>>>> 
>>>> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
>>>> oprations on rt (as suggested by Eric Dumazet).
>>>> 
>>>> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
>>>> ---
>>>>  include/net/ip6_route.h |    2 ++
>>>>  net/ipv6/addrconf.c     |    4 +++-
>>>>  net/ipv6/fib6_rules.c   |    4 ++++
>>>>  3 files changed, 9 insertions(+), 1 deletions(-)
>>>> 
>>>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>>>> index 27d8318..3c93743 100644
>>>> --- a/include/net/ip6_route.h
>>>> +++ b/include/net/ip6_route.h
>>>> @@ -30,6 +30,8 @@ struct route_info {
>>>>  #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
>>>>  #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
>>>>  #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
>>>> +#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
>>>> +#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
>>>> 
>>>>  /*
>>>>   * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
>>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>>> index 408cac4a..1891e23 100644
>>>> --- a/net/ipv6/addrconf.c
>>>> +++ b/net/ipv6/addrconf.c
>>>> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>>>>  		fl6.flowi6_oif = ifp->idev->dev->ifindex;
>>>>  		fl6.daddr = prefix;
>>>>  		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
>>>> -							 RT6_LOOKUP_F_IFACE);
>>>> +						RT6_LOOKUP_F_IFACE |
>>>> +						RT6_LOOKUP_F_NO_BLK_HOLE |
>>>> +						RT6_LOOKUP_F_NO_PROHIBIT);
>>>> 
>>>>  		if (rt != net->ipv6.ip6_null_entry &&
>>> Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
>>> net->ipv6.ip6_prohibit_entry) like for the null_entry?
>>> It will also avoid adding more flags.
>> 
>> Your proposal would only solve part of the problem (the Oops in __ip6_del_rt()). Another problem here is that blackhole and prohibited rules should not be selected when trying to purge a prefix (correct me if I'm wrong) because they are not what we are looking for. This can prevent the targeted prefix from being purged.
> In fact, I'm not sure to get the scenario. This part of the code just tries
> to remove the connected prefix, added by the kernel when the address was added.
> Can you describe your scenario?


I should have given more details from the beginning, my mistake. The scenario where this happens is quite simple:

- install a blackhole rule (e.g. "from 2001:db8::1000 blackhole" - the source address does not matter at all) with the FIB_RULE_FIND_SADDR flag set (setting this flag is not possible with iproute2, but for test purpose you can use the enclosed patch against the latest iproute2 tree and then use "./ip -6 rule add from 2001:db8::1000/128 blackhole prio 1000").

- try to delete an address from one of your interface (any address, it can be different from the one you used for the blackhole rule): "ip -6 addr del <v6-addr>/64 dev eth<x>"

and you get an Oops. When trying to remove the connected prefix, the fib6_rule_match() function will match the blackhole rule because RT6_LOOKUP_F_HAS_SADDR is not set and FIB_RULE_FIND_SADDR is set.

With your proposal, the Oops is fixed but the connected prefix route is not deleted. With my initial patch, the Oops is fixed and the connected prefix route is also deleted.

Thanks,
Romain
Nicolas Dichtel Jan. 8, 2013, 4:22 p.m. UTC | #5
Le 08/01/2013 12:38, Romain KUNTZ a écrit :
> On Jan 7, 2013, at 16:43 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>> Le 07/01/2013 12:30, Romain KUNTZ a écrit :
>>> Hello Nicolas,
>>>
>>> On Jan 7, 2013, at 11:25 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>>>
>>>> Le 05/01/2013 22:44, Romain KUNTZ a écrit :
>>>>> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
>>>>> del unreachable route when an addr is deleted on lo), because
>>>>> ip6_route_lookup() may also return blackhole and prohibited
>>>>> entry. However, these entries have a NULL rt6i_table argument,
>>>>> which provokes an Oops in __ip6_del_rt() when trying to lock
>>>>> rt6i_table->tb6_lock.
>>>>>
>>>>> Beside, when purging a prefix, blakhole and prohibited entries
>>>>> should not be selected because they are not what we are looking
>>>>> for.
>>>>>
>>>>> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
>>>>> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
>>>>> are skipped during lookup and that the correct entry is returned.
>>>>>
>>>>> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
>>>>> oprations on rt (as suggested by Eric Dumazet).
>>>>>
>>>>> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
>>>>> ---
>>>>>   include/net/ip6_route.h |    2 ++
>>>>>   net/ipv6/addrconf.c     |    4 +++-
>>>>>   net/ipv6/fib6_rules.c   |    4 ++++
>>>>>   3 files changed, 9 insertions(+), 1 deletions(-)
>>>>>
>>>>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>>>>> index 27d8318..3c93743 100644
>>>>> --- a/include/net/ip6_route.h
>>>>> +++ b/include/net/ip6_route.h
>>>>> @@ -30,6 +30,8 @@ struct route_info {
>>>>>   #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
>>>>>   #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
>>>>>   #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
>>>>> +#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
>>>>> +#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
>>>>>
>>>>>   /*
>>>>>    * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
>>>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>>>> index 408cac4a..1891e23 100644
>>>>> --- a/net/ipv6/addrconf.c
>>>>> +++ b/net/ipv6/addrconf.c
>>>>> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>>>>>   		fl6.flowi6_oif = ifp->idev->dev->ifindex;
>>>>>   		fl6.daddr = prefix;
>>>>>   		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
>>>>> -							 RT6_LOOKUP_F_IFACE);
>>>>> +						RT6_LOOKUP_F_IFACE |
>>>>> +						RT6_LOOKUP_F_NO_BLK_HOLE |
>>>>> +						RT6_LOOKUP_F_NO_PROHIBIT);
>>>>>
>>>>>   		if (rt != net->ipv6.ip6_null_entry &&
>>>> Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
>>>> net->ipv6.ip6_prohibit_entry) like for the null_entry?
>>>> It will also avoid adding more flags.
>>>
>>> Your proposal would only solve part of the problem (the Oops in __ip6_del_rt()). Another problem here is that blackhole and prohibited rules should not be selected when trying to purge a prefix (correct me if I'm wrong) because they are not what we are looking for. This can prevent the targeted prefix from being purged.
>> In fact, I'm not sure to get the scenario. This part of the code just tries
>> to remove the connected prefix, added by the kernel when the address was added.
>> Can you describe your scenario?
>
>
> I should have given more details from the beginning, my mistake. The scenario where this happens is quite simple:
>
> - install a blackhole rule (e.g. "from 2001:db8::1000 blackhole" - the source address does not matter at all) with the FIB_RULE_FIND_SADDR flag set (setting this flag is not possible with iproute2, but for test purpose you can use the enclosed patch against the latest iproute2 tree and then use "./ip -6 rule add from 2001:db8::1000/128 blackhole prio 1000").
>
> - try to delete an address from one of your interface (any address, it can be different from the one you used for the blackhole rule): "ip -6 addr del <v6-addr>/64 dev eth<x>"
>
> and you get an Oops. When trying to remove the connected prefix, the fib6_rule_match() function will match the blackhole rule because RT6_LOOKUP_F_HAS_SADDR is not set and FIB_RULE_FIND_SADDR is set.
>
> With your proposal, the Oops is fixed but the connected prefix route is not deleted. With my initial patch, the Oops is fixed and the connected prefix route is also deleted.
Ok, I get it. I thin,there is two bugs: the oops and the wrong lookup.

Your proposal fix only a particular case. Try this (with your ip route2 patch):
ip -6 addr add 2002::1/64 dev eth0
ip -6 route add 2002::/64 table 257 dev eth0
ip -6 addr del 2002::1/64 dev eth0

The route deleted is not the connected prefix, but the route added in table 257.
The connected prefix is still here in the main table. It's not what we want.
Maybe the lookup should be done directly into the right table, ie table 
RT6_TABLE_PREFIX. What do you think?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
YOSHIFUJI Hideaki / 吉藤英明 Jan. 8, 2013, 5:18 p.m. UTC | #6
Nicolas Dichtel wrote:
> Le 08/01/2013 12:38, Romain KUNTZ a écrit :
>> On Jan 7, 2013, at 16:43 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>>> Le 07/01/2013 12:30, Romain KUNTZ a écrit :
>>>> Hello Nicolas,
>>>>
>>>> On Jan 7, 2013, at 11:25 , Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:
>>>>
>>>>> Le 05/01/2013 22:44, Romain KUNTZ a écrit :
>>>>>> Mobile IPv6 provokes a kernel Oops since commit 64c6d08e (ipv6:
>>>>>> del unreachable route when an addr is deleted on lo), because
>>>>>> ip6_route_lookup() may also return blackhole and prohibited
>>>>>> entry. However, these entries have a NULL rt6i_table argument,
>>>>>> which provokes an Oops in __ip6_del_rt() when trying to lock
>>>>>> rt6i_table->tb6_lock.
>>>>>>
>>>>>> Beside, when purging a prefix, blakhole and prohibited entries
>>>>>> should not be selected because they are not what we are looking
>>>>>> for.
>>>>>>
>>>>>> We fix this by adding two new lookup flags (RT6_LOOKUP_F_NO_BLK_HOLE
>>>>>> and RT6_LOOKUP_F_NO_PROHIBIT) in order to ensure that such entries
>>>>>> are skipped during lookup and that the correct entry is returned.
>>>>>>
>>>>>> [v2]: use 'goto out;' instead of 'goto again;' to avoid unnecessary
>>>>>> oprations on rt (as suggested by Eric Dumazet).
>>>>>>
>>>>>> Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
>>>>>> ---
>>>>>>   include/net/ip6_route.h |    2 ++
>>>>>>   net/ipv6/addrconf.c     |    4 +++-
>>>>>>   net/ipv6/fib6_rules.c   |    4 ++++
>>>>>>   3 files changed, 9 insertions(+), 1 deletions(-)
>>>>>>
>>>>>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>>>>>> index 27d8318..3c93743 100644
>>>>>> --- a/include/net/ip6_route.h
>>>>>> +++ b/include/net/ip6_route.h
>>>>>> @@ -30,6 +30,8 @@ struct route_info {
>>>>>>   #define RT6_LOOKUP_F_SRCPREF_TMP    0x00000008
>>>>>>   #define RT6_LOOKUP_F_SRCPREF_PUBLIC    0x00000010
>>>>>>   #define RT6_LOOKUP_F_SRCPREF_COA    0x00000020
>>>>>> +#define RT6_LOOKUP_F_NO_BLK_HOLE    0x00000040
>>>>>> +#define RT6_LOOKUP_F_NO_PROHIBIT    0x00000080
>>>>>>
>>>>>>   /*
>>>>>>    * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
>>>>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>>>>> index 408cac4a..1891e23 100644
>>>>>> --- a/net/ipv6/addrconf.c
>>>>>> +++ b/net/ipv6/addrconf.c
>>>>>> @@ -948,7 +948,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
>>>>>>           fl6.flowi6_oif = ifp->idev->dev->ifindex;
>>>>>>           fl6.daddr = prefix;
>>>>>>           rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
>>>>>> -                             RT6_LOOKUP_F_IFACE);
>>>>>> +                        RT6_LOOKUP_F_IFACE |
>>>>>> +                        RT6_LOOKUP_F_NO_BLK_HOLE |
>>>>>> +                        RT6_LOOKUP_F_NO_PROHIBIT);
>>>>>>
>>>>>>           if (rt != net->ipv6.ip6_null_entry &&
>>>>> Is it not simpler to test the result here (net->ipv6.ip6_blk_hole_entry and
>>>>> net->ipv6.ip6_prohibit_entry) like for the null_entry?
>>>>> It will also avoid adding more flags.
>>>>
>>>> Your proposal would only solve part of the problem (the Oops in __ip6_del_rt()). Another problem here is that blackhole and prohibited rules should not be selected when trying to purge a prefix (correct me if I'm wrong) because they are not what we are looking for. This can prevent the targeted prefix from being purged.
>>> In fact, I'm not sure to get the scenario. This part of the code just tries
>>> to remove the connected prefix, added by the kernel when the address was added.
>>> Can you describe your scenario?
>>
>>
>> I should have given more details from the beginning, my mistake. The scenario where this happens is quite simple:
>>
>> - install a blackhole rule (e.g. "from 2001:db8::1000 blackhole" - the source address does not matter at all) with the FIB_RULE_FIND_SADDR flag set (setting this flag is not possible with iproute2, but for test purpose you can use the enclosed patch against the latest iproute2 tree and then use "./ip -6 rule add from 2001:db8::1000/128 blackhole prio 1000").
>>
>> - try to delete an address from one of your interface (any address, it can be different from the one you used for the blackhole rule): "ip -6 addr del <v6-addr>/64 dev eth<x>"
>>
>> and you get an Oops. When trying to remove the connected prefix, the fib6_rule_match() function will match the blackhole rule because RT6_LOOKUP_F_HAS_SADDR is not set and FIB_RULE_FIND_SADDR is set.
>>
>> With your proposal, the Oops is fixed but the connected prefix route is not deleted. With my initial patch, the Oops is fixed and the connected prefix route is also deleted.
> Ok, I get it. I thin,there is two bugs: the oops and the wrong lookup.
> 
> Your proposal fix only a particular case. Try this (with your ip route2 patch):
> ip -6 addr add 2002::1/64 dev eth0
> ip -6 route add 2002::/64 table 257 dev eth0
> ip -6 addr del 2002::1/64 dev eth0
> 
> The route deleted is not the connected prefix, but the route added in table 257.
> The connected prefix is still here in the main table. It's not what we want.
> Maybe the lookup should be done directly into the right table, ie table RT6_TABLE_PREFIX. What do you think?

I agree.  I think we can use addrconf_get_prefix_route() here.

--yoshfuji
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d8318..3c93743 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -30,6 +30,8 @@  struct route_info {
 #define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
 #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
 #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
+#define RT6_LOOKUP_F_NO_BLK_HOLE	0x00000040
+#define RT6_LOOKUP_F_NO_PROHIBIT	0x00000080
 
 /*
  * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 408cac4a..1891e23 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -948,7 +948,9 @@  static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		fl6.flowi6_oif = ifp->idev->dev->ifindex;
 		fl6.daddr = prefix;
 		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
-							 RT6_LOOKUP_F_IFACE);
+						RT6_LOOKUP_F_IFACE |
+						RT6_LOOKUP_F_NO_BLK_HOLE |
+						RT6_LOOKUP_F_NO_PROHIBIT);
 
 		if (rt != net->ipv6.ip6_null_entry &&
 		    addrconf_is_prefix_route(rt)) {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432..9b48128 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -64,9 +64,13 @@  static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto discard_pkt;
 	default:
 	case FR_ACT_BLACKHOLE:
+		if (flags & RT6_LOOKUP_F_NO_BLK_HOLE)
+			goto out;
 		rt = net->ipv6.ip6_blk_hole_entry;
 		goto discard_pkt;
 	case FR_ACT_PROHIBIT:
+		if (flags & RT6_LOOKUP_F_NO_PROHIBIT)
+			goto out;
 		rt = net->ipv6.ip6_prohibit_entry;
 		goto discard_pkt;
 	}