
[PATCHv2,ipsec-next] xfrm: Namespacify xfrm_policy_sk_bundles

Message ID 1387337658-28951-1-git-send-email-fan.du@windriver.com
State Not Applicable, archived
Delegated to: David Miller

Commit Message

fan.du Dec. 18, 2013, 3:34 a.m. UTC
xfrm_policy_sk_bundles, which is protected by net->xfrm.xfrm_policy_sk_bundle_lock,
should be moved into the netns xfrm structure; otherwise xfrm_policy_sk_bundles
can be corrupted from a different net namespace.

Moreover, xfrm_policy_sk_bundle_lock is currently used in the two scenarios below:

1. xfrm_lookup (process context)  vs  __xfrm_garbage_collect (softirq context)
2. xfrm_lookup (process context)  vs  __xfrm_garbage_collect (process context,
                                                when the SPD changes or a device goes down)

We can use xchg() to avoid the spinlock and still cover the scenarios above,
inspired by the discussion in: http://marc.info/?l=linux-netdev&m=138713363113003&w=2

Signed-off-by: Fan Du <fan.du@windriver.com>
---
v2:
  Fix incorrect commit log.

---
 include/net/netns/xfrm.h |    2 +-
 net/xfrm/xfrm_policy.c   |   17 +++--------------
 2 files changed, 4 insertions(+), 15 deletions(-)

Comments

Eric Dumazet Dec. 18, 2013, 4:50 a.m. UTC | #1
On Wed, 2013-12-18 at 11:34 +0800, Fan Du wrote:
> xfrm_policy_sk_bundles, protected by net->xfrm.xfrm_policy_sk_bundle_lock
> should be put into netns xfrm structure, otherwise xfrm_policy_sk_bundles
> can be corrupted from different net namespace.
> 
> Moreover current xfrm_policy_sk_bundle_lock used in below two scenarios:
> 
> 1. xfrm_lookup(Process context)  vs  __xfrm_garbage_collect(softirq context)
> 2. xfrm_lookup(Process context)  vs  __xfrm_garbage_collect(Process context
>                                                 when SPD change or dev down) 
> 
> we can use xchg to avoid the spinlock, at the same time cover above scenarios,
> inspired by discussion in: http://marc.info/?l=linux-netdev&m=138713363113003&w=2
> 
> Signed-off-by: Fan Du <fan.du@windriver.com>
> ---
> v2:
>   Fix incorrect commit log.
> 
> ---
>  include/net/netns/xfrm.h |    2 +-
>  net/xfrm/xfrm_policy.c   |   17 +++--------------
>  2 files changed, 4 insertions(+), 15 deletions(-)
> 
> diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
> index 1006a26..4a30b1b 100644
> --- a/include/net/netns/xfrm.h
> +++ b/include/net/netns/xfrm.h
> @@ -58,9 +58,9 @@ struct netns_xfrm {
>  	struct dst_ops		xfrm6_dst_ops;
>  #endif
>  	spinlock_t xfrm_state_lock;
> -	spinlock_t xfrm_policy_sk_bundle_lock;
>  	rwlock_t xfrm_policy_lock;
>  	struct mutex xfrm_cfg_mutex;
> +	struct dst_entry *xfrm_policy_sk_bundles;
>  };
>  
>  #endif
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index a7487f3..26d79c0 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -39,8 +39,6 @@
>  #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
>  #define XFRM_MAX_QUEUE_LEN	100
>  
> -static struct dst_entry *xfrm_policy_sk_bundles;
> -
>  static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
>  static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
>  						__read_mostly;
> @@ -2108,12 +2106,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
>  			}
>  
>  			dst_hold(&xdst->u.dst);
> -
> -			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
> -			xdst->u.dst.next = xfrm_policy_sk_bundles;
> -			xfrm_policy_sk_bundles = &xdst->u.dst;
> -			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
> -
> +			xdst->u.dst.next = xchg(&net->xfrm.xfrm_policy_sk_bundles,
> +						&xdst->u.dst);

This is not safe.

Take a look at include/linux/llist.h if you really want to avoid the
spinlock.



fan.du Dec. 18, 2013, 5:33 a.m. UTC | #2
On December 18, 2013 at 12:50, Eric Dumazet wrote:
> On Wed, 2013-12-18 at 11:34 +0800, Fan Du wrote:
>> xfrm_policy_sk_bundles, protected by net->xfrm.xfrm_policy_sk_bundle_lock
>> should be put into netns xfrm structure, otherwise xfrm_policy_sk_bundles
>> can be corrupted from different net namespace.
>>
>> Moreover current xfrm_policy_sk_bundle_lock used in below two scenarios:
>>
>> 1. xfrm_lookup(Process context)  vs  __xfrm_garbage_collect(softirq context)
>> 2. xfrm_lookup(Process context)  vs  __xfrm_garbage_collect(Process context
>>                                                  when SPD change or dev down)
>>
>> we can use xchg to avoid the spinlock, at the same time cover above scenarios,
>> inspired by discussion in: http://marc.info/?l=linux-netdev&m=138713363113003&w=2
>>
>> Signed-off-by: Fan Du<fan.du@windriver.com>
>> ---
>> v2:
>>    Fix incorrect commit log.
>>
>> ---
>>   include/net/netns/xfrm.h |    2 +-
>>   net/xfrm/xfrm_policy.c   |   17 +++--------------
>>   2 files changed, 4 insertions(+), 15 deletions(-)
>>
>> diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
>> index 1006a26..4a30b1b 100644
>> --- a/include/net/netns/xfrm.h
>> +++ b/include/net/netns/xfrm.h
>> @@ -58,9 +58,9 @@ struct netns_xfrm {
>>   	struct dst_ops		xfrm6_dst_ops;
>>   #endif
>>   	spinlock_t xfrm_state_lock;
>> -	spinlock_t xfrm_policy_sk_bundle_lock;
>>   	rwlock_t xfrm_policy_lock;
>>   	struct mutex xfrm_cfg_mutex;
>> +	struct dst_entry *xfrm_policy_sk_bundles;
>>   };
>>
>>   #endif
>> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
>> index a7487f3..26d79c0 100644
>> --- a/net/xfrm/xfrm_policy.c
>> +++ b/net/xfrm/xfrm_policy.c
>> @@ -39,8 +39,6 @@
>>   #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
>>   #define XFRM_MAX_QUEUE_LEN	100
>>
>> -static struct dst_entry *xfrm_policy_sk_bundles;
>> -
>>   static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
>>   static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
>>   						__read_mostly;
>> @@ -2108,12 +2106,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
>>   			}
>>
>>   			dst_hold(&xdst->u.dst);
>> -
>> -			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> -			xdst->u.dst.next = xfrm_policy_sk_bundles;
>> -			xfrm_policy_sk_bundles =&xdst->u.dst;
>> -			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> -
>> +			xdst->u.dst.next = xchg(&net->xfrm.xfrm_policy_sk_bundles,
>> +						&xdst->u.dst);
>
> This is not safe.
>
> Take a look at include/linux/llist.h if you really want to avoid the
> spinlock.

Hi Eric

Thanks for your attention.
I don't follow why xchg here is unsafe; could you please elaborate a bit more?
Cong Wang Dec. 18, 2013, 5:33 a.m. UTC | #3
On Wed, 18 Dec 2013 at 04:50 GMT, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2013-12-18 at 11:34 +0800, Fan Du wrote:
>> -
>> -			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> -			xdst->u.dst.next = xfrm_policy_sk_bundles;
>> -			xfrm_policy_sk_bundles = &xdst->u.dst;
>> -			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> -
>> +			xdst->u.dst.next = xchg(&net->xfrm.xfrm_policy_sk_bundles,
>> +						&xdst->u.dst);
>
> This is not safe.
>
> Take a look at include/linux/llist.h if you really want to avoid the
> spinlock.
>

Exactly, the xfrm_policy_sk_bundles list is only traversed after deletion.

Eric Dumazet Dec. 18, 2013, 5:44 a.m. UTC | #4
On Wed, 2013-12-18 at 13:33 +0800, Fan Du wrote:

> Hi Eric
> 
> Thanks for your attention,
> I'm not follow why xchg here is unsafe, could you please elaborate a bit more?

Read include/linux/llist.h and the llist_add_batch() implementation;
it will be very clear why your implementation is not safe.




fan.du Dec. 19, 2013, 1:35 a.m. UTC | #5
Hi, Eric and Cong

On December 18, 2013 at 12:50, Eric Dumazet wrote:
>>   static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
>> >    static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
>> >    						__read_mostly;
>> >  @@ -2108,12 +2106,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
>> >    			}
>> >
>> >    			dst_hold(&xdst->u.dst);
>> >  -
>> >  -			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> >  -			xdst->u.dst.next = xfrm_policy_sk_bundles;
>> >  -			xfrm_policy_sk_bundles =&xdst->u.dst;
>> >  -			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
>> >  -
>> >  +			xdst->u.dst.next = xchg(&net->xfrm.xfrm_policy_sk_bundles,
>> >  +						&xdst->u.dst);
> This is not safe.
>
> Take a look at include/linux/llist.h if you really want to avoid the
> spinlock.

Here is my understanding of llist_add_batch(), assuming a single entry is added
to the list, i.e., new_last equals new_first.

   1         do {
   2                 new->next = first = ACCESS_ONCE(head->first);
   3         } while (cmpxchg(&head->first, first, new) != first);


   Caller 1: add new1 to head                          Caller 2: add new2 to head
       line 2: new1->next = old_head
                                                       line 2: new2->next = old_head
                                                       line 3: cmpxchg succeeds.

                                                       After this: new2 -> old_head
       line 3: cmpxchg fails; retrying from
               line 2 succeeds, and after this:
               new1 -> new2 -> old_head

So, in order not to use a lock, the add simply retries when cmpxchg finds that someone
else has updated the head in the meantime. For the code in this patch, the problem is
that after the xchg() the old head still has to be assigned to the new entry's ->next,
and that assignment races with the delete side, which saves the head for freeing after
setting it to NULL; the traversal of the saved head may then not see a consistent
list, i.e., it walks a broken one.
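
Spelled out against the hunk in xfrm_lookup(), the window looks like this (the
local variable "old" is only for illustration):

        /* what "xdst->u.dst.next = xchg(...)" decomposes into: */
        old = xchg(&net->xfrm.xfrm_policy_sk_bundles, &xdst->u.dst);
        /*
         * Window: xdst is already visible as the list head, but
         * xdst->u.dst.next does not point to the old chain yet.
         * If __xfrm_garbage_collect() runs
         *     head = xchg(&net->xfrm.xfrm_policy_sk_bundles, NULL);
         * right here, it follows whatever xdst->u.dst.next happens
         * to hold (not the old chain), misses the old bundles, and
         * the assignment below then hangs them off an entry the GC
         * has already passed to dst_free().
         */
        xdst->u.dst.next = old;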

I think an analogue of llist_add_batch() for the update part would be OK here:

                         struct dst_entry *first;
                         do {
                                 xdst->u.dst.next = first = ACCESS_ONCE(xfrm_policy_sk_bundles);
                         } while (cmpxchg(&xfrm_policy_sk_bundles, first, &xdst->u.dst) != first);


And the deleting part:

                         head = xchg(&net->xfrm.xfrm_policy_sk_bundles, NULL);
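
Annotated, the reason this pair is safe where the bare xchg() was not (same
pattern as above, just written against the per-netns field):

        struct dst_entry *first;

        /* ->next is filled in before cmpxchg() makes the entry reachable, so a
         * concurrent
         *     head = xchg(&net->xfrm.xfrm_policy_sk_bundles, NULL);
         * either sees the fully linked entry or does not see it at all; there
         * is no half-linked window.
         */
        do {
                xdst->u.dst.next = first = ACCESS_ONCE(net->xfrm.xfrm_policy_sk_bundles);
        } while (cmpxchg(&net->xfrm.xfrm_policy_sk_bundles, first, &xdst->u.dst) != first);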
Eric Dumazet Dec. 19, 2013, 2:15 a.m. UTC | #6
On Thu, 2013-12-19 at 09:35 +0800, Fan Du wrote:
> Hi, Eric and Cong
> 

> Here is my understanding about llist_add_batch, assume when add one single
> entry into list, i.e., new_last equates new_first.
> 
>    1         do {
>    2                 new->next = first = ACCESS_ONCE(head->first);
>    3         } while (cmpxchg(&head->first, first, new) != first);
> 
> 
> Caller 1:Add new1 into head                          Caller 2:Add new2 into head
>       line 2: Link head into new1
>                                                       line 2: Link head into new2
>                                                       line 3: Make the cmpxchg, then succeed.
> 
>                                                       After this, new2 -> old_head
>       line 3: Make the cmpxchg, failed, try
>               again will succeed, after this
>               new1 -> new2 -> old_head
> 
> So in order to not use locks, try again if cmpxchg found out someone else
> has update the head. For the case involved in the patch, the problem is, after
> xchg, assign the old head to the new->next will race with the delete part when
> saving the head for deleting after setting the head to NULL, as the traverse
> of saved head probably not see a consistent list, that's a broken one.
> 
> I think an analogy of llist_add_batch for the updating part will be ok for this:
> 
>                          struct dst_entry *first;
>                          do {
>                                  xdst->u.dst.next = first = ACCESS_ONCE(xfrm_policy_sk_bundles);
>                          } while (cmpxchg(&xfrm_policy_sk_bundles, first, &xdst->u.dst) != first);
> 
> 
> And the deleting part:
> 
>                          head = xchg(&net->xfrm.xfrm_policy_sk_bundles, NULL);
> 
> 

The point is to _use_ the helpers, so that code review is easy.

llist.h is safe, it was extensively discussed and adopted.

If you want to get rid of the spinlock, then use llist_del_all() for the
deleting, and llist_add() for the addition.

Really, it's that simple.
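
For illustration, a minimal sketch of that llist-based variant; it assumes a
hypothetical llist_node member (here called sk_bundle_node) in struct xfrm_dst
and an llist_head in netns_xfrm reusing the xfrm_policy_sk_bundles name, neither
of which the patch above adds:

        #include <linux/llist.h>

        /* add side, in xfrm_lookup() */
        llist_add(&xdst->sk_bundle_node, &net->xfrm.xfrm_policy_sk_bundles);

        /* delete side, in __xfrm_garbage_collect(): detach the whole list
         * atomically, then free the detached entries.
         */
        struct llist_node *node = llist_del_all(&net->xfrm.xfrm_policy_sk_bundles);

        while (node) {
                struct xfrm_dst *xdst = llist_entry(node, struct xfrm_dst, sk_bundle_node);

                node = node->next;
                dst_free(&xdst->u.dst);
        }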




Patch

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 1006a26..4a30b1b 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -58,9 +58,9 @@  struct netns_xfrm {
 	struct dst_ops		xfrm6_dst_ops;
 #endif
 	spinlock_t xfrm_state_lock;
-	spinlock_t xfrm_policy_sk_bundle_lock;
 	rwlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
+	struct dst_entry *xfrm_policy_sk_bundles;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a7487f3..26d79c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,8 +39,6 @@ 
 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
 #define XFRM_MAX_QUEUE_LEN	100
 
-static struct dst_entry *xfrm_policy_sk_bundles;
-
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
 						__read_mostly;
@@ -2108,12 +2106,8 @@  struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			}
 
 			dst_hold(&xdst->u.dst);
-
-			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-			xdst->u.dst.next = xfrm_policy_sk_bundles;
-			xfrm_policy_sk_bundles = &xdst->u.dst;
-			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
+			xdst->u.dst.next = xchg(&net->xfrm.xfrm_policy_sk_bundles,
+						&xdst->u.dst);
 			route = xdst->route;
 		}
 	}
@@ -2551,11 +2545,7 @@  static void __xfrm_garbage_collect(struct net *net)
 {
 	struct dst_entry *head, *next;
 
-	spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-	head = xfrm_policy_sk_bundles;
-	xfrm_policy_sk_bundles = NULL;
-	spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
+	head = xchg(&net->xfrm.xfrm_policy_sk_bundles, NULL);
 	while (head) {
 		next = head->next;
 		dst_free(head);
@@ -2942,7 +2932,6 @@  static int __net_init xfrm_net_init(struct net *net)
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	rwlock_init(&net->xfrm.xfrm_policy_lock);
-	spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
 	return 0;