diff mbox

[net-next,v2,6/10] bonding: rebuild the lock use for bond_activebackup_arp_mon()

Message ID 527C4784.1030402@huawei.com
State Deferred, archived
Delegated to: David Miller
Headers show

Commit Message

Ding Tianhong Nov. 8, 2013, 2:08 a.m. UTC
The bond_activebackup_arp_mon() use the bond lock for read to
protect the slave list, it is no effect, and the RTNL is only
called for bond_ab_arp_commit() and peer notify, for the performance
better, use RCU instead of the bond lock, because the bond slave
list need to called in RCU, add a new bond_first_slave_rcu()
to get the first slave in RCU protection.

When bond_ab_arp_inspect() and should_notify_peers is true, the
RTNL will called twice, it is a loss of performance, so make the
two RTNL together to avoid performance loss.

Suggested-by: Jay Vosburgh <fubar@us.ibm.com>
Suggested-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_main.c | 35 +++++++++++++++++++----------------
 drivers/net/bonding/bonding.h   |  7 +++++++
 2 files changed, 26 insertions(+), 16 deletions(-)

Comments

Nikolay Aleksandrov Nov. 8, 2013, 4:01 p.m. UTC | #1
On 11/08/2013 03:08 AM, Ding Tianhong wrote:
> The bond_activebackup_arp_mon() use the bond lock for read to
> protect the slave list, it is no effect, and the RTNL is only
> called for bond_ab_arp_commit() and peer notify, for the performance
> better, use RCU instead of the bond lock, because the bond slave
> list need to called in RCU, add a new bond_first_slave_rcu()
> to get the first slave in RCU protection.
> 
> When bond_ab_arp_inspect() and should_notify_peers is true, the
> RTNL will called twice, it is a loss of performance, so make the
> two RTNL together to avoid performance loss.
> 
> Suggested-by: Jay Vosburgh <fubar@us.ibm.com>
> Suggested-by: Veaceslav Falico <vfalico@redhat.com>
> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
> ---
>  drivers/net/bonding/bond_main.c | 35 +++++++++++++++++++----------------
>  drivers/net/bonding/bonding.h   |  7 +++++++
>  2 files changed, 26 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 759dcd0..b48ca55 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -2524,7 +2524,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>  	struct slave *slave;
>  	int commit = 0;
>  
> -	bond_for_each_slave(bond, slave, iter) {
> +	bond_for_each_slave_rcu(bond, slave, iter) {
>  		slave->new_link = BOND_LINK_NOCHANGE;
>  		last_rx = slave_last_rx(bond, slave);
>  
> @@ -2586,7 +2586,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>   * Called to commit link state changes noted by inspection step of
>   * active-backup mode ARP monitor.
>   *
> - * Called with RTNL and bond->lock for read.
> + * Called with RTNL hold.
>   */
>  static void bond_ab_arp_commit(struct bonding *bond)
>  {
> @@ -2661,7 +2661,7 @@ do_failover:
>  /*
>   * Send ARP probes for active-backup mode ARP monitor.
>   *
> - * Called with bond->lock held for read.
> + * Called with rcu_read_lock hold.
>   */
>  static void bond_ab_arp_probe(struct bonding *bond)
>  {
> @@ -2690,14 +2690,14 @@ static void bond_ab_arp_probe(struct bonding *bond)
>  	 */
>  
>  	if (!bond->current_arp_slave) {
> -		bond->current_arp_slave = bond_first_slave(bond);
> +		bond->current_arp_slave = bond_first_slave_rcu(bond);
>  		if (!bond->current_arp_slave)
>  			return;
>  	}
>  
>  	bond_set_slave_inactive_flags(bond->current_arp_slave);
>  
> -	bond_for_each_slave(bond, slave, iter) {
> +	bond_for_each_slave_rcu(bond, slave, iter) {
>  		if (!found && !before && IS_UP(slave->dev))
>  			before = slave;
>  
> @@ -2745,43 +2745,46 @@ void bond_activebackup_arp_mon(struct work_struct *work)
>  	bool should_notify_peers = false;
>  	int delta_in_ticks;
>  
> -	read_lock(&bond->lock);
> -
>  	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
>  
> -	if (!bond_has_slaves(bond))
> +	rcu_read_lock();
> +
> +	if (!bond_has_slaves_rcu(bond)) {
> +		rcu_read_unlock();
>  		goto re_arm;
> +	}
>  
>  	should_notify_peers = bond_should_notify_peers(bond);
Again, bond_should_notify_peers() is not RCU-safe.

>  
>  	if (bond_ab_arp_inspect(bond)) {
> -		read_unlock(&bond->lock);
> +		rcu_read_unlock();
>  
>  		/* Race avoidance with bond_close flush of workqueue */
>  		if (!rtnl_trylock()) {
> -			read_lock(&bond->lock);
>  			delta_in_ticks = 1;
>  			should_notify_peers = false;
>  			goto re_arm;
>  		}
>  
> -		read_lock(&bond->lock);
> -
>  		bond_ab_arp_commit(bond);
>  
> -		read_unlock(&bond->lock);
> +		if (should_notify_peers) {
> +			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
> +					bond->dev);
> +			should_notify_peers = false;
> +		}
> +
>  		rtnl_unlock();
> -		read_lock(&bond->lock);
> +		rcu_read_lock();
>  	}
>  
>  	bond_ab_arp_probe(bond);
Generally you might be safe in bond_ab_arp_probe() due to the synchronization
done by netdev_rx_handler_unregister(), but this code may run after that (and
after the unlinked slave) but before current_arp_slave is set to NULL and thus
use it. Now I don't see a direct problem with that, only a complication that can
bite us later. I vaguely remember that I re-worked the bond_ab_arp_probe() and
the way current_arp_slave works when doing this transition in my patches.

> +	rcu_read_unlock();
>  
>  re_arm:
>  	if (bond->params.arp_interval)
>  		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
>  
> -	read_unlock(&bond->lock);
> -
>  	if (should_notify_peers) {
>  		if (!rtnl_trylock())
>  			return;
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index deb9738..90b745c 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -97,6 +97,13 @@
>  		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
>  		NULL)
>  
> +#define bond_first_slave_rcu(bond) \
> +	({struct list_head *__ptr = (bond_slave_list(bond)); \
> +	 struct list_head *__next = ACCESS_ONCE(__ptr->next); \
> +	 likely(__ptr != __next) ? \
> +	 netdev_adjacent_get_private_rcu(__next) : NULL; \
> +	 })
> +
Honestly, I don't like this, it sure can be re-written in a more
straight-forward manner.

>  #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
>  #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ding Tianhong Nov. 9, 2013, 2:08 p.m. UTC | #2
于 2013/11/9 0:01, Nikolay Aleksandrov 写道:
> On 11/08/2013 03:08 AM, Ding Tianhong wrote:
>> The bond_activebackup_arp_mon() use the bond lock for read to
>> protect the slave list, it is no effect, and the RTNL is only
>> called for bond_ab_arp_commit() and peer notify, for the performance
>> better, use RCU instead of the bond lock, because the bond slave
>> list need to called in RCU, add a new bond_first_slave_rcu()
>> to get the first slave in RCU protection.
>>
>> When bond_ab_arp_inspect() and should_notify_peers is true, the
>> RTNL will called twice, it is a loss of performance, so make the
>> two RTNL together to avoid performance loss.
>>
>> Suggested-by: Jay Vosburgh <fubar@us.ibm.com>
>> Suggested-by: Veaceslav Falico <vfalico@redhat.com>
>> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>> ---
>>   drivers/net/bonding/bond_main.c | 35 +++++++++++++++++++----------------
>>   drivers/net/bonding/bonding.h   |  7 +++++++
>>   2 files changed, 26 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 759dcd0..b48ca55 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -2524,7 +2524,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>>   	struct slave *slave;
>>   	int commit = 0;
>>   
>> -	bond_for_each_slave(bond, slave, iter) {
>> +	bond_for_each_slave_rcu(bond, slave, iter) {
>>   		slave->new_link = BOND_LINK_NOCHANGE;
>>   		last_rx = slave_last_rx(bond, slave);
>>   
>> @@ -2586,7 +2586,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>>    * Called to commit link state changes noted by inspection step of
>>    * active-backup mode ARP monitor.
>>    *
>> - * Called with RTNL and bond->lock for read.
>> + * Called with RTNL hold.
>>    */
>>   static void bond_ab_arp_commit(struct bonding *bond)
>>   {
>> @@ -2661,7 +2661,7 @@ do_failover:
>>   /*
>>    * Send ARP probes for active-backup mode ARP monitor.
>>    *
>> - * Called with bond->lock held for read.
>> + * Called with rcu_read_lock hold.
>>    */
>>   static void bond_ab_arp_probe(struct bonding *bond)
>>   {
>> @@ -2690,14 +2690,14 @@ static void bond_ab_arp_probe(struct bonding *bond)
>>   	 */
>>   
>>   	if (!bond->current_arp_slave) {
>> -		bond->current_arp_slave = bond_first_slave(bond);
>> +		bond->current_arp_slave = bond_first_slave_rcu(bond);
>>   		if (!bond->current_arp_slave)
>>   			return;
>>   	}
>>   
>>   	bond_set_slave_inactive_flags(bond->current_arp_slave);
>>   
>> -	bond_for_each_slave(bond, slave, iter) {
>> +	bond_for_each_slave_rcu(bond, slave, iter) {
>>   		if (!found && !before && IS_UP(slave->dev))
>>   			before = slave;
>>   
>> @@ -2745,43 +2745,46 @@ void bond_activebackup_arp_mon(struct work_struct *work)
>>   	bool should_notify_peers = false;
>>   	int delta_in_ticks;
>>   
>> -	read_lock(&bond->lock);
>> -
>>   	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
>>   
>> -	if (!bond_has_slaves(bond))
>> +	rcu_read_lock();
>> +
>> +	if (!bond_has_slaves_rcu(bond)) {
>> +		rcu_read_unlock();
>>   		goto re_arm;
>> +	}
>>   
>>   	should_notify_peers = bond_should_notify_peers(bond);
> Again, bond_should_notify_peers() is not RCU-safe.

yes.
>>   
>>   	if (bond_ab_arp_inspect(bond)) {
>> -		read_unlock(&bond->lock);
>> +		rcu_read_unlock();
>>   
>>   		/* Race avoidance with bond_close flush of workqueue */
>>   		if (!rtnl_trylock()) {
>> -			read_lock(&bond->lock);
>>   			delta_in_ticks = 1;
>>   			should_notify_peers = false;
>>   			goto re_arm;
>>   		}
>>   
>> -		read_lock(&bond->lock);
>> -
>>   		bond_ab_arp_commit(bond);
>>   
>> -		read_unlock(&bond->lock);
>> +		if (should_notify_peers) {
>> +			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>> +					bond->dev);
>> +			should_notify_peers = false;
>> +		}
>> +
>>   		rtnl_unlock();
>> -		read_lock(&bond->lock);
>> +		rcu_read_lock();
>>   	}
>>   
>>   	bond_ab_arp_probe(bond);
> Generally you might be safe in bond_ab_arp_probe() due to the synchronization
> done by netdev_rx_handler_unregister(), but this code may run after that (and
> after the unlinked slave) but before current_arp_slave is set to NULL and thus
> use it. Now I don't see a direct problem with that, only a complication that can
> bite us later. I vaguely remember that I re-worked the bond_ab_arp_probe() and
> the way current_arp_slave works when doing this transition in my patches.

maybe I miss the patch, pls send me the commit and I will check it again.
>> +	rcu_read_unlock();
>>   
>>   re_arm:
>>   	if (bond->params.arp_interval)
>>   		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
>>   
>> -	read_unlock(&bond->lock);
>> -
>>   	if (should_notify_peers) {
>>   		if (!rtnl_trylock())
>>   			return;
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index deb9738..90b745c 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -97,6 +97,13 @@
>>   		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
>>   		NULL)
>>   
>> +#define bond_first_slave_rcu(bond) \
>> +	({struct list_head *__ptr = (bond_slave_list(bond)); \
>> +	 struct list_head *__next = ACCESS_ONCE(__ptr->next); \
>> +	 likely(__ptr != __next) ? \
>> +	 netdev_adjacent_get_private_rcu(__next) : NULL; \
>> +	 })
>> +
> Honestly, I don't like this, it sure can be re-written in a more
> straight-forward manner.

ok, I will re-write it and make it more comfortable.

Regards.
Ding

>>   #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
>>   #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
>>   
>>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ding Tianhong Nov. 10, 2013, 4:08 a.m. UTC | #3
>> +	rcu_read_unlock();
>>   
>>   re_arm:
>>   	if (bond->params.arp_interval)
>>   		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
>>   
>> -	read_unlock(&bond->lock);
>> -
>>   	if (should_notify_peers) {
>>   		if (!rtnl_trylock())
>>   			return;
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index deb9738..90b745c 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -97,6 +97,13 @@
>>   		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
>>   		NULL)
>>   
>> +#define bond_first_slave_rcu(bond) \
>> +	({struct list_head *__ptr = (bond_slave_list(bond)); \
>> +	 struct list_head *__next = ACCESS_ONCE(__ptr->next); \
>> +	 likely(__ptr != __next) ? \
>> +	 netdev_adjacent_get_private_rcu(__next) : NULL; \
>> +	 })
>> +
> Honestly, I don't like this, it sure can be re-written in a more
> straight-forward manner.

I have re-write the function by 2 ways, the first one just like 
list_first_or_null_rcu,
the second one just used the exist function 
netdev_lower_get_next_private_rcu,
I think the first one is better, it is more exactly.

1:

+#define bond_first_slave_rcu(bond) \
+	({struct list_head *__ptr = (bond_slave_list(bond)); \
+	 struct list_head *__next = ACCESS_ONCE(__ptr->next); \
+	 likely(__ptr != __next) ? \
+	 (list_entry_rcu(__next, struct netdev_adjacent, list))->private : NULL; \
+	 })
+

2:

+#define bond_first_slave_rcu(bond) \
+	({struct list_head *iter = (bond_slave_list(bond)); \
+	 netdev_lower_get_next_private_rcu(bond->dev, &iter) ? : NULL; \
+	})


what do you think about is, maybe you have more wonderful idea, pls 
remind me,
thanks

Regards
Ding

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 759dcd0..b48ca55 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2524,7 +2524,7 @@  static int bond_ab_arp_inspect(struct bonding *bond)
 	struct slave *slave;
 	int commit = 0;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		slave->new_link = BOND_LINK_NOCHANGE;
 		last_rx = slave_last_rx(bond, slave);
 
@@ -2586,7 +2586,7 @@  static int bond_ab_arp_inspect(struct bonding *bond)
  * Called to commit link state changes noted by inspection step of
  * active-backup mode ARP monitor.
  *
- * Called with RTNL and bond->lock for read.
+ * Called with RTNL hold.
  */
 static void bond_ab_arp_commit(struct bonding *bond)
 {
@@ -2661,7 +2661,7 @@  do_failover:
 /*
  * Send ARP probes for active-backup mode ARP monitor.
  *
- * Called with bond->lock held for read.
+ * Called with rcu_read_lock hold.
  */
 static void bond_ab_arp_probe(struct bonding *bond)
 {
@@ -2690,14 +2690,14 @@  static void bond_ab_arp_probe(struct bonding *bond)
 	 */
 
 	if (!bond->current_arp_slave) {
-		bond->current_arp_slave = bond_first_slave(bond);
+		bond->current_arp_slave = bond_first_slave_rcu(bond);
 		if (!bond->current_arp_slave)
 			return;
 	}
 
 	bond_set_slave_inactive_flags(bond->current_arp_slave);
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (!found && !before && IS_UP(slave->dev))
 			before = slave;
 
@@ -2745,43 +2745,46 @@  void bond_activebackup_arp_mon(struct work_struct *work)
 	bool should_notify_peers = false;
 	int delta_in_ticks;
 
-	read_lock(&bond->lock);
-
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
 
-	if (!bond_has_slaves(bond))
+	rcu_read_lock();
+
+	if (!bond_has_slaves_rcu(bond)) {
+		rcu_read_unlock();
 		goto re_arm;
+	}
 
 	should_notify_peers = bond_should_notify_peers(bond);
 
 	if (bond_ab_arp_inspect(bond)) {
-		read_unlock(&bond->lock);
+		rcu_read_unlock();
 
 		/* Race avoidance with bond_close flush of workqueue */
 		if (!rtnl_trylock()) {
-			read_lock(&bond->lock);
 			delta_in_ticks = 1;
 			should_notify_peers = false;
 			goto re_arm;
 		}
 
-		read_lock(&bond->lock);
-
 		bond_ab_arp_commit(bond);
 
-		read_unlock(&bond->lock);
+		if (should_notify_peers) {
+			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+					bond->dev);
+			should_notify_peers = false;
+		}
+
 		rtnl_unlock();
-		read_lock(&bond->lock);
+		rcu_read_lock();
 	}
 
 	bond_ab_arp_probe(bond);
+	rcu_read_unlock();
 
 re_arm:
 	if (bond->params.arp_interval)
 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 
-	read_unlock(&bond->lock);
-
 	if (should_notify_peers) {
 		if (!rtnl_trylock())
 			return;
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index deb9738..90b745c 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -97,6 +97,13 @@ 
 		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
 		NULL)
 
+#define bond_first_slave_rcu(bond) \
+	({struct list_head *__ptr = (bond_slave_list(bond)); \
+	 struct list_head *__next = ACCESS_ONCE(__ptr->next); \
+	 likely(__ptr != __next) ? \
+	 netdev_adjacent_get_private_rcu(__next) : NULL; \
+	 })
+
 #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
 #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))