diff mbox

[net-next-2.6] bonding: introduce primary_lazy option

Message ID 20090813150513.GB10449@psychotron.englab.brq.redhat.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Pirko Aug. 13, 2009, 3:05 p.m. UTC
In some cases there is not desirable to switch back to primary interface when
it's link recovers and rather stay wiith currently active one. We need to avoid
packetloss as much as we can in some cases. This is solved by introducing
primary_lazy option. Note that enslaved primary slave is set as current
active no matter what.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jay Vosburgh Aug. 13, 2009, 3:44 p.m. UTC | #1
Jiri Pirko <jpirko@redhat.com> wrote:

>In some cases there is not desirable to switch back to primary interface when
>it's link recovers and rather stay wiith currently active one. We need to avoid
>packetloss as much as we can in some cases. This is solved by introducing
>primary_lazy option. Note that enslaved primary slave is set as current
>active no matter what.

	Are you just looking for a way to insure that, when the bond
first comes up (is configured at boot, for example), a particular slave
is the active slave?  In that case, why can't an explicit selection be
made via either ifenslave -c or /sys/class/net/bond0/bonding/active ?

	-J

>Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>
>diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
>index d5181ce..f1b82af 100644
>--- a/Documentation/networking/bonding.txt
>+++ b/Documentation/networking/bonding.txt
>@@ -614,6 +614,15 @@ primary
>
> 	The primary option is only valid for active-backup mode.
>
>+primary_lazy
>+
>+	Specifies the behaviour of the primary slave in case of
>+	it's link recovery has been detected. By default (value 0) the
>+	primary slave is set as active slave immediately after the link
>+	recovery. If the value is 1 then current active slave doesn't
>+	change as long as it's link status doesn't change. This prevents
>+	the bonding device from flip-flopping.
>+
> updelay
>
> 	Specifies the time, in milliseconds, to wait before enabling a
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 3bf0cc6..00fbb9d 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -94,6 +94,7 @@ static int downdelay;
> static int use_carrier	= 1;
> static char *mode;
> static char *primary;
>+static int primary_lazy;
> static char *lacp_rate;
> static char *ad_select;
> static char *xmit_hash_policy;
>@@ -126,6 +127,9 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
> 		       "6 for balance-alb");
> module_param(primary, charp, 0);
> MODULE_PARM_DESC(primary, "Primary network device to use");
>+module_param(primary_lazy, int, 0);
>+MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; "
>+			       "0 for off (default), 1 for on");
> module_param(lacp_rate, charp, 0);
> MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
> 			    "(slow/fast)");
>@@ -1067,7 +1071,6 @@ out:
>
> }
>
>-
> /**
>  * find_best_interface - select the best available slave to be the active one
>  * @bond: our bonding struct
>@@ -1097,9 +1100,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
> 	 * and primary_slave that may be up and able to arp
> 	 */
> 	if ((bond->primary_slave) &&
>-	    (!bond->params.arp_interval) &&
>-	    (IS_UP(bond->primary_slave->dev))) {
>+	    (IS_UP(bond->primary_slave->dev)) &&
>+	    (!(bond->params.primary_lazy && old_active &&
>+	       (IS_UP(old_active->dev))) || bond->force_primary)) {
> 		new_active = bond->primary_slave;
>+		bond->force_primary = false;
> 	}
>
> 	/* remember where to stop iterating over the slaves */
>@@ -1674,8 +1679,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>
> 	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
> 		/* if there is a primary slave, remember it */
>-		if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
>+		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
> 			bond->primary_slave = new_slave;
>+			bond->force_primary = true;
>+		}
> 	}
>
> 	write_lock_bh(&bond->curr_slave_lock);
>@@ -2929,7 +2936,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
> 	 */
> 	if (bond->primary_slave &&
> 	    (bond->primary_slave != bond->curr_active_slave) &&
>-	    (bond->primary_slave->link == BOND_LINK_UP))
>+	    (bond->primary_slave->link == BOND_LINK_UP) &&
>+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
>+              bond->curr_active_slave->link == BOND_LINK_UP))
> 		commit++;
>
> 	read_unlock(&bond->curr_slave_lock);
>@@ -3035,7 +3044,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
> 	 */
> 	if (bond->primary_slave &&
> 	    (bond->primary_slave != bond->curr_active_slave) &&
>-	    (bond->primary_slave->link == BOND_LINK_UP)) {
>+	    (bond->primary_slave->link == BOND_LINK_UP) &&
>+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
>+              bond->curr_active_slave->link == BOND_LINK_UP)) {
> 		write_lock_bh(&bond->curr_slave_lock);
> 		bond_change_active_slave(bond, bond->primary_slave);
> 		write_unlock_bh(&bond->curr_slave_lock);
>@@ -4987,6 +4998,17 @@ static int bond_check_params(struct bond_params *params)
> 		primary = NULL;
> 	}
>
>+	if (primary) {
>+		if ((primary_lazy != 0) && (primary_lazy != 1)) {
>+			pr_warning(DRV_NAME
>+				   ": Warning: primary_lazy module parameter "
>+				   "(%d), not of valid value (0/1), so it was "
>+				   "set to 0\n",
>+				   primary_lazy);
>+			primary_lazy = 1;
>+		}
>+	}
>+
> 	if (fail_over_mac) {
> 		fail_over_mac_value = bond_parse_parm(fail_over_mac,
> 						      fail_over_mac_tbl);
>@@ -5018,6 +5040,7 @@ static int bond_check_params(struct bond_params *params)
> 	params->use_carrier = use_carrier;
> 	params->lacp_fast = lacp_fast;
> 	params->primary[0] = 0;
>+	params->primary_lazy = primary_lazy;
> 	params->fail_over_mac = fail_over_mac_value;
>
> 	if (primary) {
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index 55bf34f..573fe82 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -1209,6 +1209,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
> 		   bonding_show_primary, bonding_store_primary);
>
> /*
>+ * Show and set the primary_lazy flag.
>+ */
>+static ssize_t bonding_show_primary_lazy(struct device *d,
>+				    struct device_attribute *attr,
>+				    char *buf)
>+{
>+	struct bonding *bond = to_bond(d);
>+
>+	return sprintf(buf, "%d\n", bond->params.primary_lazy);
>+}
>+
>+static ssize_t bonding_store_primary_lazy(struct device *d,
>+				     struct device_attribute *attr,
>+				     const char *buf, size_t count)
>+{
>+	int new_value, ret = count;
>+	struct bonding *bond = to_bond(d);
>+
>+	if (!rtnl_trylock())
>+		return restart_syscall();
>+
>+	if (sscanf(buf, "%d", &new_value) != 1) {
>+		pr_err(DRV_NAME
>+		       ": %s: no primary_lazy value specified.\n",
>+		       bond->dev->name);
>+		ret = -EINVAL;
>+		goto out;
>+	}
>+	if ((new_value == 0) || (new_value == 1)) {
>+		bond->params.primary_lazy = new_value;
>+		pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n",
>+		       bond->dev->name, new_value);
>+		if (!bond->params.primary_lazy) {
>+			bond->force_primary = true;
>+			read_lock(&bond->lock);
>+			write_lock_bh(&bond->curr_slave_lock);
>+			bond_select_active_slave(bond);
>+			write_unlock_bh(&bond->curr_slave_lock);
>+			read_unlock(&bond->lock);
>+		}
>+	} else {
>+		pr_info(DRV_NAME
>+		       ": %s: Ignoring invalid primary_lazy value %d.\n",
>+		       bond->dev->name, new_value);
>+	}
>+out:
>+	rtnl_unlock();
>+	return count;
>+}
>+static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR,
>+		   bonding_show_primary_lazy, bonding_store_primary_lazy);
>+
>+/*
>  * Show and set the use_carrier flag.
>  */
> static ssize_t bonding_show_carrier(struct device *d,
>@@ -1497,6 +1550,7 @@ static struct attribute *per_bond_attrs[] = {
> 	&dev_attr_num_unsol_na.attr,
> 	&dev_attr_miimon.attr,
> 	&dev_attr_primary.attr,
>+	&dev_attr_primary_lazy.attr,
> 	&dev_attr_use_carrier.attr,
> 	&dev_attr_active_slave.attr,
> 	&dev_attr_mii_status.attr,
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 6290a50..ac35c6c 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -131,6 +131,7 @@ struct bond_params {
> 	int lacp_fast;
> 	int ad_select;
> 	char primary[IFNAMSIZ];
>+	int primary_lazy;
> 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
> };
>
>@@ -190,6 +191,7 @@ struct bonding {
> 	struct   slave *curr_active_slave;
> 	struct   slave *current_arp_slave;
> 	struct   slave *primary_slave;
>+	bool     force_primary;
> 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
> 	rwlock_t lock;
> 	rwlock_t curr_slave_lock;

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 13, 2009, 7:41 p.m. UTC | #2
Jiri Pirko wrote:
> In some cases there is not desirable to switch back to primary interface when
> it's link recovers and rather stay wiith currently active one. We need to avoid
> packetloss as much as we can in some cases. This is solved by introducing
> primary_lazy option. Note that enslaved primary slave is set as current
> active no matter what.

May I suggest that instead of creating a new option to better define how
the "primary" option is expected to behave for active-backup mode, we 
try the "weight" slave  option I proposed in the thread "alternative to 
primary" earlier this year ?

http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel

Giving the same "weight" to two different slaves means "chose at random
on startup and keep the active one until it fails". And if the "at
random" behavior is not appropriate, one can force the active slave
using what Jay suggested  (/sys/class/net/bond0/bonding/active).

The proposed "weight" slave's option is able to prevent the slaves from
flip-flopping, by stating the fact that two slaves share the same 
"primary" level, and may provide several other enhancements as described 
in the thread.

Hence, it is a more general configuration interface than what you 
proposed. I must admit that despite the fact that I suggested this in 
april, I didn't posted any patch for it until now. Unfortunately, 
I'didn't had time for it and probably not the proper skills anyway :-).

	Nicolas.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 14, 2009, 10:52 a.m. UTC | #3
Thu, Aug 13, 2009 at 05:44:55PM CEST, fubar@us.ibm.com wrote:
>Jiri Pirko <jpirko@redhat.com> wrote:
>
>>In some cases there is not desirable to switch back to primary interface when
>>it's link recovers and rather stay wiith currently active one. We need to avoid
>>packetloss as much as we can in some cases. This is solved by introducing
>>primary_lazy option. Note that enslaved primary slave is set as current
>>active no matter what.
>
>	Are you just looking for a way to insure that, when the bond
>first comes up (is configured at boot, for example), a particular slave
>is the active slave?  In that case, why can't an explicit selection be
>made via either ifenslave -c or /sys/class/net/bond0/bonding/active ?

Indeed this can be done this way and I'm aware of it. I'm leaving aside that you
must put this somewhere into init scripts which brings downsides.

But imagine you have bond with 3 slaves:
eth0		eth1		eth2
UP(curr)	UP		UP
DOWN		UP(curr)	UP
UP		UP(curr)	UP
UP		DOWN		UP(curr)

eth2 ends up being current active but we prefer eth0 (as primary interface).
This is not desirable and is solved by primary_lazy option.

Jirka

>
>	-J
>
>>Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>
>>diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
>>index d5181ce..f1b82af 100644
>>--- a/Documentation/networking/bonding.txt
>>+++ b/Documentation/networking/bonding.txt
>>@@ -614,6 +614,15 @@ primary
>>
>> 	The primary option is only valid for active-backup mode.
>>
>>+primary_lazy
>>+
>>+	Specifies the behaviour of the primary slave in case of
>>+	it's link recovery has been detected. By default (value 0) the
>>+	primary slave is set as active slave immediately after the link
>>+	recovery. If the value is 1 then current active slave doesn't
>>+	change as long as it's link status doesn't change. This prevents
>>+	the bonding device from flip-flopping.
>>+
>> updelay
>>
>> 	Specifies the time, in milliseconds, to wait before enabling a
>>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>index 3bf0cc6..00fbb9d 100644
>>--- a/drivers/net/bonding/bond_main.c
>>+++ b/drivers/net/bonding/bond_main.c
>>@@ -94,6 +94,7 @@ static int downdelay;
>> static int use_carrier	= 1;
>> static char *mode;
>> static char *primary;
>>+static int primary_lazy;
>> static char *lacp_rate;
>> static char *ad_select;
>> static char *xmit_hash_policy;
>>@@ -126,6 +127,9 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
>> 		       "6 for balance-alb");
>> module_param(primary, charp, 0);
>> MODULE_PARM_DESC(primary, "Primary network device to use");
>>+module_param(primary_lazy, int, 0);
>>+MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; "
>>+			       "0 for off (default), 1 for on");
>> module_param(lacp_rate, charp, 0);
>> MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
>> 			    "(slow/fast)");
>>@@ -1067,7 +1071,6 @@ out:
>>
>> }
>>
>>-
>> /**
>>  * find_best_interface - select the best available slave to be the active one
>>  * @bond: our bonding struct
>>@@ -1097,9 +1100,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
>> 	 * and primary_slave that may be up and able to arp
>> 	 */
>> 	if ((bond->primary_slave) &&
>>-	    (!bond->params.arp_interval) &&
>>-	    (IS_UP(bond->primary_slave->dev))) {
>>+	    (IS_UP(bond->primary_slave->dev)) &&
>>+	    (!(bond->params.primary_lazy && old_active &&
>>+	       (IS_UP(old_active->dev))) || bond->force_primary)) {
>> 		new_active = bond->primary_slave;
>>+		bond->force_primary = false;
>> 	}
>>
>> 	/* remember where to stop iterating over the slaves */
>>@@ -1674,8 +1679,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>>
>> 	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
>> 		/* if there is a primary slave, remember it */
>>-		if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
>>+		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
>> 			bond->primary_slave = new_slave;
>>+			bond->force_primary = true;
>>+		}
>> 	}
>>
>> 	write_lock_bh(&bond->curr_slave_lock);
>>@@ -2929,7 +2936,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
>> 	 */
>> 	if (bond->primary_slave &&
>> 	    (bond->primary_slave != bond->curr_active_slave) &&
>>-	    (bond->primary_slave->link == BOND_LINK_UP))
>>+	    (bond->primary_slave->link == BOND_LINK_UP) &&
>>+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
>>+              bond->curr_active_slave->link == BOND_LINK_UP))
>> 		commit++;
>>
>> 	read_unlock(&bond->curr_slave_lock);
>>@@ -3035,7 +3044,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
>> 	 */
>> 	if (bond->primary_slave &&
>> 	    (bond->primary_slave != bond->curr_active_slave) &&
>>-	    (bond->primary_slave->link == BOND_LINK_UP)) {
>>+	    (bond->primary_slave->link == BOND_LINK_UP) &&
>>+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
>>+              bond->curr_active_slave->link == BOND_LINK_UP)) {
>> 		write_lock_bh(&bond->curr_slave_lock);
>> 		bond_change_active_slave(bond, bond->primary_slave);
>> 		write_unlock_bh(&bond->curr_slave_lock);
>>@@ -4987,6 +4998,17 @@ static int bond_check_params(struct bond_params *params)
>> 		primary = NULL;
>> 	}
>>
>>+	if (primary) {
>>+		if ((primary_lazy != 0) && (primary_lazy != 1)) {
>>+			pr_warning(DRV_NAME
>>+				   ": Warning: primary_lazy module parameter "
>>+				   "(%d), not of valid value (0/1), so it was "
>>+				   "set to 0\n",
>>+				   primary_lazy);
>>+			primary_lazy = 1;
>>+		}
>>+	}
>>+
>> 	if (fail_over_mac) {
>> 		fail_over_mac_value = bond_parse_parm(fail_over_mac,
>> 						      fail_over_mac_tbl);
>>@@ -5018,6 +5040,7 @@ static int bond_check_params(struct bond_params *params)
>> 	params->use_carrier = use_carrier;
>> 	params->lacp_fast = lacp_fast;
>> 	params->primary[0] = 0;
>>+	params->primary_lazy = primary_lazy;
>> 	params->fail_over_mac = fail_over_mac_value;
>>
>> 	if (primary) {
>>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>>index 55bf34f..573fe82 100644
>>--- a/drivers/net/bonding/bond_sysfs.c
>>+++ b/drivers/net/bonding/bond_sysfs.c
>>@@ -1209,6 +1209,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
>> 		   bonding_show_primary, bonding_store_primary);
>>
>> /*
>>+ * Show and set the primary_lazy flag.
>>+ */
>>+static ssize_t bonding_show_primary_lazy(struct device *d,
>>+				    struct device_attribute *attr,
>>+				    char *buf)
>>+{
>>+	struct bonding *bond = to_bond(d);
>>+
>>+	return sprintf(buf, "%d\n", bond->params.primary_lazy);
>>+}
>>+
>>+static ssize_t bonding_store_primary_lazy(struct device *d,
>>+				     struct device_attribute *attr,
>>+				     const char *buf, size_t count)
>>+{
>>+	int new_value, ret = count;
>>+	struct bonding *bond = to_bond(d);
>>+
>>+	if (!rtnl_trylock())
>>+		return restart_syscall();
>>+
>>+	if (sscanf(buf, "%d", &new_value) != 1) {
>>+		pr_err(DRV_NAME
>>+		       ": %s: no primary_lazy value specified.\n",
>>+		       bond->dev->name);
>>+		ret = -EINVAL;
>>+		goto out;
>>+	}
>>+	if ((new_value == 0) || (new_value == 1)) {
>>+		bond->params.primary_lazy = new_value;
>>+		pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n",
>>+		       bond->dev->name, new_value);
>>+		if (!bond->params.primary_lazy) {
>>+			bond->force_primary = true;
>>+			read_lock(&bond->lock);
>>+			write_lock_bh(&bond->curr_slave_lock);
>>+			bond_select_active_slave(bond);
>>+			write_unlock_bh(&bond->curr_slave_lock);
>>+			read_unlock(&bond->lock);
>>+		}
>>+	} else {
>>+		pr_info(DRV_NAME
>>+		       ": %s: Ignoring invalid primary_lazy value %d.\n",
>>+		       bond->dev->name, new_value);
>>+	}
>>+out:
>>+	rtnl_unlock();
>>+	return count;
>>+}
>>+static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR,
>>+		   bonding_show_primary_lazy, bonding_store_primary_lazy);
>>+
>>+/*
>>  * Show and set the use_carrier flag.
>>  */
>> static ssize_t bonding_show_carrier(struct device *d,
>>@@ -1497,6 +1550,7 @@ static struct attribute *per_bond_attrs[] = {
>> 	&dev_attr_num_unsol_na.attr,
>> 	&dev_attr_miimon.attr,
>> 	&dev_attr_primary.attr,
>>+	&dev_attr_primary_lazy.attr,
>> 	&dev_attr_use_carrier.attr,
>> 	&dev_attr_active_slave.attr,
>> 	&dev_attr_mii_status.attr,
>>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>>index 6290a50..ac35c6c 100644
>>--- a/drivers/net/bonding/bonding.h
>>+++ b/drivers/net/bonding/bonding.h
>>@@ -131,6 +131,7 @@ struct bond_params {
>> 	int lacp_fast;
>> 	int ad_select;
>> 	char primary[IFNAMSIZ];
>>+	int primary_lazy;
>> 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
>> };
>>
>>@@ -190,6 +191,7 @@ struct bonding {
>> 	struct   slave *curr_active_slave;
>> 	struct   slave *current_arp_slave;
>> 	struct   slave *primary_slave;
>>+	bool     force_primary;
>> 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>> 	rwlock_t lock;
>> 	rwlock_t curr_slave_lock;
>
>---
>	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 14, 2009, 10:59 a.m. UTC | #4
Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko wrote:
>> In some cases there is not desirable to switch back to primary interface when
>> it's link recovers and rather stay wiith currently active one. We need to avoid
>> packetloss as much as we can in some cases. This is solved by introducing
>> primary_lazy option. Note that enslaved primary slave is set as current
>> active no matter what.
>
> May I suggest that instead of creating a new option to better define how
> the "primary" option is expected to behave for active-backup mode, we  
> try the "weight" slave  option I proposed in the thread "alternative to  
> primary" earlier this year ?
>
> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel

This link does not work for me :(

>
> Giving the same "weight" to two different slaves means "chose at random
> on startup and keep the active one until it fails". And if the "at
> random" behavior is not appropriate, one can force the active slave
> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>
> The proposed "weight" slave's option is able to prevent the slaves from
> flip-flopping, by stating the fact that two slaves share the same  
> "primary" level, and may provide several other enhancements as described  
> in the thread.
>

Although I cannot reach the thread, this looks interesting. But I'm not sure it
has real benefits over primary_lazy option (and it doesn't solve initial curr
active slave setup)

Jirka

> Hence, it is a more general configuration interface than what you  
> proposed. I must admit that despite the fact that I suggested this in  
> april, I didn't posted any patch for it until now. Unfortunately,  
> I'didn't had time for it and probably not the proper skills anyway :-).
>
> 	Nicolas.
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 14, 2009, 4:27 p.m. UTC | #5
Jiri Pirko wrote:
> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>> Jiri Pirko wrote:
>>> In some cases there is not desirable to switch back to primary interface when
>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>> packetloss as much as we can in some cases. This is solved by introducing
>>> primary_lazy option. Note that enslaved primary slave is set as current
>>> active no matter what.
>> May I suggest that instead of creating a new option to better define how
>> the "primary" option is expected to behave for active-backup mode, we  
>> try the "weight" slave  option I proposed in the thread "alternative to  
>> primary" earlier this year ?
>>
>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
> 
> This link does not work for me :(

Nor for me... Sourceforge apparently decided to drop the bonding-devel 
list archive just now. 'hope the list archive will be back soon.

Originally, the proposed "weight" option for slaves was designed just to 
provide a way to better define which slave should become active when the 
active one just went down. As you know, the current "primary" option 
does not allow for a predictable selection of the new active slave when 
the primary loose connectivity. The new active slave is chosen "at 
random" between the remaining slaves.

After a short thread, involving Jay Vosburg and Andy Gospodarek, we end 
up with a general configuration interface, that provide a way to tune 
many things in slave management :

- Active slave selection in active/backup mode, even in the presence of 
more than two slaves.
- Active aggregator selection in 802.3ad mode.
- Load balancing tuning for most load balancing modes.

The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing 
a number there would give a "user supplied weight" to a slave. The speed 
and link state of the slave would give a "natural weight" for the slave. 
And the "effective weight" would be computed every time one of user 
supplied or natural weight change (upon speed or link state changes) and 
would be used everywhere we need a slave weight.

I suggest that :
- slave's natural weight = speed of the slave if link UP, else 0.
- slave's effective weight = slave's natural weight * slave's user 
supplied weight.
- aggregator's effective weight = sum of the effective weights of the 
slaves inside the aggregator.

For the active/backup mode, the exact behavior would be :

- When the active slave disappear, the new active slave is the one whose 
effective weight is the highest.
- When a slave comes back, it only becomes active if its effective 
weight is strictly higher than the one of the current active slave. 
(This stop the flip-flop risk you stated).
- To keep the old "primary" option, we simply give a very high user 
supplied weight to the primary slave. Jay suggested :
#define BOND_PRIMARY_PRIO 0x80000000
user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */

The same apply to aggregator : Every time a slave enter (link UP) or 
leave (link DOWN) an aggregator, the aggregator effective weight is 
recomputed. Then, if an aggregator exist with an strictly higher 
effective weight than the current active one, the new best aggregator 
becomes active.

For others modes, the weight might be used later to tune the load 
balancing logic in some way.

A default value of 1 for slave weight would cause slave speed to be used 
alone, hence the "natural weight".

>> Giving the same "weight" to two different slaves means "chose at random
>> on startup and keep the active one until it fails". And if the "at
>> random" behavior is not appropriate, one can force the active slave
>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>
>> The proposed "weight" slave's option is able to prevent the slaves from
>> flip-flopping, by stating the fact that two slaves share the same  
>> "primary" level, and may provide several other enhancements as described  
>> in the thread.
>>
> 
> Although I cannot reach the thread, this looks interesting. But I'm not sure it
> has real benefits over primary_lazy option (and it doesn't solve initial curr
> active slave setup)

You are right, it doesn't solve the initial active slave selection. But 
why would it be so important to properly select the initial active 
slave, if you feel comfortable with staying with a new active slave, 
after a failure and return of the original active slave ? This kind of 
failures may last for only a few seconds (just unplugging and plugging 
back the wire), and you configuration may then stay with the new active 
slave "forever". If "forever" is acceptable, may be "at startup" is 
acceptable too. :-)

 From my point of view (and Andy Gospodarek apparently agreed), the real 
benefits of the weight slave option is that is it more generic and allow 
for later usage in other modes, that we don't anticipate for now.

Quoted from a mail from Andy Gospodarek in the original thread :

"I really have no objection to that.  Adding this as a base part of
bonding for a few modes with known features would be a nice start.
I'm sure others will be kind enough to send suggestions or patches for
ways this could benefit other modes."

	Nicolas.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 17, 2009, 11:49 a.m. UTC | #6
Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko wrote:
>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>>> Jiri Pirko wrote:
>>>> In some cases there is not desirable to switch back to primary interface when
>>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>>> packetloss as much as we can in some cases. This is solved by introducing
>>>> primary_lazy option. Note that enslaved primary slave is set as current
>>>> active no matter what.
>>> May I suggest that instead of creating a new option to better define how
>>> the "primary" option is expected to behave for active-backup mode, we 
>>>  try the "weight" slave  option I proposed in the thread "alternative 
>>> to  primary" earlier this year ?
>>>
>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
>>
>> This link does not work for me :(
>
> Nor for me... Sourceforge apparently decided to drop the bonding-devel  
> list archive just now. 'hope the list archive will be back soon.
>
> Originally, the proposed "weight" option for slaves was designed just to  
> provide a way to better define which slave should become active when the  
> active one just went down. As you know, the current "primary" option  
> does not allow for a predictable selection of the new active slave when  
> the primary loose connectivity. The new active slave is chosen "at  
> random" between the remaining slaves.
>
> After a short thread, involving Jay Vosburg and Andy Gospodarek, we end  
> up with a general configuration interface, that provide a way to tune  
> many things in slave management :
>
> - Active slave selection in active/backup mode, even in the presence of  
> more than two slaves.
> - Active aggregator selection in 802.3ad mode.
> - Load balancing tuning for most load balancing modes.
>
> The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing  
> a number there would give a "user supplied weight" to a slave. The speed  
> and link state of the slave would give a "natural weight" for the slave.  
> And the "effective weight" would be computed every time one of user  
> supplied or natural weight change (upon speed or link state changes) and  
> would be used everywhere we need a slave weight.
>
> I suggest that :
> - slave's natural weight = speed of the slave if link UP, else 0.
> - slave's effective weight = slave's natural weight * slave's user  
> supplied weight.
> - aggregator's effective weight = sum of the effective weights of the  
> slaves inside the aggregator.
>
> For the active/backup mode, the exact behavior would be :
>
> - When the active slave disappear, the new active slave is the one whose  
> effective weight is the highest.
> - When a slave comes back, it only becomes active if its effective  
> weight is strictly higher than the one of the current active slave.  
> (This stop the flip-flop risk you stated).
> - To keep the old "primary" option, we simply give a very high user  
> supplied weight to the primary slave. Jay suggested :
> #define BOND_PRIMARY_PRIO 0x80000000
> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
> user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */
>
> The same apply to aggregator : Every time a slave enter (link UP) or  
> leave (link DOWN) an aggregator, the aggregator effective weight is  
> recomputed. Then, if an aggregator exist with an strictly higher  
> effective weight than the current active one, the new best aggregator  
> becomes active.
>
> For others modes, the weight might be used later to tune the load  
> balancing logic in some way.
>
> A default value of 1 for slave weight would cause slave speed to be used  
> alone, hence the "natural weight".
>

I read your text and also the original list thread and I must say I see no
solution in this "weight" parameter for this issue. Because it's desired for one
link to stay active even if second come up, these 2 must have the same weight.
But imagine 3 links of the same weight. In that case you cannot insure that the
"primary one" will be chosen as active (see my picture in the reply to Jay's
post). Correct me if I'm wrong but for that what I want to fix by primary_lazy
option, your proposed weight option has no effect.

Therefor I still think the primary_lazy is the only solution now.

Jirka

>>> Giving the same "weight" to two different slaves means "chose at random
>>> on startup and keep the active one until it fails". And if the "at
>>> random" behavior is not appropriate, one can force the active slave
>>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>>
>>> The proposed "weight" slave's option is able to prevent the slaves from
>>> flip-flopping, by stating the fact that two slaves share the same   
>>> "primary" level, and may provide several other enhancements as 
>>> described  in the thread.
>>>
>>
>> Although I cannot reach the thread, this looks interesting. But I'm not sure it
>> has real benefits over primary_lazy option (and it doesn't solve initial curr
>> active slave setup)
>
> You are right, it doesn't solve the initial active slave selection. But  
> why would it be so important to properly select the initial active  
> slave, if you feel comfortable with staying with a new active slave,  
> after a failure and return of the original active slave ? This kind of  
> failures may last for only a few seconds (just unplugging and plugging  
> back the wire), and you configuration may then stay with the new active  
> slave "forever". If "forever" is acceptable, may be "at startup" is  
> acceptable too. :-)
>
> From my point of view (and Andy Gospodarek apparently agreed), the real  
> benefits of the weight slave option is that is it more generic and allow  
> for later usage in other modes, that we don't anticipate for now.
>
> Quoted from a mail from Andy Gospodarek in the original thread :
>
> "I really have no objection to that.  Adding this as a base part of
> bonding for a few modes with known features would be a nice start.
> I'm sure others will be kind enough to send suggestions or patches for
> ways this could benefit other modes."
>
> 	Nicolas.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 17, 2009, 8:55 p.m. UTC | #7
Jiri Pirko a écrit :
> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote:
>> Jiri Pirko wrote:
>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>>>> Jiri Pirko wrote:
>>>>> In some cases there is not desirable to switch back to primary interface when
>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>>>> packetloss as much as we can in some cases. This is solved by introducing
>>>>> primary_lazy option. Note that enslaved primary slave is set as current
>>>>> active no matter what.
>>>> May I suggest that instead of creating a new option to better define how
>>>> the "primary" option is expected to behave for active-backup mode, we 
>>>>  try the "weight" slave  option I proposed in the thread "alternative 
>>>> to  primary" earlier this year ?
>>>>
>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
>>> This link does not work for me :(
>> Nor for me... Sourceforge apparently decided to drop the bonding-devel  
>> list archive just now. 'hope the list archive will be back soon.
>>
>> Originally, the proposed "weight" option for slaves was designed just to  
>> provide a way to better define which slave should become active when the  
>> active one just went down. As you know, the current "primary" option  
>> does not allow for a predictable selection of the new active slave when  
>> the primary loose connectivity. The new active slave is chosen "at  
>> random" between the remaining slaves.
>>
>> After a short thread, involving Jay Vosburg and Andy Gospodarek, we end  
>> up with a general configuration interface, that provide a way to tune  
>> many things in slave management :
>>
>> - Active slave selection in active/backup mode, even in the presence of  
>> more than two slaves.
>> - Active aggregator selection in 802.3ad mode.
>> - Load balancing tuning for most load balancing modes.
>>
>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing  
>> a number there would give a "user supplied weight" to a slave. The speed  
>> and link state of the slave would give a "natural weight" for the slave.  
>> And the "effective weight" would be computed every time one of user  
>> supplied or natural weight change (upon speed or link state changes) and  
>> would be used everywhere we need a slave weight.
>>
>> I suggest that :
>> - slave's natural weight = speed of the slave if link UP, else 0.
>> - slave's effective weight = slave's natural weight * slave's user  
>> supplied weight.
>> - aggregator's effective weight = sum of the effective weights of the  
>> slaves inside the aggregator.
>>
>> For the active/backup mode, the exact behavior would be :
>>
>> - When the active slave disappear, the new active slave is the one whose  
>> effective weight is the highest.
>> - When a slave comes back, it only becomes active if its effective  
>> weight is strictly higher than the one of the current active slave.  
>> (This stop the flip-flop risk you stated).
>> - To keep the old "primary" option, we simply give a very high user  
>> supplied weight to the primary slave. Jay suggested :
>> #define BOND_PRIMARY_PRIO 0x80000000
>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
>> user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */
>>
>> The same apply to aggregator : Every time a slave enter (link UP) or  
>> leave (link DOWN) an aggregator, the aggregator effective weight is  
>> recomputed. Then, if an aggregator exist with an strictly higher  
>> effective weight than the current active one, the new best aggregator  
>> becomes active.
>>
>> For others modes, the weight might be used later to tune the load  
>> balancing logic in some way.
>>
>> A default value of 1 for slave weight would cause slave speed to be used  
>> alone, hence the "natural weight".
>>
> 
> I read your text and also the original list thread and I must say I see no
> solution in this "weight" parameter for this issue. Because it's desired for one
> link to stay active even if second come up, these 2 must have the same weight.
> But imagine 3 links of the same weight. In that case you cannot insure that the
> "primary one" will be chosen as active (see my picture in the reply to Jay's
> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy
> option, your proposed weight option has no effect.
> 
> Therefor I still think the primary_lazy is the only solution now.
> 
> Jirka

Hi Jirka,

 From your previous posts (first one and reply to Jay), I understand 
that your want to achieve  the following behavior :

eth0 is primary and active.
eth1 is allowed to be active is eth0 is down.
Also, eth1 should stay active, even if eth0 comes back up.
Switch active to eth0 if eth1 eventually fall down.
Switch active to eth2 only if both eth0 and eth1 are down.

eth0		eth1		eth2
UP(curr)	UP		UP
DOWN		UP(curr)	UP
UP		UP(curr)	UP
UP(curr)	DOWN		UP
DOWN		DOWN		UP(curr)

Using weight, the following setup should give this result :

echo 1000 > /sys/class/net/eth0/bonding/weight
echo 1000 > /sys/class/net/eth1/bonding/weight
echo 1 > /sys/class/net/eth2/bonding/weight
echo eth0 > /sys/class/net/bond0/bonding/active_slave

I hope this is clear now.

	Nicolas.

> 
>>>> Giving the same "weight" to two different slaves means "chose at random
>>>> on startup and keep the active one until it fails". And if the "at
>>>> random" behavior is not appropriate, one can force the active slave
>>>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>>>
>>>> The proposed "weight" slave's option is able to prevent the slaves from
>>>> flip-flopping, by stating the fact that two slaves share the same   
>>>> "primary" level, and may provide several other enhancements as 
>>>> described  in the thread.
>>>>
>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it
>>> has real benefits over primary_lazy option (and it doesn't solve initial curr
>>> active slave setup)
>> You are right, it doesn't solve the initial active slave selection. But  
>> why would it be so important to properly select the initial active  
>> slave, if you feel comfortable with staying with a new active slave,  
>> after a failure and return of the original active slave ? This kind of  
>> failures may last for only a few seconds (just unplugging and plugging  
>> back the wire), and you configuration may then stay with the new active  
>> slave "forever". If "forever" is acceptable, may be "at startup" is  
>> acceptable too. :-)
>>
>> From my point of view (and Andy Gospodarek apparently agreed), the real  
>> benefits of the weight slave option is that is it more generic and allow  
>> for later usage in other modes, that we don't anticipate for now.
>>
>> Quoted from a mail from Andy Gospodarek in the original thread :
>>
>> "I really have no objection to that.  Adding this as a base part of
>> bonding for a few modes with known features would be a nice start.
>> I'm sure others will be kind enough to send suggestions or patches for
>> ways this could benefit other modes."
>>
>> 	Nicolas.
> 


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 18, 2009, 12:45 p.m. UTC | #8
Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko a écrit :
>> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote:
>>> Jiri Pirko wrote:
>>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>>>>> Jiri Pirko wrote:
>>>>>> In some cases there is not desirable to switch back to primary interface when
>>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>>>>> packetloss as much as we can in some cases. This is solved by introducing
>>>>>> primary_lazy option. Note that enslaved primary slave is set as current
>>>>>> active no matter what.
>>>>> May I suggest that instead of creating a new option to better define how
>>>>> the "primary" option is expected to behave for active-backup 
>>>>> mode, we  try the "weight" slave  option I proposed in the thread 
>>>>> "alternative to  primary" earlier this year ?
>>>>>
>>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
>>>> This link does not work for me :(
>>> Nor for me... Sourceforge apparently decided to drop the 
>>> bonding-devel  list archive just now. 'hope the list archive will be 
>>> back soon.
>>>
>>> Originally, the proposed "weight" option for slaves was designed just 
>>> to  provide a way to better define which slave should become active 
>>> when the  active one just went down. As you know, the current 
>>> "primary" option  does not allow for a predictable selection of the 
>>> new active slave when  the primary loose connectivity. The new active 
>>> slave is chosen "at  random" between the remaining slaves.
>>>
>>> After a short thread, involving Jay Vosburg and Andy Gospodarek, we 
>>> end  up with a general configuration interface, that provide a way to 
>>> tune  many things in slave management :
>>>
>>> - Active slave selection in active/backup mode, even in the presence 
>>> of  more than two slaves.
>>> - Active aggregator selection in 802.3ad mode.
>>> - Load balancing tuning for most load balancing modes.
>>>
>>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. 
>>> Writing  a number there would give a "user supplied weight" to a 
>>> slave. The speed  and link state of the slave would give a "natural 
>>> weight" for the slave.  And the "effective weight" would be computed 
>>> every time one of user  supplied or natural weight change (upon speed 
>>> or link state changes) and  would be used everywhere we need a slave 
>>> weight.
>>>
>>> I suggest that :
>>> - slave's natural weight = speed of the slave if link UP, else 0.
>>> - slave's effective weight = slave's natural weight * slave's user   
>>> supplied weight.
>>> - aggregator's effective weight = sum of the effective weights of the 
>>>  slaves inside the aggregator.
>>>
>>> For the active/backup mode, the exact behavior would be :
>>>
>>> - When the active slave disappear, the new active slave is the one 
>>> whose  effective weight is the highest.
>>> - When a slave comes back, it only becomes active if its effective   
>>> weight is strictly higher than the one of the current active slave.   
>>> (This stop the flip-flop risk you stated).
>>> - To keep the old "primary" option, we simply give a very high user   
>>> supplied weight to the primary slave. Jay suggested :
>>> #define BOND_PRIMARY_PRIO 0x80000000
>>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
>>> user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */
>>>
>>> The same apply to aggregator : Every time a slave enter (link UP) or  
>>> leave (link DOWN) an aggregator, the aggregator effective weight is   
>>> recomputed. Then, if an aggregator exist with an strictly higher   
>>> effective weight than the current active one, the new best aggregator 
>>>  becomes active.
>>>
>>> For others modes, the weight might be used later to tune the load   
>>> balancing logic in some way.
>>>
>>> A default value of 1 for slave weight would cause slave speed to be 
>>> used  alone, hence the "natural weight".
>>>
>>
>> I read your text and also the original list thread and I must say I see no
>> solution in this "weight" parameter for this issue. Because it's desired for one
>> link to stay active even if second come up, these 2 must have the same weight.
>> But imagine 3 links of the same weight. In that case you cannot insure that the
>> "primary one" will be chosen as active (see my picture in the reply to Jay's
>> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy
>> option, your proposed weight option has no effect.
>>
>> Therefor I still think the primary_lazy is the only solution now.
>>
>> Jirka
>
> Hi Jirka,
>
> From your previous posts (first one and reply to Jay), I understand that 
> your want to achieve  the following behavior :
>
> eth0 is primary and active.
> eth1 is allowed to be active is eth0 is down.
> Also, eth1 should stay active, even if eth0 comes back up.
> Switch active to eth0 if eth1 eventually fall down.
> Switch active to eth2 only if both eth0 and eth1 are down.
>
> eth0		eth1		eth2
> UP(curr)	UP		UP
> DOWN		UP(curr)	UP
> UP		UP(curr)	UP
> UP(curr)	DOWN		UP
> DOWN		DOWN		UP(curr)
>
> Using weight, the following setup should give this result :
>
> echo 1000 > /sys/class/net/eth0/bonding/weight
> echo 1000 > /sys/class/net/eth1/bonding/weight
> echo 1 > /sys/class/net/eth2/bonding/weight
> echo eth0 > /sys/class/net/bond0/bonding/active_slave
>
> I hope this is clear now.

Hmm... I ment the eth1 and eth2 to be the equivalent...
If eth1 is down (let's say for good) and eth0 comes down, eth2 is
selected as current active. But when eth0 comes up then eth0 is selected. That
is not desired.


>
> 	Nicolas.
>
>>
>>>>> Giving the same "weight" to two different slaves means "chose at random
>>>>> on startup and keep the active one until it fails". And if the "at
>>>>> random" behavior is not appropriate, one can force the active slave
>>>>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>>>>
>>>>> The proposed "weight" slave's option is able to prevent the slaves from
>>>>> flip-flopping, by stating the fact that two slaves share the same 
>>>>>   "primary" level, and may provide several other enhancements as  
>>>>> described  in the thread.
>>>>>
>>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it
>>>> has real benefits over primary_lazy option (and it doesn't solve initial curr
>>>> active slave setup)
>>> You are right, it doesn't solve the initial active slave selection. 
>>> But  why would it be so important to properly select the initial 
>>> active  slave, if you feel comfortable with staying with a new active 
>>> slave,  after a failure and return of the original active slave ? 
>>> This kind of  failures may last for only a few seconds (just 
>>> unplugging and plugging  back the wire), and you configuration may 
>>> then stay with the new active  slave "forever". If "forever" is 
>>> acceptable, may be "at startup" is  acceptable too. :-)
>>>
>>> From my point of view (and Andy Gospodarek apparently agreed), the 
>>> real  benefits of the weight slave option is that is it more generic 
>>> and allow  for later usage in other modes, that we don't anticipate 
>>> for now.
>>>
>>> Quoted from a mail from Andy Gospodarek in the original thread :
>>>
>>> "I really have no objection to that.  Adding this as a base part of
>>> bonding for a few modes with known features would be a nice start.
>>> I'm sure others will be kind enough to send suggestions or patches for
>>> ways this could benefit other modes."
>>>
>>> 	Nicolas.
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 20, 2009, 12:40 p.m. UTC | #9
Jiri Pirko awrote:
> Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote:
>> Jiri Pirko wrote:
>>> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote:
>>>> Jiri Pirko wrote:
>>>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>>>>>> Jiri Pirko wrote:
>>>>>>> In some cases there is not desirable to switch back to primary interface when
>>>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>>>>>> packetloss as much as we can in some cases. This is solved by introducing
>>>>>>> primary_lazy option. Note that enslaved primary slave is set as current
>>>>>>> active no matter what.
>>>>>> May I suggest that instead of creating a new option to better define how
>>>>>> the "primary" option is expected to behave for active-backup 
>>>>>> mode, we  try the "weight" slave  option I proposed in the thread 
>>>>>> "alternative to  primary" earlier this year ?
>>>>>>
>>>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
>>>>> This link does not work for me :(
>>>> Nor for me... Sourceforge apparently decided to drop the 
>>>> bonding-devel  list archive just now. 'hope the list archive will be 
>>>> back soon.
>>>>
>>>> Originally, the proposed "weight" option for slaves was designed just 
>>>> to  provide a way to better define which slave should become active 
>>>> when the  active one just went down. As you know, the current 
>>>> "primary" option  does not allow for a predictable selection of the 
>>>> new active slave when  the primary loose connectivity. The new active 
>>>> slave is chosen "at  random" between the remaining slaves.
>>>>
>>>> After a short thread, involving Jay Vosburg and Andy Gospodarek, we 
>>>> end  up with a general configuration interface, that provide a way to 
>>>> tune  many things in slave management :
>>>>
>>>> - Active slave selection in active/backup mode, even in the presence 
>>>> of  more than two slaves.
>>>> - Active aggregator selection in 802.3ad mode.
>>>> - Load balancing tuning for most load balancing modes.
>>>>
>>>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. 
>>>> Writing  a number there would give a "user supplied weight" to a 
>>>> slave. The speed  and link state of the slave would give a "natural 
>>>> weight" for the slave.  And the "effective weight" would be computed 
>>>> every time one of user  supplied or natural weight change (upon speed 
>>>> or link state changes) and  would be used everywhere we need a slave 
>>>> weight.
>>>>
>>>> I suggest that :
>>>> - slave's natural weight = speed of the slave if link UP, else 0.
>>>> - slave's effective weight = slave's natural weight * slave's user   
>>>> supplied weight.
>>>> - aggregator's effective weight = sum of the effective weights of the 
>>>>  slaves inside the aggregator.
>>>>
>>>> For the active/backup mode, the exact behavior would be :
>>>>
>>>> - When the active slave disappear, the new active slave is the one 
>>>> whose  effective weight is the highest.
>>>> - When a slave comes back, it only becomes active if its effective   
>>>> weight is strictly higher than the one of the current active slave.   
>>>> (This stop the flip-flop risk you stated).
>>>> - To keep the old "primary" option, we simply give a very high user   
>>>> supplied weight to the primary slave. Jay suggested :
>>>> #define BOND_PRIMARY_PRIO 0x80000000
>>>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
>>>> user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */
>>>>
>>>> The same apply to aggregator : Every time a slave enter (link UP) or  
>>>> leave (link DOWN) an aggregator, the aggregator effective weight is   
>>>> recomputed. Then, if an aggregator exist with an strictly higher   
>>>> effective weight than the current active one, the new best aggregator 
>>>>  becomes active.
>>>>
>>>> For others modes, the weight might be used later to tune the load   
>>>> balancing logic in some way.
>>>>
>>>> A default value of 1 for slave weight would cause slave speed to be 
>>>> used  alone, hence the "natural weight".
>>>>
>>> I read your text and also the original list thread and I must say I see no
>>> solution in this "weight" parameter for this issue. Because it's desired for one
>>> link to stay active even if second come up, these 2 must have the same weight.
>>> But imagine 3 links of the same weight. In that case you cannot insure that the
>>> "primary one" will be chosen as active (see my picture in the reply to Jay's
>>> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy
>>> option, your proposed weight option has no effect.
>>>
>>> Therefor I still think the primary_lazy is the only solution now.
>>>
>>> Jirka
>> Hi Jirka,
>>
>> From your previous posts (first one and reply to Jay), I understand that 
>> your want to achieve  the following behavior :
>>
>> eth0 is primary and active.
>> eth1 is allowed to be active is eth0 is down.
>> Also, eth1 should stay active, even if eth0 comes back up.
>> Switch active to eth0 if eth1 eventually fall down.
>> Switch active to eth2 only if both eth0 and eth1 are down.
>>
>> eth0		eth1		eth2
>> UP(curr)	UP		UP
>> DOWN		UP(curr)	UP
>> UP		UP(curr)	UP
>> UP(curr)	DOWN		UP
>> DOWN		DOWN		UP(curr)
>>
>> Using weight, the following setup should give this result :
>>
>> echo 1000 > /sys/class/net/eth0/bonding/weight
>> echo 1000 > /sys/class/net/eth1/bonding/weight
>> echo 1 > /sys/class/net/eth2/bonding/weight
>> echo eth0 > /sys/class/net/bond0/bonding/active_slave
>>
>> I hope this is clear now.
> 
> Hmm... I ment the eth1 and eth2 to be the equivalent...
> If eth1 is down (let's say for good) and eth0 comes down, eth2 is
> selected as current active. But when eth0 comes up then eth0 is selected. That
> is not desired.

OK, now I think I really understand your exact requirement.

You want the ability to keep the current active slave active, even if a
better slave comes back up, so the only reason for the active slave to
change would be that the current active slave falls down:

eth0		eth1		eth2
UP(curr)	UP		UP
DOWN		UP(curr)	UP
UP		UP(curr)	UP
UP(curr)	DOWN		UP
DOWN		DOWN		UP(curr)
UP		DOWN		UP(curr)  <-

But at the same time, you still need the ability to properly select the
best new active slave when the current one falls down, hence your answer
in reply to Jay's proposal:

	> But imagine you have bond with 3 slaves:
	> eth0		eth1		eth2
	> UP(curr)	UP		UP
	> DOWN		UP(curr)	UP
	> UP		UP(curr)	UP
	> UP		DOWN		UP(curr)

	> eth2 ends up being current active but we prefer eth0 (as
	> primary interface).
	> This is not desirable and is solved by primary_lazy option.

I think your proposed "primary_lazy" option suffer some limits and
should not be a per bond option but a per slave option.

You are right that some slave should be able to be "sticky" when active,
in order to reduce packets loose when switching. But due to performance
reason, it might be desirable to say that some other slaves are not
"sticky" when active, in the same configuration.

Let's imagine the following configuration :

eth0: 1 Gb/s - primary
eth1: 1 Gb/s
eth2: 100 Mb/s

With "primary_lazy=1, eth2 has a chance to stay active, after eth0
and eth1 both failed at the same time. The risk of loosing a few packets
while switching back from eth2 to eth0 or eth1 might be seen acceptable,
compared to sticking to a 100 Mb/s interface when a 1 Gb/s interface
is available.

Due to eth2 speed, one might want to have the following behavior:

If eth1 is active, keep it active, even if eth0 comes back up. But if
eth2 is active, switch to any better slave right at the time one comes
back up.

I suggest that instead of having a per bond "primary_lazy" option, we
define a per slave option, describing whether this particular slave is
"sticky when active" or not.

The above setup would become :

echo 1 > /sys/class/net/eth0/bonding/sticky_active
echo 1 > /sys/class/net/eth1/bonding/sticky_active
echo 0 > /sys/class/net/eth2/bonding/sticky_active
echo eth0 > /sys/class/net/bond0/bonding/primary

Or may be better, keeping the "weight" idea in mind, a per slave option
"active_weight" that gives the weight of the slave, *when active*.

The effective weight of a slave would become :

effective_slave =
(is_active ? user_supplied_active_weight ? user_supplied_weight) *
natural_weight

# Prefer eth0, then one of eth1 or eth2, then eth3.
echo 1000 > /sys/class/net/eth0/bonding/weight
echo 999 > /sys/class/net/eth1/bonding/weight
echo 999 > /sys/class/net/eth2/bonding/weight
echo 10 > /sys/class/net/eth3/bonding/weight

# Do not switch back to primary eth0 if eth1 or eth2 is active.
echo 1000 > /sys/class/net/eth1/bonding/active_weight
echo 1000 > /sys/class/net/eth2/bonding/active_weight

Every time one changes the user_supplied_weight, then
user_supplied_active_weight must be reset to the same value. This way, 
if no special setup is done on active_weight, then the current normal
behavior is achieved.

If none of those options seem acceptable to you, I suggest a third one:

You keep primary_lazy, but with the following values :

# Switch back to primary slaves when it comes back.
echo 0 > /sys/class/net/bond0/bonding/primary_lazy

# Switch back to primary when it comes back, only if the speed of the
# primary slave is higher than the speed of the current active slave.
echo 1 > /sys/class/net/bond0/bonding/primary_lazy

# Stick to the current active slave when the primary slave comes back,
# even if the primary slave speed is higher than the speed of the
# current active slave.
echo 2 > /sys/class/net/bond0/bonding/primary_lazy

You can consider the value as being the level of laziness of the primary.

	Nicolas.

>>>>>> Giving the same "weight" to two different slaves means "chose at random
>>>>>> on startup and keep the active one until it fails". And if the "at
>>>>>> random" behavior is not appropriate, one can force the active slave
>>>>>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>>>>>
>>>>>> The proposed "weight" slave's option is able to prevent the slaves from
>>>>>> flip-flopping, by stating the fact that two slaves share the same 
>>>>>>   "primary" level, and may provide several other enhancements as  
>>>>>> described  in the thread.
>>>>>>
>>>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it
>>>>> has real benefits over primary_lazy option (and it doesn't solve initial curr
>>>>> active slave setup)
>>>> You are right, it doesn't solve the initial active slave selection. 
>>>> But  why would it be so important to properly select the initial 
>>>> active  slave, if you feel comfortable with staying with a new active 
>>>> slave,  after a failure and return of the original active slave ? 
>>>> This kind of  failures may last for only a few seconds (just 
>>>> unplugging and plugging  back the wire), and you configuration may 
>>>> then stay with the new active  slave "forever". If "forever" is 
>>>> acceptable, may be "at startup" is  acceptable too. :-)
>>>>
>>>> From my point of view (and Andy Gospodarek apparently agreed), the 
>>>> real  benefits of the weight slave option is that is it more generic 
>>>> and allow  for later usage in other modes, that we don't anticipate 
>>>> for now.
>>>>
>>>> Quoted from a mail from Andy Gospodarek in the original thread :
>>>>
>>>> "I really have no objection to that.  Adding this as a base part of
>>>> bonding for a few modes with known features would be a nice start.
>>>> I'm sure others will be kind enough to send suggestions or patches for
>>>> ways this could benefit other modes."



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 24, 2009, 11:16 a.m. UTC | #10
Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko awrote:
>> Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote:
>>> Jiri Pirko wrote:
>>>> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote:
>>>>> Jiri Pirko wrote:
>>>>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote:
>>>>>>> Jiri Pirko wrote:
>>>>>>>> In some cases there is not desirable to switch back to primary interface when
>>>>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid
>>>>>>>> packetloss as much as we can in some cases. This is solved by introducing
>>>>>>>> primary_lazy option. Note that enslaved primary slave is set as current
>>>>>>>> active no matter what.
>>>>>>> May I suggest that instead of creating a new option to better define how
>>>>>>> the "primary" option is expected to behave for active-backup  
>>>>>>> mode, we  try the "weight" slave  option I proposed in the 
>>>>>>> thread "alternative to  primary" earlier this year ?
>>>>>>>
>>>>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel
>>>>>> This link does not work for me :(
>>>>> Nor for me... Sourceforge apparently decided to drop the  
>>>>> bonding-devel  list archive just now. 'hope the list archive will 
>>>>> be back soon.
>>>>>
>>>>> Originally, the proposed "weight" option for slaves was designed 
>>>>> just to  provide a way to better define which slave should become 
>>>>> active when the  active one just went down. As you know, the 
>>>>> current "primary" option  does not allow for a predictable 
>>>>> selection of the new active slave when  the primary loose 
>>>>> connectivity. The new active slave is chosen "at  random" between 
>>>>> the remaining slaves.
>>>>>
>>>>> After a short thread, involving Jay Vosburg and Andy Gospodarek, 
>>>>> we end  up with a general configuration interface, that provide a 
>>>>> way to tune  many things in slave management :
>>>>>
>>>>> - Active slave selection in active/backup mode, even in the 
>>>>> presence of  more than two slaves.
>>>>> - Active aggregator selection in 802.3ad mode.
>>>>> - Load balancing tuning for most load balancing modes.
>>>>>
>>>>> The sysfs interface would be /sys/class/net/eth0/bonding/weight.  
>>>>> Writing  a number there would give a "user supplied weight" to a  
>>>>> slave. The speed  and link state of the slave would give a 
>>>>> "natural weight" for the slave.  And the "effective weight" would 
>>>>> be computed every time one of user  supplied or natural weight 
>>>>> change (upon speed or link state changes) and  would be used 
>>>>> everywhere we need a slave weight.
>>>>>
>>>>> I suggest that :
>>>>> - slave's natural weight = speed of the slave if link UP, else 0.
>>>>> - slave's effective weight = slave's natural weight * slave's 
>>>>> user   supplied weight.
>>>>> - aggregator's effective weight = sum of the effective weights of 
>>>>> the  slaves inside the aggregator.
>>>>>
>>>>> For the active/backup mode, the exact behavior would be :
>>>>>
>>>>> - When the active slave disappear, the new active slave is the 
>>>>> one whose  effective weight is the highest.
>>>>> - When a slave comes back, it only becomes active if its 
>>>>> effective   weight is strictly higher than the one of the current 
>>>>> active slave.   (This stop the flip-flop risk you stated).
>>>>> - To keep the old "primary" option, we simply give a very high 
>>>>> user   supplied weight to the primary slave. Jay suggested :
>>>>> #define BOND_PRIMARY_PRIO 0x80000000
>>>>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */
>>>>> user_supplied_weight &= ~BOND_PRIMAY_PRIO  /* to clear the primary */
>>>>>
>>>>> The same apply to aggregator : Every time a slave enter (link UP) 
>>>>> or  leave (link DOWN) an aggregator, the aggregator effective 
>>>>> weight is   recomputed. Then, if an aggregator exist with an 
>>>>> strictly higher   effective weight than the current active one, 
>>>>> the new best aggregator  becomes active.
>>>>>
>>>>> For others modes, the weight might be used later to tune the load 
>>>>>   balancing logic in some way.
>>>>>
>>>>> A default value of 1 for slave weight would cause slave speed to 
>>>>> be used  alone, hence the "natural weight".
>>>>>
>>>> I read your text and also the original list thread and I must say I see no
>>>> solution in this "weight" parameter for this issue. Because it's desired for one
>>>> link to stay active even if second come up, these 2 must have the same weight.
>>>> But imagine 3 links of the same weight. In that case you cannot insure that the
>>>> "primary one" will be chosen as active (see my picture in the reply to Jay's
>>>> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy
>>>> option, your proposed weight option has no effect.
>>>>
>>>> Therefor I still think the primary_lazy is the only solution now.
>>>>
>>>> Jirka
>>> Hi Jirka,
>>>
>>> From your previous posts (first one and reply to Jay), I understand 
>>> that your want to achieve  the following behavior :
>>>
>>> eth0 is primary and active.
>>> eth1 is allowed to be active is eth0 is down.
>>> Also, eth1 should stay active, even if eth0 comes back up.
>>> Switch active to eth0 if eth1 eventually fall down.
>>> Switch active to eth2 only if both eth0 and eth1 are down.
>>>
>>> eth0		eth1		eth2
>>> UP(curr)	UP		UP
>>> DOWN		UP(curr)	UP
>>> UP		UP(curr)	UP
>>> UP(curr)	DOWN		UP
>>> DOWN		DOWN		UP(curr)
>>>
>>> Using weight, the following setup should give this result :
>>>
>>> echo 1000 > /sys/class/net/eth0/bonding/weight
>>> echo 1000 > /sys/class/net/eth1/bonding/weight
>>> echo 1 > /sys/class/net/eth2/bonding/weight
>>> echo eth0 > /sys/class/net/bond0/bonding/active_slave
>>>
>>> I hope this is clear now.
>>
>> Hmm... I ment the eth1 and eth2 to be the equivalent...
>> If eth1 is down (let's say for good) and eth0 comes down, eth2 is
>> selected as current active. But when eth0 comes up then eth0 is selected. That
>> is not desired.
>
> OK, now I think I really understand your exact requirement.
>
> You want the ability to keep the current active slave active, even if a
> better slave comes back up, so the only reason for the active slave to
> change would be that the current active slave falls down:
>
> eth0		eth1		eth2
> UP(curr)	UP		UP
> DOWN		UP(curr)	UP
> UP		UP(curr)	UP
> UP(curr)	DOWN		UP
> DOWN		DOWN		UP(curr)
> UP		DOWN		UP(curr)  <-
>
> But at the same time, you still need the ability to properly select the
> best new active slave when the current one falls down, hence your answer
> in reply to Jay's proposal:
>
> 	> But imagine you have bond with 3 slaves:
> 	> eth0		eth1		eth2
> 	> UP(curr)	UP		UP
> 	> DOWN		UP(curr)	UP
> 	> UP		UP(curr)	UP
> 	> UP		DOWN		UP(curr)
>
> 	> eth2 ends up being current active but we prefer eth0 (as
> 	> primary interface).
> 	> This is not desirable and is solved by primary_lazy option.
>
> I think your proposed "primary_lazy" option suffer some limits and
> should not be a per bond option but a per slave option.
>
> You are right that some slave should be able to be "sticky" when active,
> in order to reduce packets loose when switching. But due to performance
> reason, it might be desirable to say that some other slaves are not
> "sticky" when active, in the same configuration.
>
> Let's imagine the following configuration :
>
> eth0: 1 Gb/s - primary
> eth1: 1 Gb/s
> eth2: 100 Mb/s
>
> With "primary_lazy=1, eth2 has a chance to stay active, after eth0
> and eth1 both failed at the same time. The risk of loosing a few packets
> while switching back from eth2 to eth0 or eth1 might be seen acceptable,
> compared to sticking to a 100 Mb/s interface when a 1 Gb/s interface
> is available.
>
> Due to eth2 speed, one might want to have the following behavior:
>
> If eth1 is active, keep it active, even if eth0 comes back up. But if
> eth2 is active, switch to any better slave right at the time one comes
> back up.
>
> I suggest that instead of having a per bond "primary_lazy" option, we
> define a per slave option, describing whether this particular slave is
> "sticky when active" or not.
>
> The above setup would become :
>
> echo 1 > /sys/class/net/eth0/bonding/sticky_active
> echo 1 > /sys/class/net/eth1/bonding/sticky_active
> echo 0 > /sys/class/net/eth2/bonding/sticky_active
> echo eth0 > /sys/class/net/bond0/bonding/primary
>
> Or may be better, keeping the "weight" idea in mind, a per slave option
> "active_weight" that gives the weight of the slave, *when active*.
>
> The effective weight of a slave would become :
>
> effective_slave =
> (is_active ? user_supplied_active_weight ? user_supplied_weight) *
> natural_weight
>
> # Prefer eth0, then one of eth1 or eth2, then eth3.
> echo 1000 > /sys/class/net/eth0/bonding/weight
> echo 999 > /sys/class/net/eth1/bonding/weight
> echo 999 > /sys/class/net/eth2/bonding/weight
> echo 10 > /sys/class/net/eth3/bonding/weight
>
> # Do not switch back to primary eth0 if eth1 or eth2 is active.
> echo 1000 > /sys/class/net/eth1/bonding/active_weight
> echo 1000 > /sys/class/net/eth2/bonding/active_weight
>
> Every time one changes the user_supplied_weight, then
> user_supplied_active_weight must be reset to the same value. This way,  
> if no special setup is done on active_weight, then the current normal
> behavior is achieved.

I must say I like this approach. But it would be not trivial to implement this.
Therefore I would stick with your propose of extending primary lazy to 3 values
until the weight option is implemented.

I'm going to implement your propose below.

>
> If none of those options seem acceptable to you, I suggest a third one:
>
> You keep primary_lazy, but with the following values :
>
> # Switch back to primary slaves when it comes back.
> echo 0 > /sys/class/net/bond0/bonding/primary_lazy
>
> # Switch back to primary when it comes back, only if the speed of the
> # primary slave is higher than the speed of the current active slave.
> echo 1 > /sys/class/net/bond0/bonding/primary_lazy
>
> # Stick to the current active slave when the primary slave comes back,
> # even if the primary slave speed is higher than the speed of the
> # current active slave.
> echo 2 > /sys/class/net/bond0/bonding/primary_lazy
>
> You can consider the value as being the level of laziness of the primary.
>
> 	Nicolas.
>
>>>>>>> Giving the same "weight" to two different slaves means "chose at random
>>>>>>> on startup and keep the active one until it fails". And if the "at
>>>>>>> random" behavior is not appropriate, one can force the active slave
>>>>>>> using what Jay suggested  (/sys/class/net/bond0/bonding/active).
>>>>>>>
>>>>>>> The proposed "weight" slave's option is able to prevent the slaves from
>>>>>>> flip-flopping, by stating the fact that two slaves share the 
>>>>>>> same   "primary" level, and may provide several other 
>>>>>>> enhancements as  described  in the thread.
>>>>>>>
>>>>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it
>>>>>> has real benefits over primary_lazy option (and it doesn't solve initial curr
>>>>>> active slave setup)
>>>>> You are right, it doesn't solve the initial active slave 
>>>>> selection. But  why would it be so important to properly select 
>>>>> the initial active  slave, if you feel comfortable with staying 
>>>>> with a new active slave,  after a failure and return of the 
>>>>> original active slave ? This kind of  failures may last for only 
>>>>> a few seconds (just unplugging and plugging  back the wire), and 
>>>>> you configuration may then stay with the new active  slave 
>>>>> "forever". If "forever" is acceptable, may be "at startup" is  
>>>>> acceptable too. :-)
>>>>>
>>>>> From my point of view (and Andy Gospodarek apparently agreed), 
>>>>> the real  benefits of the weight slave option is that is it more 
>>>>> generic and allow  for later usage in other modes, that we don't 
>>>>> anticipate for now.
>>>>>
>>>>> Quoted from a mail from Andy Gospodarek in the original thread :
>>>>>
>>>>> "I really have no objection to that.  Adding this as a base part of
>>>>> bonding for a few modes with known features would be a nice start.
>>>>> I'm sure others will be kind enough to send suggestions or patches for
>>>>> ways this could benefit other modes."
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 24, 2009, 3:07 p.m. UTC | #11
Jiri Pirko wrote:
> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote:
[--cut--]
>> I suggest that instead of having a per bond "primary_lazy" option, we
>> define a per slave option, describing whether this particular slave is
>> "sticky when active" or not.
>>
>> The above setup would become :
>>
>> echo 1 > /sys/class/net/eth0/bonding/sticky_active
>> echo 1 > /sys/class/net/eth1/bonding/sticky_active
>> echo 0 > /sys/class/net/eth2/bonding/sticky_active
>> echo eth0 > /sys/class/net/bond0/bonding/primary
>>
>> Or may be better, keeping the "weight" idea in mind, a per slave option
>> "active_weight" that gives the weight of the slave, *when active*.
>>
>> The effective weight of a slave would become :
>>
>> effective_slave =
>> (is_active ? user_supplied_active_weight ? user_supplied_weight) *
>> natural_weight
>>
>> # Prefer eth0, then one of eth1 or eth2, then eth3.
>> echo 1000 > /sys/class/net/eth0/bonding/weight
>> echo 999 > /sys/class/net/eth1/bonding/weight
>> echo 999 > /sys/class/net/eth2/bonding/weight
>> echo 10 > /sys/class/net/eth3/bonding/weight
>>
>> # Do not switch back to primary eth0 if eth1 or eth2 is active.
>> echo 1000 > /sys/class/net/eth1/bonding/active_weight
>> echo 1000 > /sys/class/net/eth2/bonding/active_weight
>>
>> Every time one changes the user_supplied_weight, then
>> user_supplied_active_weight must be reset to the same value. This way,  
>> if no special setup is done on active_weight, then the current normal
>> behavior is achieved.
> 
> I must say I like this approach. But it would be not trivial to implement this.
> Therefore I would stick with your propose of extending primary lazy to 3 values
> until the weight option is implemented.

It sounds good for me. Later, if I eventually implement the weight option, it 
shouldn't be that hard to convert internally the primary_lazy setup to 
active_weight, the same way we plan to convert internally primary setup to 
weight setup.

primary and primary_lazy are convenient for simple - two slaves only - 
configurations. weight and active_weight are for more advanced configurations. 
Keeping both configuration interface does sound user friendly.

> I'm going to implement your propose below.

By the way, even if I'm not a native English speaker, I think that primary_lazy 
option should be named lazy_primary instead.

	Nicolas.

>> If none of those options seem acceptable to you, I suggest a third one:
>>
>> You keep primary_lazy, but with the following values :
>>
>> # Switch back to primary slaves when it comes back.
>> echo 0 > /sys/class/net/bond0/bonding/primary_lazy
>>
>> # Switch back to primary when it comes back, only if the speed of the
>> # primary slave is higher than the speed of the current active slave.
>> echo 1 > /sys/class/net/bond0/bonding/primary_lazy
>>
>> # Stick to the current active slave when the primary slave comes back,
>> # even if the primary slave speed is higher than the speed of the
>> # current active slave.
>> echo 2 > /sys/class/net/bond0/bonding/primary_lazy
>>
>> You can consider the value as being the level of laziness of the primary.
>>
>> 	Nicolas.
>>


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 24, 2009, 3:20 p.m. UTC | #12
Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko wrote:
>> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote:
> [--cut--]
>>> I suggest that instead of having a per bond "primary_lazy" option, we
>>> define a per slave option, describing whether this particular slave is
>>> "sticky when active" or not.
>>>
>>> The above setup would become :
>>>
>>> echo 1 > /sys/class/net/eth0/bonding/sticky_active
>>> echo 1 > /sys/class/net/eth1/bonding/sticky_active
>>> echo 0 > /sys/class/net/eth2/bonding/sticky_active
>>> echo eth0 > /sys/class/net/bond0/bonding/primary
>>>
>>> Or may be better, keeping the "weight" idea in mind, a per slave option
>>> "active_weight" that gives the weight of the slave, *when active*.
>>>
>>> The effective weight of a slave would become :
>>>
>>> effective_slave =
>>> (is_active ? user_supplied_active_weight ? user_supplied_weight) *
>>> natural_weight
>>>
>>> # Prefer eth0, then one of eth1 or eth2, then eth3.
>>> echo 1000 > /sys/class/net/eth0/bonding/weight
>>> echo 999 > /sys/class/net/eth1/bonding/weight
>>> echo 999 > /sys/class/net/eth2/bonding/weight
>>> echo 10 > /sys/class/net/eth3/bonding/weight
>>>
>>> # Do not switch back to primary eth0 if eth1 or eth2 is active.
>>> echo 1000 > /sys/class/net/eth1/bonding/active_weight
>>> echo 1000 > /sys/class/net/eth2/bonding/active_weight
>>>
>>> Every time one changes the user_supplied_weight, then
>>> user_supplied_active_weight must be reset to the same value. This 
>>> way,  if no special setup is done on active_weight, then the current 
>>> normal
>>> behavior is achieved.
>>
>> I must say I like this approach. But it would be not trivial to implement this.
>> Therefore I would stick with your propose of extending primary lazy to 3 values
>> until the weight option is implemented.
>
> It sounds good for me. Later, if I eventually implement the weight 
> option, it shouldn't be that hard to convert internally the primary_lazy 
> setup to active_weight, the same way we plan to convert internally 
> primary setup to weight setup.
>
> primary and primary_lazy are convenient for simple - two slaves only -  
> configurations. weight and active_weight are for more advanced 
> configurations. Keeping both configuration interface does sound user 
> friendly.

Ok, I agree.

>
>> I'm going to implement your propose below.
>
> By the way, even if I'm not a native English speaker, I think that 
> primary_lazy option should be named lazy_primary instead.

Well I've intentionally put "primary" as a first word to amplify it's linked
with primary option... But...

Jirka
>
> 	Nicolas.
>
>>> If none of those options seem acceptable to you, I suggest a third one:
>>>
>>> You keep primary_lazy, but with the following values :
>>>
>>> # Switch back to primary slaves when it comes back.
>>> echo 0 > /sys/class/net/bond0/bonding/primary_lazy
>>>
>>> # Switch back to primary when it comes back, only if the speed of the
>>> # primary slave is higher than the speed of the current active slave.
>>> echo 1 > /sys/class/net/bond0/bonding/primary_lazy
>>>
>>> # Stick to the current active slave when the primary slave comes back,
>>> # even if the primary slave speed is higher than the speed of the
>>> # current active slave.
>>> echo 2 > /sys/class/net/bond0/bonding/primary_lazy
>>>
>>> You can consider the value as being the level of laziness of the primary.
>>>
>>> 	Nicolas.
>>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Vosburgh Aug. 24, 2009, 5:35 p.m. UTC | #13
Jiri Pirko <jpirko@redhat.com> wrote:

>Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote:
>> Jiri Pirko wrote:
>>> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote:
>> [--cut--]
>>>> I suggest that instead of having a per bond "primary_lazy" option, we
>>>> define a per slave option, describing whether this particular slave is
>>>> "sticky when active" or not.
>>>>
>>>> The above setup would become :
>>>>
>>>> echo 1 > /sys/class/net/eth0/bonding/sticky_active
>>>> echo 1 > /sys/class/net/eth1/bonding/sticky_active
>>>> echo 0 > /sys/class/net/eth2/bonding/sticky_active
>>>> echo eth0 > /sys/class/net/bond0/bonding/primary
>>>>
>>>> Or may be better, keeping the "weight" idea in mind, a per slave option
>>>> "active_weight" that gives the weight of the slave, *when active*.
>>>>
>>>> The effective weight of a slave would become :
>>>>
>>>> effective_slave =
>>>> (is_active ? user_supplied_active_weight ? user_supplied_weight) *
>>>> natural_weight
>>>>
>>>> # Prefer eth0, then one of eth1 or eth2, then eth3.
>>>> echo 1000 > /sys/class/net/eth0/bonding/weight
>>>> echo 999 > /sys/class/net/eth1/bonding/weight
>>>> echo 999 > /sys/class/net/eth2/bonding/weight
>>>> echo 10 > /sys/class/net/eth3/bonding/weight
>>>>
>>>> # Do not switch back to primary eth0 if eth1 or eth2 is active.
>>>> echo 1000 > /sys/class/net/eth1/bonding/active_weight
>>>> echo 1000 > /sys/class/net/eth2/bonding/active_weight
>>>>
>>>> Every time one changes the user_supplied_weight, then
>>>> user_supplied_active_weight must be reset to the same value. This 
>>>> way,  if no special setup is done on active_weight, then the current 
>>>> normal
>>>> behavior is achieved.
>>>
>>> I must say I like this approach. But it would be not trivial to implement this.
>>> Therefore I would stick with your propose of extending primary lazy to 3 values
>>> until the weight option is implemented.
>>
>> It sounds good for me. Later, if I eventually implement the weight 
>> option, it shouldn't be that hard to convert internally the primary_lazy 
>> setup to active_weight, the same way we plan to convert internally 
>> primary setup to weight setup.
>>
>> primary and primary_lazy are convenient for simple - two slaves only -  
>> configurations. weight and active_weight are for more advanced 
>> configurations. Keeping both configuration interface does sound user 
>> friendly.
>
>Ok, I agree.
>
>>
>>> I'm going to implement your propose below.
>>
>> By the way, even if I'm not a native English speaker, I think that 
>> primary_lazy option should be named lazy_primary instead.
>
>Well I've intentionally put "primary" as a first word to amplify it's linked
>with primary option... But...

	I'm still unclear as to why it's better to add another special
case option to bonding instead of changing this in user space, other
than it'd be a change to user space (initscripts / sysconfig).

	The way I see it, this patch is adding a mechanism that says,
effectively, "make slave X the active slave, but do it only once."
There is already a way to do that in bonding (sysfs, as above, or
ifenslave -c); I am reluctant to add another without good reason.

	I'm not necessarily against the "weight" business in general.
For the purposes of this discussion, however, it's a big complex
solution to a pretty simple problem, and the "weight" system still has
to have special sauce added it to to handle this special case.

	Last, presuming for the moment that this goes forward as an
option to bonding, I think this should be named something along the
lines of "make_active" (or perhaps "make_active_once", but that's a bit
long).  The option has the effect of making the specified slave the
active slave one time, then the option setting is cleared.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Aug. 25, 2009, 6:43 a.m. UTC | #14
Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote:
>Jiri Pirko <jpirko@redhat.com> wrote:
>
>>Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote:
>>> Jiri Pirko wrote:
>>>> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote:
>>> [--cut--]
>>>>> I suggest that instead of having a per bond "primary_lazy" option, we
>>>>> define a per slave option, describing whether this particular slave is
>>>>> "sticky when active" or not.
>>>>>
>>>>> The above setup would become :
>>>>>
>>>>> echo 1 > /sys/class/net/eth0/bonding/sticky_active
>>>>> echo 1 > /sys/class/net/eth1/bonding/sticky_active
>>>>> echo 0 > /sys/class/net/eth2/bonding/sticky_active
>>>>> echo eth0 > /sys/class/net/bond0/bonding/primary
>>>>>
>>>>> Or may be better, keeping the "weight" idea in mind, a per slave option
>>>>> "active_weight" that gives the weight of the slave, *when active*.
>>>>>
>>>>> The effective weight of a slave would become :
>>>>>
>>>>> effective_slave =
>>>>> (is_active ? user_supplied_active_weight ? user_supplied_weight) *
>>>>> natural_weight
>>>>>
>>>>> # Prefer eth0, then one of eth1 or eth2, then eth3.
>>>>> echo 1000 > /sys/class/net/eth0/bonding/weight
>>>>> echo 999 > /sys/class/net/eth1/bonding/weight
>>>>> echo 999 > /sys/class/net/eth2/bonding/weight
>>>>> echo 10 > /sys/class/net/eth3/bonding/weight
>>>>>
>>>>> # Do not switch back to primary eth0 if eth1 or eth2 is active.
>>>>> echo 1000 > /sys/class/net/eth1/bonding/active_weight
>>>>> echo 1000 > /sys/class/net/eth2/bonding/active_weight
>>>>>
>>>>> Every time one changes the user_supplied_weight, then
>>>>> user_supplied_active_weight must be reset to the same value. This 
>>>>> way,  if no special setup is done on active_weight, then the current 
>>>>> normal
>>>>> behavior is achieved.
>>>>
>>>> I must say I like this approach. But it would be not trivial to implement this.
>>>> Therefore I would stick with your propose of extending primary lazy to 3 values
>>>> until the weight option is implemented.
>>>
>>> It sounds good for me. Later, if I eventually implement the weight 
>>> option, it shouldn't be that hard to convert internally the primary_lazy 
>>> setup to active_weight, the same way we plan to convert internally 
>>> primary setup to weight setup.
>>>
>>> primary and primary_lazy are convenient for simple - two slaves only -  
>>> configurations. weight and active_weight are for more advanced 
>>> configurations. Keeping both configuration interface does sound user 
>>> friendly.
>>
>>Ok, I agree.
>>
>>>
>>>> I'm going to implement your propose below.
>>>
>>> By the way, even if I'm not a native English speaker, I think that 
>>> primary_lazy option should be named lazy_primary instead.
>>
>>Well I've intentionally put "primary" as a first word to amplify it's linked
>>with primary option... But...
>
>	I'm still unclear as to why it's better to add another special
>case option to bonding instead of changing this in user space, other
>than it'd be a change to user space (initscripts / sysconfig).
>
>	The way I see it, this patch is adding a mechanism that says,
>effectively, "make slave X the active slave, but do it only once."
>There is already a way to do that in bonding (sysfs, as above, or
>ifenslave -c); I am reluctant to add another without good reason.

Hello Jay.

As I already replied you once it's not only about selecting a slave at the
start. It's also about following:

Imagine you have bond with 3 slaves:
eth0            eth1            eth2
UP(curr)        UP              UP
DOWN            UP(curr)        UP
UP              UP(curr)        UP
UP              DOWN            UP(curr)

eth2 ends up being current active but we prefer eth0 (as primary interface).
This is not desirable and is solved by primary_lazy option.

Jirka

>
>	I'm not necessarily against the "weight" business in general.
>For the purposes of this discussion, however, it's a big complex
>solution to a pretty simple problem, and the "weight" system still has
>to have special sauce added it to to handle this special case.
>
>	Last, presuming for the moment that this goes forward as an
>option to bonding, I think this should be named something along the
>lines of "make_active" (or perhaps "make_active_once", but that's a bit
>long).  The option has the effect of making the specified slave the
>active slave one time, then the option setting is cleared.
>
>	-J
>
>---
>	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 25, 2009, 5:31 p.m. UTC | #15
Jiri Pirko wrote:
> Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote:
>> 	I'm still unclear as to why it's better to add another special
>> case option to bonding instead of changing this in user space, other
>> than it'd be a change to user space (initscripts / sysconfig).
>>
>> 	The way I see it, this patch is adding a mechanism that says,
>> effectively, "make slave X the active slave, but do it only once."
>> There is already a way to do that in bonding (sysfs, as above, or
>> ifenslave -c); I am reluctant to add another without good reason.
> 
> Hello Jay.
> 
> As I already replied you once it's not only about selecting a slave at the
> start. It's also about following:
> 
> Imagine you have bond with 3 slaves:
> eth0            eth1            eth2
> UP(curr)        UP              UP
> DOWN            UP(curr)        UP
> UP              UP(curr)        UP
> UP              DOWN            UP(curr)
> 
> eth2 ends up being current active but we prefer eth0 (as primary interface).
> This is not desirable and is solved by primary_lazy option.
> 
> Jirka
> 
>> 	I'm not necessarily against the "weight" business in general.
>> For the purposes of this discussion, however, it's a big complex
>> solution to a pretty simple problem, and the "weight" system still has
>> to have special sauce added it to to handle this special case.
>>
>> 	Last, presuming for the moment that this goes forward as an
>> option to bonding, I think this should be named something along the
>> lines of "make_active" (or perhaps "make_active_once", but that's a bit
>> long).  The option has the effect of making the specified slave the
>> active slave one time, then the option setting is cleared.

Hi Jay,

 From what I understand from Jirka's needs, the exact expected behaviors are :

1/ If a slave is active, keep it active, even if the primary comes back up.
2/ If the current slave just failed, choose the new active slave, giving 
priority to the master.

Selecting the active slave at startup (by using ifenslave -c or writing into 
/sys/class/net/bond0/bonding/active_slave) would solve 1, but not 2.

Also, I suggested to change 1 in this way :

1/ If a slave is active, keep it active, even if the primary comes back up, 
*except if the speed of the primary is better than the speed of the active slave*.

Thinking about all that, I start feeling that some sort of user space system to 
select the "best" slave would be better. If we can design a NETLINK interface to 
report events (slave up, slave down...) to user space, then any user space 
daemon would be able to tell bonding what to do. Only if no process register to 
receive those events would bonding use the normal slave selection rules.

Designing such a NETLINK interface would replace my proposed weight option (at 
least for best slave selection in active-backup mode and for best aggregator 
selection in 802.3ad mode). It would also solve the problem reported by Jirka 
and so replace the proposed primary_lazy option.

Any way, NETLINK is something that is supposed to come into bonding at some 
times, because we know that the sysfs purists hate the sysfs bonding stuff and 
that NETLINK is the target to setup networking.

Any comments ?

	Nicolas.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Vosburgh Aug. 25, 2009, 6:41 p.m. UTC | #16
Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:

>Jiri Pirko wrote:
>> Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote:
>>> 	I'm still unclear as to why it's better to add another special
>>> case option to bonding instead of changing this in user space, other
>>> than it'd be a change to user space (initscripts / sysconfig).
>>>
>>> 	The way I see it, this patch is adding a mechanism that says,
>>> effectively, "make slave X the active slave, but do it only once."
>>> There is already a way to do that in bonding (sysfs, as above, or
>>> ifenslave -c); I am reluctant to add another without good reason.
>> 
>> Hello Jay.
>> 
>> As I already replied you once it's not only about selecting a slave at the
>> start. It's also about following:
>> 
>> Imagine you have bond with 3 slaves:
>> eth0            eth1            eth2
>> UP(curr)        UP              UP
>> DOWN            UP(curr)        UP
>> UP              UP(curr)        UP
>> UP              DOWN            UP(curr)
>> 
>> eth2 ends up being current active but we prefer eth0 (as primary interface).
>> This is not desirable and is solved by primary_lazy option.
>> 
>> Jirka
>> 
>>> 	I'm not necessarily against the "weight" business in general.
>>> For the purposes of this discussion, however, it's a big complex
>>> solution to a pretty simple problem, and the "weight" system still has
>>> to have special sauce added it to to handle this special case.
>>>
>>> 	Last, presuming for the moment that this goes forward as an
>>> option to bonding, I think this should be named something along the
>>> lines of "make_active" (or perhaps "make_active_once", but that's a bit
>>> long).  The option has the effect of making the specified slave the
>>> active slave one time, then the option setting is cleared.
>
>Hi Jay,
>
> From what I understand from Jirka's needs, the exact expected behaviors are :
>
>1/ If a slave is active, keep it active, even if the primary comes back up.
>2/ If the current slave just failed, choose the new active slave, giving 
>priority to the master.
>
>Selecting the active slave at startup (by using ifenslave -c or writing into 
>/sys/class/net/bond0/bonding/active_slave) would solve 1, but not 2.

	Yah, I had missed step 2.  I'd still call it something other
than "lazy," though; "passive" sounds better to me.

>Also, I suggested to change 1 in this way :
>
>1/ If a slave is active, keep it active, even if the primary comes back up, 
>*except if the speed of the primary is better than the speed of the active slave*.
>
>Thinking about all that, I start feeling that some sort of user space system to 
>select the "best" slave would be better. If we can design a NETLINK interface to 
>report events (slave up, slave down...) to user space, then any user space 
>daemon would be able to tell bonding what to do. Only if no process register to 
>receive those events would bonding use the normal slave selection rules.

	This has been discussed more than once in the past, but hasn't
ever really gotten anywhere.  I suspect the main impediment is the lack
of a suitable API.

>Designing such a NETLINK interface would replace my proposed weight option (at 
>least for best slave selection in active-backup mode and for best aggregator 
>selection in 802.3ad mode). It would also solve the problem reported by Jirka 
>and so replace the proposed primary_lazy option.

	Yes, a lot of the decision making at failover could be moved
into a user space daemon.  The daemon, I think, should be optional; if
the basic selection policies are sufficient, then there's no need for a
trip to user space and back.

>Any way, NETLINK is something that is supposed to come into bonding at some 
>times, because we know that the sysfs purists hate the sysfs bonding stuff and 
>that NETLINK is the target to setup networking.

	I'm not a big fan of the sysfs API, either; it seemed like a
good idea at the time.  It's certainly better than ifenslave in terms of
features, but some of it is pretty convoluted, and there are things that
just can't be done from within sysfs.

	I recall seeing a note from Stephen Hemminger not too long ago
(a month or two ago) that he was working on a netlink API for bonding,
but I don't know how far that ever got.

	One quesiton is, if a netlink API is implemented, whether to
convert ifenslave, or deprecate ifenslave and put the various bonding
functions into ip.

	If a netlink API is on the relatively near horizon (say, within
a few months), then I'm less inclined to put in the "lazy" option, since
it would just become baggage carried forward for the next several years
(until the sysfs API could be deprecated and removed).

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan Aug. 25, 2009, 8:33 p.m. UTC | #17
Jay Vosburgh wrote:
> Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:
>> Thinking about all that, I start feeling that some sort of user space system to 
>> select the "best" slave would be better. If we can design a NETLINK interface to 
>> report events (slave up, slave down...) to user space, then any user space 
>> daemon would be able to tell bonding what to do. Only if no process register to 
>> receive those events would bonding use the normal slave selection rules.
> 
> 	This has been discussed more than once in the past, but hasn't
> ever really gotten anywhere.  I suspect the main impediment is the lack
> of a suitable API.

Does a 'NETLINK for bonding' document, describing the proposed API, exist ?

I imagine two different parts in the API :

1/ Everything related to configuration (set and read). This should be not far 
from the current sysfs API.
2/ Event notification about "everything" that happens into bonding, to be able 
to notify user space in real time.

It might be also interesting to use the netlink API to notify whoever is 
interested that a given not-enslaved interface just received a 802.3ad related 
packet. This would allow for self enslavement of slaves into the same bond, when 
they happen to be connected to the same 802.3ad capable switch.

>> Designing such a NETLINK interface would replace my proposed weight option (at 
>> least for best slave selection in active-backup mode and for best aggregator 
>> selection in 802.3ad mode). It would also solve the problem reported by Jirka 
>> and so replace the proposed primary_lazy option.
> 
> 	Yes, a lot of the decision making at failover could be moved
> into a user space daemon.  The daemon, I think, should be optional; if
> the basic selection policies are sufficient, then there's no need for a
> trip to user space and back.
> 
>> Any way, NETLINK is something that is supposed to come into bonding at some 
>> times, because we know that the sysfs purists hate the sysfs bonding stuff and 
>> that NETLINK is the target to setup networking.
> 
> 	I'm not a big fan of the sysfs API, either; it seemed like a
> good idea at the time.  It's certainly better than ifenslave in terms of
> features, but some of it is pretty convoluted, and there are things that
> just can't be done from within sysfs.
> 
> 	I recall seeing a note from Stephen Hemminger not too long ago
> (a month or two ago) that he was working on a netlink API for bonding,
> but I don't know how far that ever got.

Yes, I also read this note and remembered he detected that many things need to 
be changed before... :-(

> 	One quesiton is, if a netlink API is implemented, whether to
> convert ifenslave, or deprecate ifenslave and put the various bonding
> functions into ip.

I suggest enhancing ip and removing ifenslave (or converting it to a script that 
would call ip internally, for backward compatibility). Why would we need a 
dedicated tool for bonding ? We can even write this script now and use sysfs 
instead of the ioctl, waiting for the netlink API.

> 	If a netlink API is on the relatively near horizon (say, within
> a few months), then I'm less inclined to put in the "lazy" option, since
> it would just become baggage carried forward for the next several years
> (until the sysfs API could be deprecated and removed).

Does that mean you suggest Jiri works with Stephen on the netlink API ? :-)

	Nicolas.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index d5181ce..f1b82af 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -614,6 +614,15 @@  primary
 
 	The primary option is only valid for active-backup mode.
 
+primary_lazy
+
+	Specifies the behaviour of the primary slave in case of
+	it's link recovery has been detected. By default (value 0) the
+	primary slave is set as active slave immediately after the link
+	recovery. If the value is 1 then current active slave doesn't
+	change as long as it's link status doesn't change. This prevents
+	the bonding device from flip-flopping.
+
 updelay
 
 	Specifies the time, in milliseconds, to wait before enabling a
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3bf0cc6..00fbb9d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -94,6 +94,7 @@  static int downdelay;
 static int use_carrier	= 1;
 static char *mode;
 static char *primary;
+static int primary_lazy;
 static char *lacp_rate;
 static char *ad_select;
 static char *xmit_hash_policy;
@@ -126,6 +127,9 @@  MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
 		       "6 for balance-alb");
 module_param(primary, charp, 0);
 MODULE_PARM_DESC(primary, "Primary network device to use");
+module_param(primary_lazy, int, 0);
+MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; "
+			       "0 for off (default), 1 for on");
 module_param(lacp_rate, charp, 0);
 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
 			    "(slow/fast)");
@@ -1067,7 +1071,6 @@  out:
 
 }
 
-
 /**
  * find_best_interface - select the best available slave to be the active one
  * @bond: our bonding struct
@@ -1097,9 +1100,11 @@  static struct slave *bond_find_best_slave(struct bonding *bond)
 	 * and primary_slave that may be up and able to arp
 	 */
 	if ((bond->primary_slave) &&
-	    (!bond->params.arp_interval) &&
-	    (IS_UP(bond->primary_slave->dev))) {
+	    (IS_UP(bond->primary_slave->dev)) &&
+	    (!(bond->params.primary_lazy && old_active &&
+	       (IS_UP(old_active->dev))) || bond->force_primary)) {
 		new_active = bond->primary_slave;
+		bond->force_primary = false;
 	}
 
 	/* remember where to stop iterating over the slaves */
@@ -1674,8 +1679,10 @@  int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
 		/* if there is a primary slave, remember it */
-		if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
+		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
 			bond->primary_slave = new_slave;
+			bond->force_primary = true;
+		}
 	}
 
 	write_lock_bh(&bond->curr_slave_lock);
@@ -2929,7 +2936,9 @@  static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 	 */
 	if (bond->primary_slave &&
 	    (bond->primary_slave != bond->curr_active_slave) &&
-	    (bond->primary_slave->link == BOND_LINK_UP))
+	    (bond->primary_slave->link == BOND_LINK_UP) &&
+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
+              bond->curr_active_slave->link == BOND_LINK_UP))
 		commit++;
 
 	read_unlock(&bond->curr_slave_lock);
@@ -3035,7 +3044,9 @@  static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
 	 */
 	if (bond->primary_slave &&
 	    (bond->primary_slave != bond->curr_active_slave) &&
-	    (bond->primary_slave->link == BOND_LINK_UP)) {
+	    (bond->primary_slave->link == BOND_LINK_UP) &&
+	    !(bond->params.primary_lazy && bond->curr_active_slave &&
+              bond->curr_active_slave->link == BOND_LINK_UP)) {
 		write_lock_bh(&bond->curr_slave_lock);
 		bond_change_active_slave(bond, bond->primary_slave);
 		write_unlock_bh(&bond->curr_slave_lock);
@@ -4987,6 +4998,17 @@  static int bond_check_params(struct bond_params *params)
 		primary = NULL;
 	}
 
+	if (primary) {
+		if ((primary_lazy != 0) && (primary_lazy != 1)) {
+			pr_warning(DRV_NAME
+				   ": Warning: primary_lazy module parameter "
+				   "(%d), not of valid value (0/1), so it was "
+				   "set to 0\n",
+				   primary_lazy);
+			primary_lazy = 1;
+		}
+	}
+
 	if (fail_over_mac) {
 		fail_over_mac_value = bond_parse_parm(fail_over_mac,
 						      fail_over_mac_tbl);
@@ -5018,6 +5040,7 @@  static int bond_check_params(struct bond_params *params)
 	params->use_carrier = use_carrier;
 	params->lacp_fast = lacp_fast;
 	params->primary[0] = 0;
+	params->primary_lazy = primary_lazy;
 	params->fail_over_mac = fail_over_mac_value;
 
 	if (primary) {
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 55bf34f..573fe82 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1209,6 +1209,59 @@  static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
 		   bonding_show_primary, bonding_store_primary);
 
 /*
+ * Show and set the primary_lazy flag.
+ */
+static ssize_t bonding_show_primary_lazy(struct device *d,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.primary_lazy);
+}
+
+static ssize_t bonding_store_primary_lazy(struct device *d,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int new_value, ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		pr_err(DRV_NAME
+		       ": %s: no primary_lazy value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+	if ((new_value == 0) || (new_value == 1)) {
+		bond->params.primary_lazy = new_value;
+		pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n",
+		       bond->dev->name, new_value);
+		if (!bond->params.primary_lazy) {
+			bond->force_primary = true;
+			read_lock(&bond->lock);
+			write_lock_bh(&bond->curr_slave_lock);
+			bond_select_active_slave(bond);
+			write_unlock_bh(&bond->curr_slave_lock);
+			read_unlock(&bond->lock);
+		}
+	} else {
+		pr_info(DRV_NAME
+		       ": %s: Ignoring invalid primary_lazy value %d.\n",
+		       bond->dev->name, new_value);
+	}
+out:
+	rtnl_unlock();
+	return count;
+}
+static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR,
+		   bonding_show_primary_lazy, bonding_store_primary_lazy);
+
+/*
  * Show and set the use_carrier flag.
  */
 static ssize_t bonding_show_carrier(struct device *d,
@@ -1497,6 +1550,7 @@  static struct attribute *per_bond_attrs[] = {
 	&dev_attr_num_unsol_na.attr,
 	&dev_attr_miimon.attr,
 	&dev_attr_primary.attr,
+	&dev_attr_primary_lazy.attr,
 	&dev_attr_use_carrier.attr,
 	&dev_attr_active_slave.attr,
 	&dev_attr_mii_status.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6290a50..ac35c6c 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -131,6 +131,7 @@  struct bond_params {
 	int lacp_fast;
 	int ad_select;
 	char primary[IFNAMSIZ];
+	int primary_lazy;
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
 };
 
@@ -190,6 +191,7 @@  struct bonding {
 	struct   slave *curr_active_slave;
 	struct   slave *current_arp_slave;
 	struct   slave *primary_slave;
+	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;