Message ID | 20090813150513.GB10449@psychotron.englab.brq.redhat.com |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
Jiri Pirko <jpirko@redhat.com> wrote: >In some cases there is not desirable to switch back to primary interface when >it's link recovers and rather stay wiith currently active one. We need to avoid >packetloss as much as we can in some cases. This is solved by introducing >primary_lazy option. Note that enslaved primary slave is set as current >active no matter what. Are you just looking for a way to insure that, when the bond first comes up (is configured at boot, for example), a particular slave is the active slave? In that case, why can't an explicit selection be made via either ifenslave -c or /sys/class/net/bond0/bonding/active ? -J >Signed-off-by: Jiri Pirko <jpirko@redhat.com> > >diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt >index d5181ce..f1b82af 100644 >--- a/Documentation/networking/bonding.txt >+++ b/Documentation/networking/bonding.txt >@@ -614,6 +614,15 @@ primary > > The primary option is only valid for active-backup mode. > >+primary_lazy >+ >+ Specifies the behaviour of the primary slave in case of >+ it's link recovery has been detected. By default (value 0) the >+ primary slave is set as active slave immediately after the link >+ recovery. If the value is 1 then current active slave doesn't >+ change as long as it's link status doesn't change. This prevents >+ the bonding device from flip-flopping. >+ > updelay > > Specifies the time, in milliseconds, to wait before enabling a >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c >index 3bf0cc6..00fbb9d 100644 >--- a/drivers/net/bonding/bond_main.c >+++ b/drivers/net/bonding/bond_main.c >@@ -94,6 +94,7 @@ static int downdelay; > static int use_carrier = 1; > static char *mode; > static char *primary; >+static int primary_lazy; > static char *lacp_rate; > static char *ad_select; > static char *xmit_hash_policy; >@@ -126,6 +127,9 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " > "6 for balance-alb"); > module_param(primary, charp, 0); > MODULE_PARM_DESC(primary, "Primary network device to use"); >+module_param(primary_lazy, int, 0); >+MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; " >+ "0 for off (default), 1 for on"); > module_param(lacp_rate, charp, 0); > MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " > "(slow/fast)"); >@@ -1067,7 +1071,6 @@ out: > > } > >- > /** > * find_best_interface - select the best available slave to be the active one > * @bond: our bonding struct >@@ -1097,9 +1100,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond) > * and primary_slave that may be up and able to arp > */ > if ((bond->primary_slave) && >- (!bond->params.arp_interval) && >- (IS_UP(bond->primary_slave->dev))) { >+ (IS_UP(bond->primary_slave->dev)) && >+ (!(bond->params.primary_lazy && old_active && >+ (IS_UP(old_active->dev))) || bond->force_primary)) { > new_active = bond->primary_slave; >+ bond->force_primary = false; > } > > /* remember where to stop iterating over the slaves */ >@@ -1674,8 +1679,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) > > if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { > /* if there is a primary slave, remember it */ >- if (strcmp(bond->params.primary, new_slave->dev->name) == 0) >+ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { > bond->primary_slave = new_slave; >+ bond->force_primary = true; >+ } > } > > write_lock_bh(&bond->curr_slave_lock); >@@ -2929,7 +2936,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) > */ > if (bond->primary_slave && > (bond->primary_slave != bond->curr_active_slave) && >- (bond->primary_slave->link == BOND_LINK_UP)) >+ (bond->primary_slave->link == BOND_LINK_UP) && >+ !(bond->params.primary_lazy && bond->curr_active_slave && >+ bond->curr_active_slave->link == BOND_LINK_UP)) > commit++; > > read_unlock(&bond->curr_slave_lock); >@@ -3035,7 +3044,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) > */ > if (bond->primary_slave && > (bond->primary_slave != bond->curr_active_slave) && >- (bond->primary_slave->link == BOND_LINK_UP)) { >+ (bond->primary_slave->link == BOND_LINK_UP) && >+ !(bond->params.primary_lazy && bond->curr_active_slave && >+ bond->curr_active_slave->link == BOND_LINK_UP)) { > write_lock_bh(&bond->curr_slave_lock); > bond_change_active_slave(bond, bond->primary_slave); > write_unlock_bh(&bond->curr_slave_lock); >@@ -4987,6 +4998,17 @@ static int bond_check_params(struct bond_params *params) > primary = NULL; > } > >+ if (primary) { >+ if ((primary_lazy != 0) && (primary_lazy != 1)) { >+ pr_warning(DRV_NAME >+ ": Warning: primary_lazy module parameter " >+ "(%d), not of valid value (0/1), so it was " >+ "set to 0\n", >+ primary_lazy); >+ primary_lazy = 1; >+ } >+ } >+ > if (fail_over_mac) { > fail_over_mac_value = bond_parse_parm(fail_over_mac, > fail_over_mac_tbl); >@@ -5018,6 +5040,7 @@ static int bond_check_params(struct bond_params *params) > params->use_carrier = use_carrier; > params->lacp_fast = lacp_fast; > params->primary[0] = 0; >+ params->primary_lazy = primary_lazy; > params->fail_over_mac = fail_over_mac_value; > > if (primary) { >diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c >index 55bf34f..573fe82 100644 >--- a/drivers/net/bonding/bond_sysfs.c >+++ b/drivers/net/bonding/bond_sysfs.c >@@ -1209,6 +1209,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, > bonding_show_primary, bonding_store_primary); > > /* >+ * Show and set the primary_lazy flag. >+ */ >+static ssize_t bonding_show_primary_lazy(struct device *d, >+ struct device_attribute *attr, >+ char *buf) >+{ >+ struct bonding *bond = to_bond(d); >+ >+ return sprintf(buf, "%d\n", bond->params.primary_lazy); >+} >+ >+static ssize_t bonding_store_primary_lazy(struct device *d, >+ struct device_attribute *attr, >+ const char *buf, size_t count) >+{ >+ int new_value, ret = count; >+ struct bonding *bond = to_bond(d); >+ >+ if (!rtnl_trylock()) >+ return restart_syscall(); >+ >+ if (sscanf(buf, "%d", &new_value) != 1) { >+ pr_err(DRV_NAME >+ ": %s: no primary_lazy value specified.\n", >+ bond->dev->name); >+ ret = -EINVAL; >+ goto out; >+ } >+ if ((new_value == 0) || (new_value == 1)) { >+ bond->params.primary_lazy = new_value; >+ pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n", >+ bond->dev->name, new_value); >+ if (!bond->params.primary_lazy) { >+ bond->force_primary = true; >+ read_lock(&bond->lock); >+ write_lock_bh(&bond->curr_slave_lock); >+ bond_select_active_slave(bond); >+ write_unlock_bh(&bond->curr_slave_lock); >+ read_unlock(&bond->lock); >+ } >+ } else { >+ pr_info(DRV_NAME >+ ": %s: Ignoring invalid primary_lazy value %d.\n", >+ bond->dev->name, new_value); >+ } >+out: >+ rtnl_unlock(); >+ return count; >+} >+static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR, >+ bonding_show_primary_lazy, bonding_store_primary_lazy); >+ >+/* > * Show and set the use_carrier flag. > */ > static ssize_t bonding_show_carrier(struct device *d, >@@ -1497,6 +1550,7 @@ static struct attribute *per_bond_attrs[] = { > &dev_attr_num_unsol_na.attr, > &dev_attr_miimon.attr, > &dev_attr_primary.attr, >+ &dev_attr_primary_lazy.attr, > &dev_attr_use_carrier.attr, > &dev_attr_active_slave.attr, > &dev_attr_mii_status.attr, >diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h >index 6290a50..ac35c6c 100644 >--- a/drivers/net/bonding/bonding.h >+++ b/drivers/net/bonding/bonding.h >@@ -131,6 +131,7 @@ struct bond_params { > int lacp_fast; > int ad_select; > char primary[IFNAMSIZ]; >+ int primary_lazy; > __be32 arp_targets[BOND_MAX_ARP_TARGETS]; > }; > >@@ -190,6 +191,7 @@ struct bonding { > struct slave *curr_active_slave; > struct slave *current_arp_slave; > struct slave *primary_slave; >+ bool force_primary; > s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ > rwlock_t lock; > rwlock_t curr_slave_lock; --- -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko wrote: > In some cases there is not desirable to switch back to primary interface when > it's link recovers and rather stay wiith currently active one. We need to avoid > packetloss as much as we can in some cases. This is solved by introducing > primary_lazy option. Note that enslaved primary slave is set as current > active no matter what. May I suggest that instead of creating a new option to better define how the "primary" option is expected to behave for active-backup mode, we try the "weight" slave option I proposed in the thread "alternative to primary" earlier this year ? http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel Giving the same "weight" to two different slaves means "chose at random on startup and keep the active one until it fails". And if the "at random" behavior is not appropriate, one can force the active slave using what Jay suggested (/sys/class/net/bond0/bonding/active). The proposed "weight" slave's option is able to prevent the slaves from flip-flopping, by stating the fact that two slaves share the same "primary" level, and may provide several other enhancements as described in the thread. Hence, it is a more general configuration interface than what you proposed. I must admit that despite the fact that I suggested this in april, I didn't posted any patch for it until now. Unfortunately, I'didn't had time for it and probably not the proper skills anyway :-). Nicolas. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thu, Aug 13, 2009 at 05:44:55PM CEST, fubar@us.ibm.com wrote: >Jiri Pirko <jpirko@redhat.com> wrote: > >>In some cases there is not desirable to switch back to primary interface when >>it's link recovers and rather stay wiith currently active one. We need to avoid >>packetloss as much as we can in some cases. This is solved by introducing >>primary_lazy option. Note that enslaved primary slave is set as current >>active no matter what. > > Are you just looking for a way to insure that, when the bond >first comes up (is configured at boot, for example), a particular slave >is the active slave? In that case, why can't an explicit selection be >made via either ifenslave -c or /sys/class/net/bond0/bonding/active ? Indeed this can be done this way and I'm aware of it. I'm leaving aside that you must put this somewhere into init scripts which brings downsides. But imagine you have bond with 3 slaves: eth0 eth1 eth2 UP(curr) UP UP DOWN UP(curr) UP UP UP(curr) UP UP DOWN UP(curr) eth2 ends up being current active but we prefer eth0 (as primary interface). This is not desirable and is solved by primary_lazy option. Jirka > > -J > >>Signed-off-by: Jiri Pirko <jpirko@redhat.com> >> >>diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt >>index d5181ce..f1b82af 100644 >>--- a/Documentation/networking/bonding.txt >>+++ b/Documentation/networking/bonding.txt >>@@ -614,6 +614,15 @@ primary >> >> The primary option is only valid for active-backup mode. >> >>+primary_lazy >>+ >>+ Specifies the behaviour of the primary slave in case of >>+ it's link recovery has been detected. By default (value 0) the >>+ primary slave is set as active slave immediately after the link >>+ recovery. If the value is 1 then current active slave doesn't >>+ change as long as it's link status doesn't change. This prevents >>+ the bonding device from flip-flopping. >>+ >> updelay >> >> Specifies the time, in milliseconds, to wait before enabling a >>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c >>index 3bf0cc6..00fbb9d 100644 >>--- a/drivers/net/bonding/bond_main.c >>+++ b/drivers/net/bonding/bond_main.c >>@@ -94,6 +94,7 @@ static int downdelay; >> static int use_carrier = 1; >> static char *mode; >> static char *primary; >>+static int primary_lazy; >> static char *lacp_rate; >> static char *ad_select; >> static char *xmit_hash_policy; >>@@ -126,6 +127,9 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " >> "6 for balance-alb"); >> module_param(primary, charp, 0); >> MODULE_PARM_DESC(primary, "Primary network device to use"); >>+module_param(primary_lazy, int, 0); >>+MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; " >>+ "0 for off (default), 1 for on"); >> module_param(lacp_rate, charp, 0); >> MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " >> "(slow/fast)"); >>@@ -1067,7 +1071,6 @@ out: >> >> } >> >>- >> /** >> * find_best_interface - select the best available slave to be the active one >> * @bond: our bonding struct >>@@ -1097,9 +1100,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond) >> * and primary_slave that may be up and able to arp >> */ >> if ((bond->primary_slave) && >>- (!bond->params.arp_interval) && >>- (IS_UP(bond->primary_slave->dev))) { >>+ (IS_UP(bond->primary_slave->dev)) && >>+ (!(bond->params.primary_lazy && old_active && >>+ (IS_UP(old_active->dev))) || bond->force_primary)) { >> new_active = bond->primary_slave; >>+ bond->force_primary = false; >> } >> >> /* remember where to stop iterating over the slaves */ >>@@ -1674,8 +1679,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) >> >> if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { >> /* if there is a primary slave, remember it */ >>- if (strcmp(bond->params.primary, new_slave->dev->name) == 0) >>+ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { >> bond->primary_slave = new_slave; >>+ bond->force_primary = true; >>+ } >> } >> >> write_lock_bh(&bond->curr_slave_lock); >>@@ -2929,7 +2936,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) >> */ >> if (bond->primary_slave && >> (bond->primary_slave != bond->curr_active_slave) && >>- (bond->primary_slave->link == BOND_LINK_UP)) >>+ (bond->primary_slave->link == BOND_LINK_UP) && >>+ !(bond->params.primary_lazy && bond->curr_active_slave && >>+ bond->curr_active_slave->link == BOND_LINK_UP)) >> commit++; >> >> read_unlock(&bond->curr_slave_lock); >>@@ -3035,7 +3044,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) >> */ >> if (bond->primary_slave && >> (bond->primary_slave != bond->curr_active_slave) && >>- (bond->primary_slave->link == BOND_LINK_UP)) { >>+ (bond->primary_slave->link == BOND_LINK_UP) && >>+ !(bond->params.primary_lazy && bond->curr_active_slave && >>+ bond->curr_active_slave->link == BOND_LINK_UP)) { >> write_lock_bh(&bond->curr_slave_lock); >> bond_change_active_slave(bond, bond->primary_slave); >> write_unlock_bh(&bond->curr_slave_lock); >>@@ -4987,6 +4998,17 @@ static int bond_check_params(struct bond_params *params) >> primary = NULL; >> } >> >>+ if (primary) { >>+ if ((primary_lazy != 0) && (primary_lazy != 1)) { >>+ pr_warning(DRV_NAME >>+ ": Warning: primary_lazy module parameter " >>+ "(%d), not of valid value (0/1), so it was " >>+ "set to 0\n", >>+ primary_lazy); >>+ primary_lazy = 1; >>+ } >>+ } >>+ >> if (fail_over_mac) { >> fail_over_mac_value = bond_parse_parm(fail_over_mac, >> fail_over_mac_tbl); >>@@ -5018,6 +5040,7 @@ static int bond_check_params(struct bond_params *params) >> params->use_carrier = use_carrier; >> params->lacp_fast = lacp_fast; >> params->primary[0] = 0; >>+ params->primary_lazy = primary_lazy; >> params->fail_over_mac = fail_over_mac_value; >> >> if (primary) { >>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c >>index 55bf34f..573fe82 100644 >>--- a/drivers/net/bonding/bond_sysfs.c >>+++ b/drivers/net/bonding/bond_sysfs.c >>@@ -1209,6 +1209,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, >> bonding_show_primary, bonding_store_primary); >> >> /* >>+ * Show and set the primary_lazy flag. >>+ */ >>+static ssize_t bonding_show_primary_lazy(struct device *d, >>+ struct device_attribute *attr, >>+ char *buf) >>+{ >>+ struct bonding *bond = to_bond(d); >>+ >>+ return sprintf(buf, "%d\n", bond->params.primary_lazy); >>+} >>+ >>+static ssize_t bonding_store_primary_lazy(struct device *d, >>+ struct device_attribute *attr, >>+ const char *buf, size_t count) >>+{ >>+ int new_value, ret = count; >>+ struct bonding *bond = to_bond(d); >>+ >>+ if (!rtnl_trylock()) >>+ return restart_syscall(); >>+ >>+ if (sscanf(buf, "%d", &new_value) != 1) { >>+ pr_err(DRV_NAME >>+ ": %s: no primary_lazy value specified.\n", >>+ bond->dev->name); >>+ ret = -EINVAL; >>+ goto out; >>+ } >>+ if ((new_value == 0) || (new_value == 1)) { >>+ bond->params.primary_lazy = new_value; >>+ pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n", >>+ bond->dev->name, new_value); >>+ if (!bond->params.primary_lazy) { >>+ bond->force_primary = true; >>+ read_lock(&bond->lock); >>+ write_lock_bh(&bond->curr_slave_lock); >>+ bond_select_active_slave(bond); >>+ write_unlock_bh(&bond->curr_slave_lock); >>+ read_unlock(&bond->lock); >>+ } >>+ } else { >>+ pr_info(DRV_NAME >>+ ": %s: Ignoring invalid primary_lazy value %d.\n", >>+ bond->dev->name, new_value); >>+ } >>+out: >>+ rtnl_unlock(); >>+ return count; >>+} >>+static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR, >>+ bonding_show_primary_lazy, bonding_store_primary_lazy); >>+ >>+/* >> * Show and set the use_carrier flag. >> */ >> static ssize_t bonding_show_carrier(struct device *d, >>@@ -1497,6 +1550,7 @@ static struct attribute *per_bond_attrs[] = { >> &dev_attr_num_unsol_na.attr, >> &dev_attr_miimon.attr, >> &dev_attr_primary.attr, >>+ &dev_attr_primary_lazy.attr, >> &dev_attr_use_carrier.attr, >> &dev_attr_active_slave.attr, >> &dev_attr_mii_status.attr, >>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h >>index 6290a50..ac35c6c 100644 >>--- a/drivers/net/bonding/bonding.h >>+++ b/drivers/net/bonding/bonding.h >>@@ -131,6 +131,7 @@ struct bond_params { >> int lacp_fast; >> int ad_select; >> char primary[IFNAMSIZ]; >>+ int primary_lazy; >> __be32 arp_targets[BOND_MAX_ARP_TARGETS]; >> }; >> >>@@ -190,6 +191,7 @@ struct bonding { >> struct slave *curr_active_slave; >> struct slave *current_arp_slave; >> struct slave *primary_slave; >>+ bool force_primary; >> s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ >> rwlock_t lock; >> rwlock_t curr_slave_lock; > >--- > -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: > Jiri Pirko wrote: >> In some cases there is not desirable to switch back to primary interface when >> it's link recovers and rather stay wiith currently active one. We need to avoid >> packetloss as much as we can in some cases. This is solved by introducing >> primary_lazy option. Note that enslaved primary slave is set as current >> active no matter what. > > May I suggest that instead of creating a new option to better define how > the "primary" option is expected to behave for active-backup mode, we > try the "weight" slave option I proposed in the thread "alternative to > primary" earlier this year ? > > http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel This link does not work for me :( > > Giving the same "weight" to two different slaves means "chose at random > on startup and keep the active one until it fails". And if the "at > random" behavior is not appropriate, one can force the active slave > using what Jay suggested (/sys/class/net/bond0/bonding/active). > > The proposed "weight" slave's option is able to prevent the slaves from > flip-flopping, by stating the fact that two slaves share the same > "primary" level, and may provide several other enhancements as described > in the thread. > Although I cannot reach the thread, this looks interesting. But I'm not sure it has real benefits over primary_lazy option (and it doesn't solve initial curr active slave setup) Jirka > Hence, it is a more general configuration interface than what you > proposed. I must admit that despite the fact that I suggested this in > april, I didn't posted any patch for it until now. Unfortunately, > I'didn't had time for it and probably not the proper skills anyway :-). > > Nicolas. > > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko wrote: > Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >> Jiri Pirko wrote: >>> In some cases there is not desirable to switch back to primary interface when >>> it's link recovers and rather stay wiith currently active one. We need to avoid >>> packetloss as much as we can in some cases. This is solved by introducing >>> primary_lazy option. Note that enslaved primary slave is set as current >>> active no matter what. >> May I suggest that instead of creating a new option to better define how >> the "primary" option is expected to behave for active-backup mode, we >> try the "weight" slave option I proposed in the thread "alternative to >> primary" earlier this year ? >> >> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel > > This link does not work for me :( Nor for me... Sourceforge apparently decided to drop the bonding-devel list archive just now. 'hope the list archive will be back soon. Originally, the proposed "weight" option for slaves was designed just to provide a way to better define which slave should become active when the active one just went down. As you know, the current "primary" option does not allow for a predictable selection of the new active slave when the primary loose connectivity. The new active slave is chosen "at random" between the remaining slaves. After a short thread, involving Jay Vosburg and Andy Gospodarek, we end up with a general configuration interface, that provide a way to tune many things in slave management : - Active slave selection in active/backup mode, even in the presence of more than two slaves. - Active aggregator selection in 802.3ad mode. - Load balancing tuning for most load balancing modes. The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing a number there would give a "user supplied weight" to a slave. The speed and link state of the slave would give a "natural weight" for the slave. And the "effective weight" would be computed every time one of user supplied or natural weight change (upon speed or link state changes) and would be used everywhere we need a slave weight. I suggest that : - slave's natural weight = speed of the slave if link UP, else 0. - slave's effective weight = slave's natural weight * slave's user supplied weight. - aggregator's effective weight = sum of the effective weights of the slaves inside the aggregator. For the active/backup mode, the exact behavior would be : - When the active slave disappear, the new active slave is the one whose effective weight is the highest. - When a slave comes back, it only becomes active if its effective weight is strictly higher than the one of the current active slave. (This stop the flip-flop risk you stated). - To keep the old "primary" option, we simply give a very high user supplied weight to the primary slave. Jay suggested : #define BOND_PRIMARY_PRIO 0x80000000 user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ The same apply to aggregator : Every time a slave enter (link UP) or leave (link DOWN) an aggregator, the aggregator effective weight is recomputed. Then, if an aggregator exist with an strictly higher effective weight than the current active one, the new best aggregator becomes active. For others modes, the weight might be used later to tune the load balancing logic in some way. A default value of 1 for slave weight would cause slave speed to be used alone, hence the "natural weight". >> Giving the same "weight" to two different slaves means "chose at random >> on startup and keep the active one until it fails". And if the "at >> random" behavior is not appropriate, one can force the active slave >> using what Jay suggested (/sys/class/net/bond0/bonding/active). >> >> The proposed "weight" slave's option is able to prevent the slaves from >> flip-flopping, by stating the fact that two slaves share the same >> "primary" level, and may provide several other enhancements as described >> in the thread. >> > > Although I cannot reach the thread, this looks interesting. But I'm not sure it > has real benefits over primary_lazy option (and it doesn't solve initial curr > active slave setup) You are right, it doesn't solve the initial active slave selection. But why would it be so important to properly select the initial active slave, if you feel comfortable with staying with a new active slave, after a failure and return of the original active slave ? This kind of failures may last for only a few seconds (just unplugging and plugging back the wire), and you configuration may then stay with the new active slave "forever". If "forever" is acceptable, may be "at startup" is acceptable too. :-) From my point of view (and Andy Gospodarek apparently agreed), the real benefits of the weight slave option is that is it more generic and allow for later usage in other modes, that we don't anticipate for now. Quoted from a mail from Andy Gospodarek in the original thread : "I really have no objection to that. Adding this as a base part of bonding for a few modes with known features would be a nice start. I'm sure others will be kind enough to send suggestions or patches for ways this could benefit other modes." Nicolas. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote: > Jiri Pirko wrote: >> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >>> Jiri Pirko wrote: >>>> In some cases there is not desirable to switch back to primary interface when >>>> it's link recovers and rather stay wiith currently active one. We need to avoid >>>> packetloss as much as we can in some cases. This is solved by introducing >>>> primary_lazy option. Note that enslaved primary slave is set as current >>>> active no matter what. >>> May I suggest that instead of creating a new option to better define how >>> the "primary" option is expected to behave for active-backup mode, we >>> try the "weight" slave option I proposed in the thread "alternative >>> to primary" earlier this year ? >>> >>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel >> >> This link does not work for me :( > > Nor for me... Sourceforge apparently decided to drop the bonding-devel > list archive just now. 'hope the list archive will be back soon. > > Originally, the proposed "weight" option for slaves was designed just to > provide a way to better define which slave should become active when the > active one just went down. As you know, the current "primary" option > does not allow for a predictable selection of the new active slave when > the primary loose connectivity. The new active slave is chosen "at > random" between the remaining slaves. > > After a short thread, involving Jay Vosburg and Andy Gospodarek, we end > up with a general configuration interface, that provide a way to tune > many things in slave management : > > - Active slave selection in active/backup mode, even in the presence of > more than two slaves. > - Active aggregator selection in 802.3ad mode. > - Load balancing tuning for most load balancing modes. > > The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing > a number there would give a "user supplied weight" to a slave. The speed > and link state of the slave would give a "natural weight" for the slave. > And the "effective weight" would be computed every time one of user > supplied or natural weight change (upon speed or link state changes) and > would be used everywhere we need a slave weight. > > I suggest that : > - slave's natural weight = speed of the slave if link UP, else 0. > - slave's effective weight = slave's natural weight * slave's user > supplied weight. > - aggregator's effective weight = sum of the effective weights of the > slaves inside the aggregator. > > For the active/backup mode, the exact behavior would be : > > - When the active slave disappear, the new active slave is the one whose > effective weight is the highest. > - When a slave comes back, it only becomes active if its effective > weight is strictly higher than the one of the current active slave. > (This stop the flip-flop risk you stated). > - To keep the old "primary" option, we simply give a very high user > supplied weight to the primary slave. Jay suggested : > #define BOND_PRIMARY_PRIO 0x80000000 > user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ > user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ > > The same apply to aggregator : Every time a slave enter (link UP) or > leave (link DOWN) an aggregator, the aggregator effective weight is > recomputed. Then, if an aggregator exist with an strictly higher > effective weight than the current active one, the new best aggregator > becomes active. > > For others modes, the weight might be used later to tune the load > balancing logic in some way. > > A default value of 1 for slave weight would cause slave speed to be used > alone, hence the "natural weight". > I read your text and also the original list thread and I must say I see no solution in this "weight" parameter for this issue. Because it's desired for one link to stay active even if second come up, these 2 must have the same weight. But imagine 3 links of the same weight. In that case you cannot insure that the "primary one" will be chosen as active (see my picture in the reply to Jay's post). Correct me if I'm wrong but for that what I want to fix by primary_lazy option, your proposed weight option has no effect. Therefor I still think the primary_lazy is the only solution now. Jirka >>> Giving the same "weight" to two different slaves means "chose at random >>> on startup and keep the active one until it fails". And if the "at >>> random" behavior is not appropriate, one can force the active slave >>> using what Jay suggested (/sys/class/net/bond0/bonding/active). >>> >>> The proposed "weight" slave's option is able to prevent the slaves from >>> flip-flopping, by stating the fact that two slaves share the same >>> "primary" level, and may provide several other enhancements as >>> described in the thread. >>> >> >> Although I cannot reach the thread, this looks interesting. But I'm not sure it >> has real benefits over primary_lazy option (and it doesn't solve initial curr >> active slave setup) > > You are right, it doesn't solve the initial active slave selection. But > why would it be so important to properly select the initial active > slave, if you feel comfortable with staying with a new active slave, > after a failure and return of the original active slave ? This kind of > failures may last for only a few seconds (just unplugging and plugging > back the wire), and you configuration may then stay with the new active > slave "forever". If "forever" is acceptable, may be "at startup" is > acceptable too. :-) > > From my point of view (and Andy Gospodarek apparently agreed), the real > benefits of the weight slave option is that is it more generic and allow > for later usage in other modes, that we don't anticipate for now. > > Quoted from a mail from Andy Gospodarek in the original thread : > > "I really have no objection to that. Adding this as a base part of > bonding for a few modes with known features would be a nice start. > I'm sure others will be kind enough to send suggestions or patches for > ways this could benefit other modes." > > Nicolas. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko a écrit : > Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote: >> Jiri Pirko wrote: >>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >>>> Jiri Pirko wrote: >>>>> In some cases there is not desirable to switch back to primary interface when >>>>> it's link recovers and rather stay wiith currently active one. We need to avoid >>>>> packetloss as much as we can in some cases. This is solved by introducing >>>>> primary_lazy option. Note that enslaved primary slave is set as current >>>>> active no matter what. >>>> May I suggest that instead of creating a new option to better define how >>>> the "primary" option is expected to behave for active-backup mode, we >>>> try the "weight" slave option I proposed in the thread "alternative >>>> to primary" earlier this year ? >>>> >>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel >>> This link does not work for me :( >> Nor for me... Sourceforge apparently decided to drop the bonding-devel >> list archive just now. 'hope the list archive will be back soon. >> >> Originally, the proposed "weight" option for slaves was designed just to >> provide a way to better define which slave should become active when the >> active one just went down. As you know, the current "primary" option >> does not allow for a predictable selection of the new active slave when >> the primary loose connectivity. The new active slave is chosen "at >> random" between the remaining slaves. >> >> After a short thread, involving Jay Vosburg and Andy Gospodarek, we end >> up with a general configuration interface, that provide a way to tune >> many things in slave management : >> >> - Active slave selection in active/backup mode, even in the presence of >> more than two slaves. >> - Active aggregator selection in 802.3ad mode. >> - Load balancing tuning for most load balancing modes. >> >> The sysfs interface would be /sys/class/net/eth0/bonding/weight. Writing >> a number there would give a "user supplied weight" to a slave. The speed >> and link state of the slave would give a "natural weight" for the slave. >> And the "effective weight" would be computed every time one of user >> supplied or natural weight change (upon speed or link state changes) and >> would be used everywhere we need a slave weight. >> >> I suggest that : >> - slave's natural weight = speed of the slave if link UP, else 0. >> - slave's effective weight = slave's natural weight * slave's user >> supplied weight. >> - aggregator's effective weight = sum of the effective weights of the >> slaves inside the aggregator. >> >> For the active/backup mode, the exact behavior would be : >> >> - When the active slave disappear, the new active slave is the one whose >> effective weight is the highest. >> - When a slave comes back, it only becomes active if its effective >> weight is strictly higher than the one of the current active slave. >> (This stop the flip-flop risk you stated). >> - To keep the old "primary" option, we simply give a very high user >> supplied weight to the primary slave. Jay suggested : >> #define BOND_PRIMARY_PRIO 0x80000000 >> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ >> user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ >> >> The same apply to aggregator : Every time a slave enter (link UP) or >> leave (link DOWN) an aggregator, the aggregator effective weight is >> recomputed. Then, if an aggregator exist with an strictly higher >> effective weight than the current active one, the new best aggregator >> becomes active. >> >> For others modes, the weight might be used later to tune the load >> balancing logic in some way. >> >> A default value of 1 for slave weight would cause slave speed to be used >> alone, hence the "natural weight". >> > > I read your text and also the original list thread and I must say I see no > solution in this "weight" parameter for this issue. Because it's desired for one > link to stay active even if second come up, these 2 must have the same weight. > But imagine 3 links of the same weight. In that case you cannot insure that the > "primary one" will be chosen as active (see my picture in the reply to Jay's > post). Correct me if I'm wrong but for that what I want to fix by primary_lazy > option, your proposed weight option has no effect. > > Therefor I still think the primary_lazy is the only solution now. > > Jirka Hi Jirka, From your previous posts (first one and reply to Jay), I understand that your want to achieve the following behavior : eth0 is primary and active. eth1 is allowed to be active is eth0 is down. Also, eth1 should stay active, even if eth0 comes back up. Switch active to eth0 if eth1 eventually fall down. Switch active to eth2 only if both eth0 and eth1 are down. eth0 eth1 eth2 UP(curr) UP UP DOWN UP(curr) UP UP UP(curr) UP UP(curr) DOWN UP DOWN DOWN UP(curr) Using weight, the following setup should give this result : echo 1000 > /sys/class/net/eth0/bonding/weight echo 1000 > /sys/class/net/eth1/bonding/weight echo 1 > /sys/class/net/eth2/bonding/weight echo eth0 > /sys/class/net/bond0/bonding/active_slave I hope this is clear now. Nicolas. > >>>> Giving the same "weight" to two different slaves means "chose at random >>>> on startup and keep the active one until it fails". And if the "at >>>> random" behavior is not appropriate, one can force the active slave >>>> using what Jay suggested (/sys/class/net/bond0/bonding/active). >>>> >>>> The proposed "weight" slave's option is able to prevent the slaves from >>>> flip-flopping, by stating the fact that two slaves share the same >>>> "primary" level, and may provide several other enhancements as >>>> described in the thread. >>>> >>> Although I cannot reach the thread, this looks interesting. But I'm not sure it >>> has real benefits over primary_lazy option (and it doesn't solve initial curr >>> active slave setup) >> You are right, it doesn't solve the initial active slave selection. But >> why would it be so important to properly select the initial active >> slave, if you feel comfortable with staying with a new active slave, >> after a failure and return of the original active slave ? This kind of >> failures may last for only a few seconds (just unplugging and plugging >> back the wire), and you configuration may then stay with the new active >> slave "forever". If "forever" is acceptable, may be "at startup" is >> acceptable too. :-) >> >> From my point of view (and Andy Gospodarek apparently agreed), the real >> benefits of the weight slave option is that is it more generic and allow >> for later usage in other modes, that we don't anticipate for now. >> >> Quoted from a mail from Andy Gospodarek in the original thread : >> >> "I really have no objection to that. Adding this as a base part of >> bonding for a few modes with known features would be a nice start. >> I'm sure others will be kind enough to send suggestions or patches for >> ways this could benefit other modes." >> >> Nicolas. > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote: > Jiri Pirko a écrit : >> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote: >>> Jiri Pirko wrote: >>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >>>>> Jiri Pirko wrote: >>>>>> In some cases there is not desirable to switch back to primary interface when >>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid >>>>>> packetloss as much as we can in some cases. This is solved by introducing >>>>>> primary_lazy option. Note that enslaved primary slave is set as current >>>>>> active no matter what. >>>>> May I suggest that instead of creating a new option to better define how >>>>> the "primary" option is expected to behave for active-backup >>>>> mode, we try the "weight" slave option I proposed in the thread >>>>> "alternative to primary" earlier this year ? >>>>> >>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel >>>> This link does not work for me :( >>> Nor for me... Sourceforge apparently decided to drop the >>> bonding-devel list archive just now. 'hope the list archive will be >>> back soon. >>> >>> Originally, the proposed "weight" option for slaves was designed just >>> to provide a way to better define which slave should become active >>> when the active one just went down. As you know, the current >>> "primary" option does not allow for a predictable selection of the >>> new active slave when the primary loose connectivity. The new active >>> slave is chosen "at random" between the remaining slaves. >>> >>> After a short thread, involving Jay Vosburg and Andy Gospodarek, we >>> end up with a general configuration interface, that provide a way to >>> tune many things in slave management : >>> >>> - Active slave selection in active/backup mode, even in the presence >>> of more than two slaves. >>> - Active aggregator selection in 802.3ad mode. >>> - Load balancing tuning for most load balancing modes. >>> >>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. >>> Writing a number there would give a "user supplied weight" to a >>> slave. The speed and link state of the slave would give a "natural >>> weight" for the slave. And the "effective weight" would be computed >>> every time one of user supplied or natural weight change (upon speed >>> or link state changes) and would be used everywhere we need a slave >>> weight. >>> >>> I suggest that : >>> - slave's natural weight = speed of the slave if link UP, else 0. >>> - slave's effective weight = slave's natural weight * slave's user >>> supplied weight. >>> - aggregator's effective weight = sum of the effective weights of the >>> slaves inside the aggregator. >>> >>> For the active/backup mode, the exact behavior would be : >>> >>> - When the active slave disappear, the new active slave is the one >>> whose effective weight is the highest. >>> - When a slave comes back, it only becomes active if its effective >>> weight is strictly higher than the one of the current active slave. >>> (This stop the flip-flop risk you stated). >>> - To keep the old "primary" option, we simply give a very high user >>> supplied weight to the primary slave. Jay suggested : >>> #define BOND_PRIMARY_PRIO 0x80000000 >>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ >>> user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ >>> >>> The same apply to aggregator : Every time a slave enter (link UP) or >>> leave (link DOWN) an aggregator, the aggregator effective weight is >>> recomputed. Then, if an aggregator exist with an strictly higher >>> effective weight than the current active one, the new best aggregator >>> becomes active. >>> >>> For others modes, the weight might be used later to tune the load >>> balancing logic in some way. >>> >>> A default value of 1 for slave weight would cause slave speed to be >>> used alone, hence the "natural weight". >>> >> >> I read your text and also the original list thread and I must say I see no >> solution in this "weight" parameter for this issue. Because it's desired for one >> link to stay active even if second come up, these 2 must have the same weight. >> But imagine 3 links of the same weight. In that case you cannot insure that the >> "primary one" will be chosen as active (see my picture in the reply to Jay's >> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy >> option, your proposed weight option has no effect. >> >> Therefor I still think the primary_lazy is the only solution now. >> >> Jirka > > Hi Jirka, > > From your previous posts (first one and reply to Jay), I understand that > your want to achieve the following behavior : > > eth0 is primary and active. > eth1 is allowed to be active is eth0 is down. > Also, eth1 should stay active, even if eth0 comes back up. > Switch active to eth0 if eth1 eventually fall down. > Switch active to eth2 only if both eth0 and eth1 are down. > > eth0 eth1 eth2 > UP(curr) UP UP > DOWN UP(curr) UP > UP UP(curr) UP > UP(curr) DOWN UP > DOWN DOWN UP(curr) > > Using weight, the following setup should give this result : > > echo 1000 > /sys/class/net/eth0/bonding/weight > echo 1000 > /sys/class/net/eth1/bonding/weight > echo 1 > /sys/class/net/eth2/bonding/weight > echo eth0 > /sys/class/net/bond0/bonding/active_slave > > I hope this is clear now. Hmm... I ment the eth1 and eth2 to be the equivalent... If eth1 is down (let's say for good) and eth0 comes down, eth2 is selected as current active. But when eth0 comes up then eth0 is selected. That is not desired. > > Nicolas. > >> >>>>> Giving the same "weight" to two different slaves means "chose at random >>>>> on startup and keep the active one until it fails". And if the "at >>>>> random" behavior is not appropriate, one can force the active slave >>>>> using what Jay suggested (/sys/class/net/bond0/bonding/active). >>>>> >>>>> The proposed "weight" slave's option is able to prevent the slaves from >>>>> flip-flopping, by stating the fact that two slaves share the same >>>>> "primary" level, and may provide several other enhancements as >>>>> described in the thread. >>>>> >>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it >>>> has real benefits over primary_lazy option (and it doesn't solve initial curr >>>> active slave setup) >>> You are right, it doesn't solve the initial active slave selection. >>> But why would it be so important to properly select the initial >>> active slave, if you feel comfortable with staying with a new active >>> slave, after a failure and return of the original active slave ? >>> This kind of failures may last for only a few seconds (just >>> unplugging and plugging back the wire), and you configuration may >>> then stay with the new active slave "forever". If "forever" is >>> acceptable, may be "at startup" is acceptable too. :-) >>> >>> From my point of view (and Andy Gospodarek apparently agreed), the >>> real benefits of the weight slave option is that is it more generic >>> and allow for later usage in other modes, that we don't anticipate >>> for now. >>> >>> Quoted from a mail from Andy Gospodarek in the original thread : >>> >>> "I really have no objection to that. Adding this as a base part of >>> bonding for a few modes with known features would be a nice start. >>> I'm sure others will be kind enough to send suggestions or patches for >>> ways this could benefit other modes." >>> >>> Nicolas. >> > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko awrote: > Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote: >> Jiri Pirko wrote: >>> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote: >>>> Jiri Pirko wrote: >>>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >>>>>> Jiri Pirko wrote: >>>>>>> In some cases there is not desirable to switch back to primary interface when >>>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid >>>>>>> packetloss as much as we can in some cases. This is solved by introducing >>>>>>> primary_lazy option. Note that enslaved primary slave is set as current >>>>>>> active no matter what. >>>>>> May I suggest that instead of creating a new option to better define how >>>>>> the "primary" option is expected to behave for active-backup >>>>>> mode, we try the "weight" slave option I proposed in the thread >>>>>> "alternative to primary" earlier this year ? >>>>>> >>>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel >>>>> This link does not work for me :( >>>> Nor for me... Sourceforge apparently decided to drop the >>>> bonding-devel list archive just now. 'hope the list archive will be >>>> back soon. >>>> >>>> Originally, the proposed "weight" option for slaves was designed just >>>> to provide a way to better define which slave should become active >>>> when the active one just went down. As you know, the current >>>> "primary" option does not allow for a predictable selection of the >>>> new active slave when the primary loose connectivity. The new active >>>> slave is chosen "at random" between the remaining slaves. >>>> >>>> After a short thread, involving Jay Vosburg and Andy Gospodarek, we >>>> end up with a general configuration interface, that provide a way to >>>> tune many things in slave management : >>>> >>>> - Active slave selection in active/backup mode, even in the presence >>>> of more than two slaves. >>>> - Active aggregator selection in 802.3ad mode. >>>> - Load balancing tuning for most load balancing modes. >>>> >>>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. >>>> Writing a number there would give a "user supplied weight" to a >>>> slave. The speed and link state of the slave would give a "natural >>>> weight" for the slave. And the "effective weight" would be computed >>>> every time one of user supplied or natural weight change (upon speed >>>> or link state changes) and would be used everywhere we need a slave >>>> weight. >>>> >>>> I suggest that : >>>> - slave's natural weight = speed of the slave if link UP, else 0. >>>> - slave's effective weight = slave's natural weight * slave's user >>>> supplied weight. >>>> - aggregator's effective weight = sum of the effective weights of the >>>> slaves inside the aggregator. >>>> >>>> For the active/backup mode, the exact behavior would be : >>>> >>>> - When the active slave disappear, the new active slave is the one >>>> whose effective weight is the highest. >>>> - When a slave comes back, it only becomes active if its effective >>>> weight is strictly higher than the one of the current active slave. >>>> (This stop the flip-flop risk you stated). >>>> - To keep the old "primary" option, we simply give a very high user >>>> supplied weight to the primary slave. Jay suggested : >>>> #define BOND_PRIMARY_PRIO 0x80000000 >>>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ >>>> user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ >>>> >>>> The same apply to aggregator : Every time a slave enter (link UP) or >>>> leave (link DOWN) an aggregator, the aggregator effective weight is >>>> recomputed. Then, if an aggregator exist with an strictly higher >>>> effective weight than the current active one, the new best aggregator >>>> becomes active. >>>> >>>> For others modes, the weight might be used later to tune the load >>>> balancing logic in some way. >>>> >>>> A default value of 1 for slave weight would cause slave speed to be >>>> used alone, hence the "natural weight". >>>> >>> I read your text and also the original list thread and I must say I see no >>> solution in this "weight" parameter for this issue. Because it's desired for one >>> link to stay active even if second come up, these 2 must have the same weight. >>> But imagine 3 links of the same weight. In that case you cannot insure that the >>> "primary one" will be chosen as active (see my picture in the reply to Jay's >>> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy >>> option, your proposed weight option has no effect. >>> >>> Therefor I still think the primary_lazy is the only solution now. >>> >>> Jirka >> Hi Jirka, >> >> From your previous posts (first one and reply to Jay), I understand that >> your want to achieve the following behavior : >> >> eth0 is primary and active. >> eth1 is allowed to be active is eth0 is down. >> Also, eth1 should stay active, even if eth0 comes back up. >> Switch active to eth0 if eth1 eventually fall down. >> Switch active to eth2 only if both eth0 and eth1 are down. >> >> eth0 eth1 eth2 >> UP(curr) UP UP >> DOWN UP(curr) UP >> UP UP(curr) UP >> UP(curr) DOWN UP >> DOWN DOWN UP(curr) >> >> Using weight, the following setup should give this result : >> >> echo 1000 > /sys/class/net/eth0/bonding/weight >> echo 1000 > /sys/class/net/eth1/bonding/weight >> echo 1 > /sys/class/net/eth2/bonding/weight >> echo eth0 > /sys/class/net/bond0/bonding/active_slave >> >> I hope this is clear now. > > Hmm... I ment the eth1 and eth2 to be the equivalent... > If eth1 is down (let's say for good) and eth0 comes down, eth2 is > selected as current active. But when eth0 comes up then eth0 is selected. That > is not desired. OK, now I think I really understand your exact requirement. You want the ability to keep the current active slave active, even if a better slave comes back up, so the only reason for the active slave to change would be that the current active slave falls down: eth0 eth1 eth2 UP(curr) UP UP DOWN UP(curr) UP UP UP(curr) UP UP(curr) DOWN UP DOWN DOWN UP(curr) UP DOWN UP(curr) <- But at the same time, you still need the ability to properly select the best new active slave when the current one falls down, hence your answer in reply to Jay's proposal: > But imagine you have bond with 3 slaves: > eth0 eth1 eth2 > UP(curr) UP UP > DOWN UP(curr) UP > UP UP(curr) UP > UP DOWN UP(curr) > eth2 ends up being current active but we prefer eth0 (as > primary interface). > This is not desirable and is solved by primary_lazy option. I think your proposed "primary_lazy" option suffer some limits and should not be a per bond option but a per slave option. You are right that some slave should be able to be "sticky" when active, in order to reduce packets loose when switching. But due to performance reason, it might be desirable to say that some other slaves are not "sticky" when active, in the same configuration. Let's imagine the following configuration : eth0: 1 Gb/s - primary eth1: 1 Gb/s eth2: 100 Mb/s With "primary_lazy=1, eth2 has a chance to stay active, after eth0 and eth1 both failed at the same time. The risk of loosing a few packets while switching back from eth2 to eth0 or eth1 might be seen acceptable, compared to sticking to a 100 Mb/s interface when a 1 Gb/s interface is available. Due to eth2 speed, one might want to have the following behavior: If eth1 is active, keep it active, even if eth0 comes back up. But if eth2 is active, switch to any better slave right at the time one comes back up. I suggest that instead of having a per bond "primary_lazy" option, we define a per slave option, describing whether this particular slave is "sticky when active" or not. The above setup would become : echo 1 > /sys/class/net/eth0/bonding/sticky_active echo 1 > /sys/class/net/eth1/bonding/sticky_active echo 0 > /sys/class/net/eth2/bonding/sticky_active echo eth0 > /sys/class/net/bond0/bonding/primary Or may be better, keeping the "weight" idea in mind, a per slave option "active_weight" that gives the weight of the slave, *when active*. The effective weight of a slave would become : effective_slave = (is_active ? user_supplied_active_weight ? user_supplied_weight) * natural_weight # Prefer eth0, then one of eth1 or eth2, then eth3. echo 1000 > /sys/class/net/eth0/bonding/weight echo 999 > /sys/class/net/eth1/bonding/weight echo 999 > /sys/class/net/eth2/bonding/weight echo 10 > /sys/class/net/eth3/bonding/weight # Do not switch back to primary eth0 if eth1 or eth2 is active. echo 1000 > /sys/class/net/eth1/bonding/active_weight echo 1000 > /sys/class/net/eth2/bonding/active_weight Every time one changes the user_supplied_weight, then user_supplied_active_weight must be reset to the same value. This way, if no special setup is done on active_weight, then the current normal behavior is achieved. If none of those options seem acceptable to you, I suggest a third one: You keep primary_lazy, but with the following values : # Switch back to primary slaves when it comes back. echo 0 > /sys/class/net/bond0/bonding/primary_lazy # Switch back to primary when it comes back, only if the speed of the # primary slave is higher than the speed of the current active slave. echo 1 > /sys/class/net/bond0/bonding/primary_lazy # Stick to the current active slave when the primary slave comes back, # even if the primary slave speed is higher than the speed of the # current active slave. echo 2 > /sys/class/net/bond0/bonding/primary_lazy You can consider the value as being the level of laziness of the primary. Nicolas. >>>>>> Giving the same "weight" to two different slaves means "chose at random >>>>>> on startup and keep the active one until it fails". And if the "at >>>>>> random" behavior is not appropriate, one can force the active slave >>>>>> using what Jay suggested (/sys/class/net/bond0/bonding/active). >>>>>> >>>>>> The proposed "weight" slave's option is able to prevent the slaves from >>>>>> flip-flopping, by stating the fact that two slaves share the same >>>>>> "primary" level, and may provide several other enhancements as >>>>>> described in the thread. >>>>>> >>>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it >>>>> has real benefits over primary_lazy option (and it doesn't solve initial curr >>>>> active slave setup) >>>> You are right, it doesn't solve the initial active slave selection. >>>> But why would it be so important to properly select the initial >>>> active slave, if you feel comfortable with staying with a new active >>>> slave, after a failure and return of the original active slave ? >>>> This kind of failures may last for only a few seconds (just >>>> unplugging and plugging back the wire), and you configuration may >>>> then stay with the new active slave "forever". If "forever" is >>>> acceptable, may be "at startup" is acceptable too. :-) >>>> >>>> From my point of view (and Andy Gospodarek apparently agreed), the >>>> real benefits of the weight slave option is that is it more generic >>>> and allow for later usage in other modes, that we don't anticipate >>>> for now. >>>> >>>> Quoted from a mail from Andy Gospodarek in the original thread : >>>> >>>> "I really have no objection to that. Adding this as a base part of >>>> bonding for a few modes with known features would be a nice start. >>>> I'm sure others will be kind enough to send suggestions or patches for >>>> ways this could benefit other modes." -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote: > Jiri Pirko awrote: >> Mon, Aug 17, 2009 at 10:55:13PM CEST, nicolas.2p.debian@free.fr wrote: >>> Jiri Pirko wrote: >>>> Fri, Aug 14, 2009 at 06:27:03PM CEST, nicolas.2p.debian@free.fr wrote: >>>>> Jiri Pirko wrote: >>>>>> Thu, Aug 13, 2009 at 09:41:02PM CEST, nicolas.2p.debian@free.fr wrote: >>>>>>> Jiri Pirko wrote: >>>>>>>> In some cases there is not desirable to switch back to primary interface when >>>>>>>> it's link recovers and rather stay wiith currently active one. We need to avoid >>>>>>>> packetloss as much as we can in some cases. This is solved by introducing >>>>>>>> primary_lazy option. Note that enslaved primary slave is set as current >>>>>>>> active no matter what. >>>>>>> May I suggest that instead of creating a new option to better define how >>>>>>> the "primary" option is expected to behave for active-backup >>>>>>> mode, we try the "weight" slave option I proposed in the >>>>>>> thread "alternative to primary" earlier this year ? >>>>>>> >>>>>>> http://sourceforge.net/mailarchive/forum.php?thread_name=49D5357E.4020201%40free.fr&forum_name=bonding-devel >>>>>> This link does not work for me :( >>>>> Nor for me... Sourceforge apparently decided to drop the >>>>> bonding-devel list archive just now. 'hope the list archive will >>>>> be back soon. >>>>> >>>>> Originally, the proposed "weight" option for slaves was designed >>>>> just to provide a way to better define which slave should become >>>>> active when the active one just went down. As you know, the >>>>> current "primary" option does not allow for a predictable >>>>> selection of the new active slave when the primary loose >>>>> connectivity. The new active slave is chosen "at random" between >>>>> the remaining slaves. >>>>> >>>>> After a short thread, involving Jay Vosburg and Andy Gospodarek, >>>>> we end up with a general configuration interface, that provide a >>>>> way to tune many things in slave management : >>>>> >>>>> - Active slave selection in active/backup mode, even in the >>>>> presence of more than two slaves. >>>>> - Active aggregator selection in 802.3ad mode. >>>>> - Load balancing tuning for most load balancing modes. >>>>> >>>>> The sysfs interface would be /sys/class/net/eth0/bonding/weight. >>>>> Writing a number there would give a "user supplied weight" to a >>>>> slave. The speed and link state of the slave would give a >>>>> "natural weight" for the slave. And the "effective weight" would >>>>> be computed every time one of user supplied or natural weight >>>>> change (upon speed or link state changes) and would be used >>>>> everywhere we need a slave weight. >>>>> >>>>> I suggest that : >>>>> - slave's natural weight = speed of the slave if link UP, else 0. >>>>> - slave's effective weight = slave's natural weight * slave's >>>>> user supplied weight. >>>>> - aggregator's effective weight = sum of the effective weights of >>>>> the slaves inside the aggregator. >>>>> >>>>> For the active/backup mode, the exact behavior would be : >>>>> >>>>> - When the active slave disappear, the new active slave is the >>>>> one whose effective weight is the highest. >>>>> - When a slave comes back, it only becomes active if its >>>>> effective weight is strictly higher than the one of the current >>>>> active slave. (This stop the flip-flop risk you stated). >>>>> - To keep the old "primary" option, we simply give a very high >>>>> user supplied weight to the primary slave. Jay suggested : >>>>> #define BOND_PRIMARY_PRIO 0x80000000 >>>>> user_supplied_weight &= BOND_PRIMARY_PRIO /* to set the primary */ >>>>> user_supplied_weight &= ~BOND_PRIMAY_PRIO /* to clear the primary */ >>>>> >>>>> The same apply to aggregator : Every time a slave enter (link UP) >>>>> or leave (link DOWN) an aggregator, the aggregator effective >>>>> weight is recomputed. Then, if an aggregator exist with an >>>>> strictly higher effective weight than the current active one, >>>>> the new best aggregator becomes active. >>>>> >>>>> For others modes, the weight might be used later to tune the load >>>>> balancing logic in some way. >>>>> >>>>> A default value of 1 for slave weight would cause slave speed to >>>>> be used alone, hence the "natural weight". >>>>> >>>> I read your text and also the original list thread and I must say I see no >>>> solution in this "weight" parameter for this issue. Because it's desired for one >>>> link to stay active even if second come up, these 2 must have the same weight. >>>> But imagine 3 links of the same weight. In that case you cannot insure that the >>>> "primary one" will be chosen as active (see my picture in the reply to Jay's >>>> post). Correct me if I'm wrong but for that what I want to fix by primary_lazy >>>> option, your proposed weight option has no effect. >>>> >>>> Therefor I still think the primary_lazy is the only solution now. >>>> >>>> Jirka >>> Hi Jirka, >>> >>> From your previous posts (first one and reply to Jay), I understand >>> that your want to achieve the following behavior : >>> >>> eth0 is primary and active. >>> eth1 is allowed to be active is eth0 is down. >>> Also, eth1 should stay active, even if eth0 comes back up. >>> Switch active to eth0 if eth1 eventually fall down. >>> Switch active to eth2 only if both eth0 and eth1 are down. >>> >>> eth0 eth1 eth2 >>> UP(curr) UP UP >>> DOWN UP(curr) UP >>> UP UP(curr) UP >>> UP(curr) DOWN UP >>> DOWN DOWN UP(curr) >>> >>> Using weight, the following setup should give this result : >>> >>> echo 1000 > /sys/class/net/eth0/bonding/weight >>> echo 1000 > /sys/class/net/eth1/bonding/weight >>> echo 1 > /sys/class/net/eth2/bonding/weight >>> echo eth0 > /sys/class/net/bond0/bonding/active_slave >>> >>> I hope this is clear now. >> >> Hmm... I ment the eth1 and eth2 to be the equivalent... >> If eth1 is down (let's say for good) and eth0 comes down, eth2 is >> selected as current active. But when eth0 comes up then eth0 is selected. That >> is not desired. > > OK, now I think I really understand your exact requirement. > > You want the ability to keep the current active slave active, even if a > better slave comes back up, so the only reason for the active slave to > change would be that the current active slave falls down: > > eth0 eth1 eth2 > UP(curr) UP UP > DOWN UP(curr) UP > UP UP(curr) UP > UP(curr) DOWN UP > DOWN DOWN UP(curr) > UP DOWN UP(curr) <- > > But at the same time, you still need the ability to properly select the > best new active slave when the current one falls down, hence your answer > in reply to Jay's proposal: > > > But imagine you have bond with 3 slaves: > > eth0 eth1 eth2 > > UP(curr) UP UP > > DOWN UP(curr) UP > > UP UP(curr) UP > > UP DOWN UP(curr) > > > eth2 ends up being current active but we prefer eth0 (as > > primary interface). > > This is not desirable and is solved by primary_lazy option. > > I think your proposed "primary_lazy" option suffer some limits and > should not be a per bond option but a per slave option. > > You are right that some slave should be able to be "sticky" when active, > in order to reduce packets loose when switching. But due to performance > reason, it might be desirable to say that some other slaves are not > "sticky" when active, in the same configuration. > > Let's imagine the following configuration : > > eth0: 1 Gb/s - primary > eth1: 1 Gb/s > eth2: 100 Mb/s > > With "primary_lazy=1, eth2 has a chance to stay active, after eth0 > and eth1 both failed at the same time. The risk of loosing a few packets > while switching back from eth2 to eth0 or eth1 might be seen acceptable, > compared to sticking to a 100 Mb/s interface when a 1 Gb/s interface > is available. > > Due to eth2 speed, one might want to have the following behavior: > > If eth1 is active, keep it active, even if eth0 comes back up. But if > eth2 is active, switch to any better slave right at the time one comes > back up. > > I suggest that instead of having a per bond "primary_lazy" option, we > define a per slave option, describing whether this particular slave is > "sticky when active" or not. > > The above setup would become : > > echo 1 > /sys/class/net/eth0/bonding/sticky_active > echo 1 > /sys/class/net/eth1/bonding/sticky_active > echo 0 > /sys/class/net/eth2/bonding/sticky_active > echo eth0 > /sys/class/net/bond0/bonding/primary > > Or may be better, keeping the "weight" idea in mind, a per slave option > "active_weight" that gives the weight of the slave, *when active*. > > The effective weight of a slave would become : > > effective_slave = > (is_active ? user_supplied_active_weight ? user_supplied_weight) * > natural_weight > > # Prefer eth0, then one of eth1 or eth2, then eth3. > echo 1000 > /sys/class/net/eth0/bonding/weight > echo 999 > /sys/class/net/eth1/bonding/weight > echo 999 > /sys/class/net/eth2/bonding/weight > echo 10 > /sys/class/net/eth3/bonding/weight > > # Do not switch back to primary eth0 if eth1 or eth2 is active. > echo 1000 > /sys/class/net/eth1/bonding/active_weight > echo 1000 > /sys/class/net/eth2/bonding/active_weight > > Every time one changes the user_supplied_weight, then > user_supplied_active_weight must be reset to the same value. This way, > if no special setup is done on active_weight, then the current normal > behavior is achieved. I must say I like this approach. But it would be not trivial to implement this. Therefore I would stick with your propose of extending primary lazy to 3 values until the weight option is implemented. I'm going to implement your propose below. > > If none of those options seem acceptable to you, I suggest a third one: > > You keep primary_lazy, but with the following values : > > # Switch back to primary slaves when it comes back. > echo 0 > /sys/class/net/bond0/bonding/primary_lazy > > # Switch back to primary when it comes back, only if the speed of the > # primary slave is higher than the speed of the current active slave. > echo 1 > /sys/class/net/bond0/bonding/primary_lazy > > # Stick to the current active slave when the primary slave comes back, > # even if the primary slave speed is higher than the speed of the > # current active slave. > echo 2 > /sys/class/net/bond0/bonding/primary_lazy > > You can consider the value as being the level of laziness of the primary. > > Nicolas. > >>>>>>> Giving the same "weight" to two different slaves means "chose at random >>>>>>> on startup and keep the active one until it fails". And if the "at >>>>>>> random" behavior is not appropriate, one can force the active slave >>>>>>> using what Jay suggested (/sys/class/net/bond0/bonding/active). >>>>>>> >>>>>>> The proposed "weight" slave's option is able to prevent the slaves from >>>>>>> flip-flopping, by stating the fact that two slaves share the >>>>>>> same "primary" level, and may provide several other >>>>>>> enhancements as described in the thread. >>>>>>> >>>>>> Although I cannot reach the thread, this looks interesting. But I'm not sure it >>>>>> has real benefits over primary_lazy option (and it doesn't solve initial curr >>>>>> active slave setup) >>>>> You are right, it doesn't solve the initial active slave >>>>> selection. But why would it be so important to properly select >>>>> the initial active slave, if you feel comfortable with staying >>>>> with a new active slave, after a failure and return of the >>>>> original active slave ? This kind of failures may last for only >>>>> a few seconds (just unplugging and plugging back the wire), and >>>>> you configuration may then stay with the new active slave >>>>> "forever". If "forever" is acceptable, may be "at startup" is >>>>> acceptable too. :-) >>>>> >>>>> From my point of view (and Andy Gospodarek apparently agreed), >>>>> the real benefits of the weight slave option is that is it more >>>>> generic and allow for later usage in other modes, that we don't >>>>> anticipate for now. >>>>> >>>>> Quoted from a mail from Andy Gospodarek in the original thread : >>>>> >>>>> "I really have no objection to that. Adding this as a base part of >>>>> bonding for a few modes with known features would be a nice start. >>>>> I'm sure others will be kind enough to send suggestions or patches for >>>>> ways this could benefit other modes." > > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko wrote: > Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote: [--cut--] >> I suggest that instead of having a per bond "primary_lazy" option, we >> define a per slave option, describing whether this particular slave is >> "sticky when active" or not. >> >> The above setup would become : >> >> echo 1 > /sys/class/net/eth0/bonding/sticky_active >> echo 1 > /sys/class/net/eth1/bonding/sticky_active >> echo 0 > /sys/class/net/eth2/bonding/sticky_active >> echo eth0 > /sys/class/net/bond0/bonding/primary >> >> Or may be better, keeping the "weight" idea in mind, a per slave option >> "active_weight" that gives the weight of the slave, *when active*. >> >> The effective weight of a slave would become : >> >> effective_slave = >> (is_active ? user_supplied_active_weight ? user_supplied_weight) * >> natural_weight >> >> # Prefer eth0, then one of eth1 or eth2, then eth3. >> echo 1000 > /sys/class/net/eth0/bonding/weight >> echo 999 > /sys/class/net/eth1/bonding/weight >> echo 999 > /sys/class/net/eth2/bonding/weight >> echo 10 > /sys/class/net/eth3/bonding/weight >> >> # Do not switch back to primary eth0 if eth1 or eth2 is active. >> echo 1000 > /sys/class/net/eth1/bonding/active_weight >> echo 1000 > /sys/class/net/eth2/bonding/active_weight >> >> Every time one changes the user_supplied_weight, then >> user_supplied_active_weight must be reset to the same value. This way, >> if no special setup is done on active_weight, then the current normal >> behavior is achieved. > > I must say I like this approach. But it would be not trivial to implement this. > Therefore I would stick with your propose of extending primary lazy to 3 values > until the weight option is implemented. It sounds good for me. Later, if I eventually implement the weight option, it shouldn't be that hard to convert internally the primary_lazy setup to active_weight, the same way we plan to convert internally primary setup to weight setup. primary and primary_lazy are convenient for simple - two slaves only - configurations. weight and active_weight are for more advanced configurations. Keeping both configuration interface does sound user friendly. > I'm going to implement your propose below. By the way, even if I'm not a native English speaker, I think that primary_lazy option should be named lazy_primary instead. Nicolas. >> If none of those options seem acceptable to you, I suggest a third one: >> >> You keep primary_lazy, but with the following values : >> >> # Switch back to primary slaves when it comes back. >> echo 0 > /sys/class/net/bond0/bonding/primary_lazy >> >> # Switch back to primary when it comes back, only if the speed of the >> # primary slave is higher than the speed of the current active slave. >> echo 1 > /sys/class/net/bond0/bonding/primary_lazy >> >> # Stick to the current active slave when the primary slave comes back, >> # even if the primary slave speed is higher than the speed of the >> # current active slave. >> echo 2 > /sys/class/net/bond0/bonding/primary_lazy >> >> You can consider the value as being the level of laziness of the primary. >> >> Nicolas. >> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote: > Jiri Pirko wrote: >> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote: > [--cut--] >>> I suggest that instead of having a per bond "primary_lazy" option, we >>> define a per slave option, describing whether this particular slave is >>> "sticky when active" or not. >>> >>> The above setup would become : >>> >>> echo 1 > /sys/class/net/eth0/bonding/sticky_active >>> echo 1 > /sys/class/net/eth1/bonding/sticky_active >>> echo 0 > /sys/class/net/eth2/bonding/sticky_active >>> echo eth0 > /sys/class/net/bond0/bonding/primary >>> >>> Or may be better, keeping the "weight" idea in mind, a per slave option >>> "active_weight" that gives the weight of the slave, *when active*. >>> >>> The effective weight of a slave would become : >>> >>> effective_slave = >>> (is_active ? user_supplied_active_weight ? user_supplied_weight) * >>> natural_weight >>> >>> # Prefer eth0, then one of eth1 or eth2, then eth3. >>> echo 1000 > /sys/class/net/eth0/bonding/weight >>> echo 999 > /sys/class/net/eth1/bonding/weight >>> echo 999 > /sys/class/net/eth2/bonding/weight >>> echo 10 > /sys/class/net/eth3/bonding/weight >>> >>> # Do not switch back to primary eth0 if eth1 or eth2 is active. >>> echo 1000 > /sys/class/net/eth1/bonding/active_weight >>> echo 1000 > /sys/class/net/eth2/bonding/active_weight >>> >>> Every time one changes the user_supplied_weight, then >>> user_supplied_active_weight must be reset to the same value. This >>> way, if no special setup is done on active_weight, then the current >>> normal >>> behavior is achieved. >> >> I must say I like this approach. But it would be not trivial to implement this. >> Therefore I would stick with your propose of extending primary lazy to 3 values >> until the weight option is implemented. > > It sounds good for me. Later, if I eventually implement the weight > option, it shouldn't be that hard to convert internally the primary_lazy > setup to active_weight, the same way we plan to convert internally > primary setup to weight setup. > > primary and primary_lazy are convenient for simple - two slaves only - > configurations. weight and active_weight are for more advanced > configurations. Keeping both configuration interface does sound user > friendly. Ok, I agree. > >> I'm going to implement your propose below. > > By the way, even if I'm not a native English speaker, I think that > primary_lazy option should be named lazy_primary instead. Well I've intentionally put "primary" as a first word to amplify it's linked with primary option... But... Jirka > > Nicolas. > >>> If none of those options seem acceptable to you, I suggest a third one: >>> >>> You keep primary_lazy, but with the following values : >>> >>> # Switch back to primary slaves when it comes back. >>> echo 0 > /sys/class/net/bond0/bonding/primary_lazy >>> >>> # Switch back to primary when it comes back, only if the speed of the >>> # primary slave is higher than the speed of the current active slave. >>> echo 1 > /sys/class/net/bond0/bonding/primary_lazy >>> >>> # Stick to the current active slave when the primary slave comes back, >>> # even if the primary slave speed is higher than the speed of the >>> # current active slave. >>> echo 2 > /sys/class/net/bond0/bonding/primary_lazy >>> >>> You can consider the value as being the level of laziness of the primary. >>> >>> Nicolas. >>> > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko <jpirko@redhat.com> wrote: >Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote: >> Jiri Pirko wrote: >>> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote: >> [--cut--] >>>> I suggest that instead of having a per bond "primary_lazy" option, we >>>> define a per slave option, describing whether this particular slave is >>>> "sticky when active" or not. >>>> >>>> The above setup would become : >>>> >>>> echo 1 > /sys/class/net/eth0/bonding/sticky_active >>>> echo 1 > /sys/class/net/eth1/bonding/sticky_active >>>> echo 0 > /sys/class/net/eth2/bonding/sticky_active >>>> echo eth0 > /sys/class/net/bond0/bonding/primary >>>> >>>> Or may be better, keeping the "weight" idea in mind, a per slave option >>>> "active_weight" that gives the weight of the slave, *when active*. >>>> >>>> The effective weight of a slave would become : >>>> >>>> effective_slave = >>>> (is_active ? user_supplied_active_weight ? user_supplied_weight) * >>>> natural_weight >>>> >>>> # Prefer eth0, then one of eth1 or eth2, then eth3. >>>> echo 1000 > /sys/class/net/eth0/bonding/weight >>>> echo 999 > /sys/class/net/eth1/bonding/weight >>>> echo 999 > /sys/class/net/eth2/bonding/weight >>>> echo 10 > /sys/class/net/eth3/bonding/weight >>>> >>>> # Do not switch back to primary eth0 if eth1 or eth2 is active. >>>> echo 1000 > /sys/class/net/eth1/bonding/active_weight >>>> echo 1000 > /sys/class/net/eth2/bonding/active_weight >>>> >>>> Every time one changes the user_supplied_weight, then >>>> user_supplied_active_weight must be reset to the same value. This >>>> way, if no special setup is done on active_weight, then the current >>>> normal >>>> behavior is achieved. >>> >>> I must say I like this approach. But it would be not trivial to implement this. >>> Therefore I would stick with your propose of extending primary lazy to 3 values >>> until the weight option is implemented. >> >> It sounds good for me. Later, if I eventually implement the weight >> option, it shouldn't be that hard to convert internally the primary_lazy >> setup to active_weight, the same way we plan to convert internally >> primary setup to weight setup. >> >> primary and primary_lazy are convenient for simple - two slaves only - >> configurations. weight and active_weight are for more advanced >> configurations. Keeping both configuration interface does sound user >> friendly. > >Ok, I agree. > >> >>> I'm going to implement your propose below. >> >> By the way, even if I'm not a native English speaker, I think that >> primary_lazy option should be named lazy_primary instead. > >Well I've intentionally put "primary" as a first word to amplify it's linked >with primary option... But... I'm still unclear as to why it's better to add another special case option to bonding instead of changing this in user space, other than it'd be a change to user space (initscripts / sysconfig). The way I see it, this patch is adding a mechanism that says, effectively, "make slave X the active slave, but do it only once." There is already a way to do that in bonding (sysfs, as above, or ifenslave -c); I am reluctant to add another without good reason. I'm not necessarily against the "weight" business in general. For the purposes of this discussion, however, it's a big complex solution to a pretty simple problem, and the "weight" system still has to have special sauce added it to to handle this special case. Last, presuming for the moment that this goes forward as an option to bonding, I think this should be named something along the lines of "make_active" (or perhaps "make_active_once", but that's a bit long). The option has the effect of making the specified slave the active slave one time, then the option setting is cleared. -J --- -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote: >Jiri Pirko <jpirko@redhat.com> wrote: > >>Mon, Aug 24, 2009 at 05:07:18PM CEST, nicolas.2p.debian@free.fr wrote: >>> Jiri Pirko wrote: >>>> Thu, Aug 20, 2009 at 02:40:07PM CEST, nicolas.2p.debian@free.fr wrote: >>> [--cut--] >>>>> I suggest that instead of having a per bond "primary_lazy" option, we >>>>> define a per slave option, describing whether this particular slave is >>>>> "sticky when active" or not. >>>>> >>>>> The above setup would become : >>>>> >>>>> echo 1 > /sys/class/net/eth0/bonding/sticky_active >>>>> echo 1 > /sys/class/net/eth1/bonding/sticky_active >>>>> echo 0 > /sys/class/net/eth2/bonding/sticky_active >>>>> echo eth0 > /sys/class/net/bond0/bonding/primary >>>>> >>>>> Or may be better, keeping the "weight" idea in mind, a per slave option >>>>> "active_weight" that gives the weight of the slave, *when active*. >>>>> >>>>> The effective weight of a slave would become : >>>>> >>>>> effective_slave = >>>>> (is_active ? user_supplied_active_weight ? user_supplied_weight) * >>>>> natural_weight >>>>> >>>>> # Prefer eth0, then one of eth1 or eth2, then eth3. >>>>> echo 1000 > /sys/class/net/eth0/bonding/weight >>>>> echo 999 > /sys/class/net/eth1/bonding/weight >>>>> echo 999 > /sys/class/net/eth2/bonding/weight >>>>> echo 10 > /sys/class/net/eth3/bonding/weight >>>>> >>>>> # Do not switch back to primary eth0 if eth1 or eth2 is active. >>>>> echo 1000 > /sys/class/net/eth1/bonding/active_weight >>>>> echo 1000 > /sys/class/net/eth2/bonding/active_weight >>>>> >>>>> Every time one changes the user_supplied_weight, then >>>>> user_supplied_active_weight must be reset to the same value. This >>>>> way, if no special setup is done on active_weight, then the current >>>>> normal >>>>> behavior is achieved. >>>> >>>> I must say I like this approach. But it would be not trivial to implement this. >>>> Therefore I would stick with your propose of extending primary lazy to 3 values >>>> until the weight option is implemented. >>> >>> It sounds good for me. Later, if I eventually implement the weight >>> option, it shouldn't be that hard to convert internally the primary_lazy >>> setup to active_weight, the same way we plan to convert internally >>> primary setup to weight setup. >>> >>> primary and primary_lazy are convenient for simple - two slaves only - >>> configurations. weight and active_weight are for more advanced >>> configurations. Keeping both configuration interface does sound user >>> friendly. >> >>Ok, I agree. >> >>> >>>> I'm going to implement your propose below. >>> >>> By the way, even if I'm not a native English speaker, I think that >>> primary_lazy option should be named lazy_primary instead. >> >>Well I've intentionally put "primary" as a first word to amplify it's linked >>with primary option... But... > > I'm still unclear as to why it's better to add another special >case option to bonding instead of changing this in user space, other >than it'd be a change to user space (initscripts / sysconfig). > > The way I see it, this patch is adding a mechanism that says, >effectively, "make slave X the active slave, but do it only once." >There is already a way to do that in bonding (sysfs, as above, or >ifenslave -c); I am reluctant to add another without good reason. Hello Jay. As I already replied you once it's not only about selecting a slave at the start. It's also about following: Imagine you have bond with 3 slaves: eth0 eth1 eth2 UP(curr) UP UP DOWN UP(curr) UP UP UP(curr) UP UP DOWN UP(curr) eth2 ends up being current active but we prefer eth0 (as primary interface). This is not desirable and is solved by primary_lazy option. Jirka > > I'm not necessarily against the "weight" business in general. >For the purposes of this discussion, however, it's a big complex >solution to a pretty simple problem, and the "weight" system still has >to have special sauce added it to to handle this special case. > > Last, presuming for the moment that this goes forward as an >option to bonding, I think this should be named something along the >lines of "make_active" (or perhaps "make_active_once", but that's a bit >long). The option has the effect of making the specified slave the >active slave one time, then the option setting is cleared. > > -J > >--- > -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jiri Pirko wrote: > Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote: >> I'm still unclear as to why it's better to add another special >> case option to bonding instead of changing this in user space, other >> than it'd be a change to user space (initscripts / sysconfig). >> >> The way I see it, this patch is adding a mechanism that says, >> effectively, "make slave X the active slave, but do it only once." >> There is already a way to do that in bonding (sysfs, as above, or >> ifenslave -c); I am reluctant to add another without good reason. > > Hello Jay. > > As I already replied you once it's not only about selecting a slave at the > start. It's also about following: > > Imagine you have bond with 3 slaves: > eth0 eth1 eth2 > UP(curr) UP UP > DOWN UP(curr) UP > UP UP(curr) UP > UP DOWN UP(curr) > > eth2 ends up being current active but we prefer eth0 (as primary interface). > This is not desirable and is solved by primary_lazy option. > > Jirka > >> I'm not necessarily against the "weight" business in general. >> For the purposes of this discussion, however, it's a big complex >> solution to a pretty simple problem, and the "weight" system still has >> to have special sauce added it to to handle this special case. >> >> Last, presuming for the moment that this goes forward as an >> option to bonding, I think this should be named something along the >> lines of "make_active" (or perhaps "make_active_once", but that's a bit >> long). The option has the effect of making the specified slave the >> active slave one time, then the option setting is cleared. Hi Jay, From what I understand from Jirka's needs, the exact expected behaviors are : 1/ If a slave is active, keep it active, even if the primary comes back up. 2/ If the current slave just failed, choose the new active slave, giving priority to the master. Selecting the active slave at startup (by using ifenslave -c or writing into /sys/class/net/bond0/bonding/active_slave) would solve 1, but not 2. Also, I suggested to change 1 in this way : 1/ If a slave is active, keep it active, even if the primary comes back up, *except if the speed of the primary is better than the speed of the active slave*. Thinking about all that, I start feeling that some sort of user space system to select the "best" slave would be better. If we can design a NETLINK interface to report events (slave up, slave down...) to user space, then any user space daemon would be able to tell bonding what to do. Only if no process register to receive those events would bonding use the normal slave selection rules. Designing such a NETLINK interface would replace my proposed weight option (at least for best slave selection in active-backup mode and for best aggregator selection in 802.3ad mode). It would also solve the problem reported by Jirka and so replace the proposed primary_lazy option. Any way, NETLINK is something that is supposed to come into bonding at some times, because we know that the sysfs purists hate the sysfs bonding stuff and that NETLINK is the target to setup networking. Any comments ? Nicolas. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote: >Jiri Pirko wrote: >> Mon, Aug 24, 2009 at 07:35:17PM CEST, fubar@us.ibm.com wrote: >>> I'm still unclear as to why it's better to add another special >>> case option to bonding instead of changing this in user space, other >>> than it'd be a change to user space (initscripts / sysconfig). >>> >>> The way I see it, this patch is adding a mechanism that says, >>> effectively, "make slave X the active slave, but do it only once." >>> There is already a way to do that in bonding (sysfs, as above, or >>> ifenslave -c); I am reluctant to add another without good reason. >> >> Hello Jay. >> >> As I already replied you once it's not only about selecting a slave at the >> start. It's also about following: >> >> Imagine you have bond with 3 slaves: >> eth0 eth1 eth2 >> UP(curr) UP UP >> DOWN UP(curr) UP >> UP UP(curr) UP >> UP DOWN UP(curr) >> >> eth2 ends up being current active but we prefer eth0 (as primary interface). >> This is not desirable and is solved by primary_lazy option. >> >> Jirka >> >>> I'm not necessarily against the "weight" business in general. >>> For the purposes of this discussion, however, it's a big complex >>> solution to a pretty simple problem, and the "weight" system still has >>> to have special sauce added it to to handle this special case. >>> >>> Last, presuming for the moment that this goes forward as an >>> option to bonding, I think this should be named something along the >>> lines of "make_active" (or perhaps "make_active_once", but that's a bit >>> long). The option has the effect of making the specified slave the >>> active slave one time, then the option setting is cleared. > >Hi Jay, > > From what I understand from Jirka's needs, the exact expected behaviors are : > >1/ If a slave is active, keep it active, even if the primary comes back up. >2/ If the current slave just failed, choose the new active slave, giving >priority to the master. > >Selecting the active slave at startup (by using ifenslave -c or writing into >/sys/class/net/bond0/bonding/active_slave) would solve 1, but not 2. Yah, I had missed step 2. I'd still call it something other than "lazy," though; "passive" sounds better to me. >Also, I suggested to change 1 in this way : > >1/ If a slave is active, keep it active, even if the primary comes back up, >*except if the speed of the primary is better than the speed of the active slave*. > >Thinking about all that, I start feeling that some sort of user space system to >select the "best" slave would be better. If we can design a NETLINK interface to >report events (slave up, slave down...) to user space, then any user space >daemon would be able to tell bonding what to do. Only if no process register to >receive those events would bonding use the normal slave selection rules. This has been discussed more than once in the past, but hasn't ever really gotten anywhere. I suspect the main impediment is the lack of a suitable API. >Designing such a NETLINK interface would replace my proposed weight option (at >least for best slave selection in active-backup mode and for best aggregator >selection in 802.3ad mode). It would also solve the problem reported by Jirka >and so replace the proposed primary_lazy option. Yes, a lot of the decision making at failover could be moved into a user space daemon. The daemon, I think, should be optional; if the basic selection policies are sufficient, then there's no need for a trip to user space and back. >Any way, NETLINK is something that is supposed to come into bonding at some >times, because we know that the sysfs purists hate the sysfs bonding stuff and >that NETLINK is the target to setup networking. I'm not a big fan of the sysfs API, either; it seemed like a good idea at the time. It's certainly better than ifenslave in terms of features, but some of it is pretty convoluted, and there are things that just can't be done from within sysfs. I recall seeing a note from Stephen Hemminger not too long ago (a month or two ago) that he was working on a netlink API for bonding, but I don't know how far that ever got. One quesiton is, if a netlink API is implemented, whether to convert ifenslave, or deprecate ifenslave and put the various bonding functions into ip. If a netlink API is on the relatively near horizon (say, within a few months), then I'm less inclined to put in the "lazy" option, since it would just become baggage carried forward for the next several years (until the sysfs API could be deprecated and removed). -J --- -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Jay Vosburgh wrote: > Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote: >> Thinking about all that, I start feeling that some sort of user space system to >> select the "best" slave would be better. If we can design a NETLINK interface to >> report events (slave up, slave down...) to user space, then any user space >> daemon would be able to tell bonding what to do. Only if no process register to >> receive those events would bonding use the normal slave selection rules. > > This has been discussed more than once in the past, but hasn't > ever really gotten anywhere. I suspect the main impediment is the lack > of a suitable API. Does a 'NETLINK for bonding' document, describing the proposed API, exist ? I imagine two different parts in the API : 1/ Everything related to configuration (set and read). This should be not far from the current sysfs API. 2/ Event notification about "everything" that happens into bonding, to be able to notify user space in real time. It might be also interesting to use the netlink API to notify whoever is interested that a given not-enslaved interface just received a 802.3ad related packet. This would allow for self enslavement of slaves into the same bond, when they happen to be connected to the same 802.3ad capable switch. >> Designing such a NETLINK interface would replace my proposed weight option (at >> least for best slave selection in active-backup mode and for best aggregator >> selection in 802.3ad mode). It would also solve the problem reported by Jirka >> and so replace the proposed primary_lazy option. > > Yes, a lot of the decision making at failover could be moved > into a user space daemon. The daemon, I think, should be optional; if > the basic selection policies are sufficient, then there's no need for a > trip to user space and back. > >> Any way, NETLINK is something that is supposed to come into bonding at some >> times, because we know that the sysfs purists hate the sysfs bonding stuff and >> that NETLINK is the target to setup networking. > > I'm not a big fan of the sysfs API, either; it seemed like a > good idea at the time. It's certainly better than ifenslave in terms of > features, but some of it is pretty convoluted, and there are things that > just can't be done from within sysfs. > > I recall seeing a note from Stephen Hemminger not too long ago > (a month or two ago) that he was working on a netlink API for bonding, > but I don't know how far that ever got. Yes, I also read this note and remembered he detected that many things need to be changed before... :-( > One quesiton is, if a netlink API is implemented, whether to > convert ifenslave, or deprecate ifenslave and put the various bonding > functions into ip. I suggest enhancing ip and removing ifenslave (or converting it to a script that would call ip internally, for backward compatibility). Why would we need a dedicated tool for bonding ? We can even write this script now and use sysfs instead of the ioctl, waiting for the netlink API. > If a netlink API is on the relatively near horizon (say, within > a few months), then I'm less inclined to put in the "lazy" option, since > it would just become baggage carried forward for the next several years > (until the sysfs API could be deprecated and removed). Does that mean you suggest Jiri works with Stephen on the netlink API ? :-) Nicolas. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index d5181ce..f1b82af 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -614,6 +614,15 @@ primary The primary option is only valid for active-backup mode. +primary_lazy + + Specifies the behaviour of the primary slave in case of + it's link recovery has been detected. By default (value 0) the + primary slave is set as active slave immediately after the link + recovery. If the value is 1 then current active slave doesn't + change as long as it's link status doesn't change. This prevents + the bonding device from flip-flopping. + updelay Specifies the time, in milliseconds, to wait before enabling a diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3bf0cc6..00fbb9d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -94,6 +94,7 @@ static int downdelay; static int use_carrier = 1; static char *mode; static char *primary; +static int primary_lazy; static char *lacp_rate; static char *ad_select; static char *xmit_hash_policy; @@ -126,6 +127,9 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " "6 for balance-alb"); module_param(primary, charp, 0); MODULE_PARM_DESC(primary, "Primary network device to use"); +module_param(primary_lazy, int, 0); +MODULE_PARM_DESC(primary_lazy, "Do not set primary slave active once it comes up; " + "0 for off (default), 1 for on"); module_param(lacp_rate, charp, 0); MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " "(slow/fast)"); @@ -1067,7 +1071,6 @@ out: } - /** * find_best_interface - select the best available slave to be the active one * @bond: our bonding struct @@ -1097,9 +1100,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond) * and primary_slave that may be up and able to arp */ if ((bond->primary_slave) && - (!bond->params.arp_interval) && - (IS_UP(bond->primary_slave->dev))) { + (IS_UP(bond->primary_slave->dev)) && + (!(bond->params.primary_lazy && old_active && + (IS_UP(old_active->dev))) || bond->force_primary)) { new_active = bond->primary_slave; + bond->force_primary = false; } /* remember where to stop iterating over the slaves */ @@ -1674,8 +1679,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ - if (strcmp(bond->params.primary, new_slave->dev->name) == 0) + if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { bond->primary_slave = new_slave; + bond->force_primary = true; + } } write_lock_bh(&bond->curr_slave_lock); @@ -2929,7 +2936,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) */ if (bond->primary_slave && (bond->primary_slave != bond->curr_active_slave) && - (bond->primary_slave->link == BOND_LINK_UP)) + (bond->primary_slave->link == BOND_LINK_UP) && + !(bond->params.primary_lazy && bond->curr_active_slave && + bond->curr_active_slave->link == BOND_LINK_UP)) commit++; read_unlock(&bond->curr_slave_lock); @@ -3035,7 +3044,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) */ if (bond->primary_slave && (bond->primary_slave != bond->curr_active_slave) && - (bond->primary_slave->link == BOND_LINK_UP)) { + (bond->primary_slave->link == BOND_LINK_UP) && + !(bond->params.primary_lazy && bond->curr_active_slave && + bond->curr_active_slave->link == BOND_LINK_UP)) { write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, bond->primary_slave); write_unlock_bh(&bond->curr_slave_lock); @@ -4987,6 +4998,17 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } + if (primary) { + if ((primary_lazy != 0) && (primary_lazy != 1)) { + pr_warning(DRV_NAME + ": Warning: primary_lazy module parameter " + "(%d), not of valid value (0/1), so it was " + "set to 0\n", + primary_lazy); + primary_lazy = 1; + } + } + if (fail_over_mac) { fail_over_mac_value = bond_parse_parm(fail_over_mac, fail_over_mac_tbl); @@ -5018,6 +5040,7 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; + params->primary_lazy = primary_lazy; params->fail_over_mac = fail_over_mac_value; if (primary) { diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 55bf34f..573fe82 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1209,6 +1209,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, bonding_show_primary, bonding_store_primary); /* + * Show and set the primary_lazy flag. + */ +static ssize_t bonding_show_primary_lazy(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%d\n", bond->params.primary_lazy); +} + +static ssize_t bonding_store_primary_lazy(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int new_value, ret = count; + struct bonding *bond = to_bond(d); + + if (!rtnl_trylock()) + return restart_syscall(); + + if (sscanf(buf, "%d", &new_value) != 1) { + pr_err(DRV_NAME + ": %s: no primary_lazy value specified.\n", + bond->dev->name); + ret = -EINVAL; + goto out; + } + if ((new_value == 0) || (new_value == 1)) { + bond->params.primary_lazy = new_value; + pr_info(DRV_NAME ": %s: Setting primary_lazy to %d.\n", + bond->dev->name, new_value); + if (!bond->params.primary_lazy) { + bond->force_primary = true; + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + } + } else { + pr_info(DRV_NAME + ": %s: Ignoring invalid primary_lazy value %d.\n", + bond->dev->name, new_value); + } +out: + rtnl_unlock(); + return count; +} +static DEVICE_ATTR(primary_lazy, S_IRUGO | S_IWUSR, + bonding_show_primary_lazy, bonding_store_primary_lazy); + +/* * Show and set the use_carrier flag. */ static ssize_t bonding_show_carrier(struct device *d, @@ -1497,6 +1550,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_num_unsol_na.attr, &dev_attr_miimon.attr, &dev_attr_primary.attr, + &dev_attr_primary_lazy.attr, &dev_attr_use_carrier.attr, &dev_attr_active_slave.attr, &dev_attr_mii_status.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 6290a50..ac35c6c 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -131,6 +131,7 @@ struct bond_params { int lacp_fast; int ad_select; char primary[IFNAMSIZ]; + int primary_lazy; __be32 arp_targets[BOND_MAX_ARP_TARGETS]; }; @@ -190,6 +191,7 @@ struct bonding { struct slave *curr_active_slave; struct slave *current_arp_slave; struct slave *primary_slave; + bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ rwlock_t lock; rwlock_t curr_slave_lock;
In some cases there is not desirable to switch back to primary interface when it's link recovers and rather stay wiith currently active one. We need to avoid packetloss as much as we can in some cases. This is solved by introducing primary_lazy option. Note that enslaved primary slave is set as current active no matter what. Signed-off-by: Jiri Pirko <jpirko@redhat.com> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html