Message ID | 20220622142852.872240-1-mkp@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev,v5] bond: Improve bond and lacp visibility | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | success | test: success |
On Wed, Jun 22, 2022 at 4:29 PM Mike Pattrick <mkp@redhat.com> wrote: > > Add additional logging for debug and info level with a focus on code > related to bond members coming up, go down, and changing. > > Several existing log messages were modified to handle sub 1kB log > messages with more grace. Instead of reporting 0kB of traffic load > shifting from one member to another, we now suppress these messages at > the INFO level and display exact byte count at the debug level. > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > > -- > > Since v4: > - Removed prefixes > - Removed unrequired conditional > > Since v3: > - Removed log if bond members were significantly deviant > - Normalized log prefixes, including LACP > - Added details to the member enable/disable log > > Since v2: > - Normalized log capitalization and periods > - Summarized info level hash transfers > - Added log if bond members were significantly deviant > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- > lib/lacp.c | 26 +++++++++--- > ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++---------------- > 2 files changed, 96 insertions(+), 40 deletions(-) > > diff --git a/lib/lacp.c b/lib/lacp.c > index 3252f17eb..bca347129 100644 > --- a/lib/lacp.c > +++ b/lib/lacp.c > @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > break; > case SUBTYPE_MARKER: > member->count_rx_pdus_marker++; > - VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); > + VLOG_DBG("%s: received a LACP marker PDU", lacp->name); > goto out; > case SUBTYPE_UNUSED: > default: > member->count_rx_pdus_bad++; > - VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", > + VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU", > lacp->name); > goto out; > } > @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > * trigger re-checking of L1 state. */ > if (!member->carrier_up) { > VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," > - " dropping received LACP PDU.", member->name); > + " dropping received LACP PDU", member->name); > seq_change(connectivity_seq_get()); > goto out; > } > > + if (member->status == LACP_DEFAULTED) { > + VLOG_INFO("%s: defaulted member %s reestablished connection with" > + " LACP partner", lacp->name, member->name); > + } > + > member->status = LACP_CURRENT; > tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; > timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); > @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_, > if (!lacp->key_member) { > lacp->key_member = member; > } > + > + VLOG_DBG("%s: member %s added", lacp->name, s->name); > } > > if (!member->name || strcmp(s->name, member->name)) { > + if (member->name) { > + VLOG_DBG("%s: member %s renamed from %s", lacp->name, > + s->name, member->name); > + } > + > free(member->name); > member->name = xstrdup(s->name); > } > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, > member->count_carrier_changed++; > } > > + VLOG_DBG("%s: member %s changed state to %s", lacp->name, > + member->name, carrier_up ? "up" : "down"); > + > out: > lacp_unlock(); > } > @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) > /* XXX: In the future allow users to configure the expected system ID. > * For now just special case loopback. */ > if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { > - VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " > - "connected to its own bond", member->name); > + VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. " > + "Interface is connected to its own bond", > + lacp->name, member->name); > continue; > } > > diff --git a/ofproto/bond.c b/ofproto/bond.c > index 845f69e21..13702950d 100644 > --- a/ofproto/bond.c > +++ b/ofproto/bond.c > @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to add post recirculation flow %s", err_s); > + VLOG_ERR("%s: failed to add post recirculation flow %s", > + bond->name, err_s); > free(err_s); > } > break; > @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to remove post recirculation flow %s", err_s); > + VLOG_ERR("%s: failed to remove post recirculation flow " > + "%s", bond->name, err_s); > free(err_s); > } > > @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) > if (s->use_lb_output_action && > !ovs_lb_output_action_supported(bond->ofproto)) { > VLOG_WARN("%s: Datapath does not support 'lb_output' action, " > - "disabled.", bond->name); > + "disabled", bond->name); > } else { > bond->use_lb_output_action = s->use_lb_output_action; > if (!bond->use_lb_output_action) { > @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void *member_, > ovs_rwlock_rdlock(&rwlock); > member = bond_member_lookup(bond, member_); > if (!member) { > + VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed", > + bond->name); Is there a valid case where this would not be a bug? > goto out; > } > > @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > * main thread to run LACP state machine and enable the member. */ > verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP; > if (!member->enabled && member->may_enable) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "main thread has not yet enabled member", > bond->name, bond->active_member->name); > } > @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > /* Drop all packets which arrive on backup members. This is similar to > * how Linux bonding handles active-backup bonds. */ > if (bond->active_member != member) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond received " > + VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received " > "packet on backup member destined for " ETH_ADDR_FMT, > bond->name, member->name, ETH_ADDR_ARGS(eth_dst)); > goto out; > @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > OVS_NOT_REACHED(); > out: > if (member && (verdict != BV_ACCEPT)) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "admissibility verdict is to drop pkt%s, " > "active member: %s, may_enable: %s, enabled: %s, " > "LACP status: %s", > @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > if (ds.length) { > ds_put_char(&ds, ','); Nit. The separator between members in this log message ends up being ", ". So here, we should push an additonal space... > } > - ds_put_format(&ds, " %s %"PRIu64"kB", > - member->name, member->tx_bytes / 1024); > + if (member->tx_bytes > 1024) { > + ds_put_format(&ds, " %s %"PRIu64" kB", ...and then there is no need for this extra space in front of the member name. Otherwise, we get an extra space for the first member: 2022-07-29T14:14:52.015Z|00147|bond|DBG|dpdkbond0: dpdk0 0 B [...] 2022-07-29T14:15:02.479Z|00150|bond|DBG|dpdkbond0: dpdk1 0 B, dpdk0 0 B > + member->name, member->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, " %s %"PRIu64" B", > + member->name, member->tx_bytes); > + } > > if (!member->enabled) { > ds_put_cstr(&ds, " (disabled)"); > @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > if (&e->list_node != ovs_list_front(&member->entries)) { > ds_put_cstr(&ds, " + "); > } > - ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", > - e - bond->hash, e->tx_bytes / 1024); > + if (e->tx_bytes > 1024) { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB", > + e - bond->hash, e->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B", > + e - bond->hash, e->tx_bytes); > + } > } > ds_put_cstr(&ds, ")"); > } > } > - VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds)); > + if (ds.length) { > + VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds)); > + } > ds_destroy(&ds); > } > } > @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) > struct bond *bond = from->bond; > uint64_t delta = hash->tx_bytes; > > - VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") " > - "from %s to %s (now carrying %"PRIu64"kB and " > - "%"PRIu64"kB load, respectively)", > - bond->name, delta / 1024, hash - bond->hash, > - from->name, to->name, > - (from->tx_bytes - delta) / 1024, > - (to->tx_bytes + delta) / 1024); > - > /* Shift load away from 'from' to 'to'. */ > from->tx_bytes -= delta; > to->tx_bytes += delta; > @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond) > insert_bal(&bals, member); > } > } > + One empty line is added, it seems unrelated. > log_bals(bond, &bals); > > /* Shift load from the most-loaded members to the least-loaded members. */ > @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond) > uint64_t overload; > > overload = from->tx_bytes - to->tx_bytes; > + Idem. > if (overload < to->tx_bytes >> 5 || overload < 100000) { > /* The extra load on 'from' (and all less-loaded members), compared > * to that of 'to' (the least-loaded member), is less than ~3%, or > @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond) > reinsert_bal(&bals, from); > reinsert_bal(&bals, to); > } > + VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64 > + " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)", > + bond->name, cnt, > + (overload + to->tx_bytes - from->tx_bytes) >> 1, > + from->name, from->tx_bytes, to->name, to->tx_bytes); > + > rebalanced = true; > } > > @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond *bond) > if (be_tx_k) { > ds_put_format(ds, " hash %d: %"PRIu64" kB load\n", > hash, be_tx_k); > + } else if (be->tx_bytes) { > + ds_put_format(ds, " hash %d: %"PRIu64" B load\n", > + hash, be->tx_bytes); > } > > /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ > @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn *conn, > if (bond->active_member != member) { > bond->bond_revalidate = true; > bond->active_member = member; > - VLOG_INFO("bond %s: active member is now %s", > + VLOG_INFO("%s: active member is now %s", > bond->name, member->name); > bond->send_learning_packets = true; > unixctl_command_reply(conn, "done"); > @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool enable) > } > ovs_mutex_unlock(&member->bond->mutex); > > - VLOG_INFO("member %s: %s", member->name, > + VLOG_INFO("%s: member %s %s", bond->name, member->name, > member->enabled ? "enabled" : "disabled"); > } > } > @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member) > { > struct bond *bond = member->bond; > bool up; > + bool carrier = netdev_get_carrier(member->netdev); > > - up = netdev_get_carrier(member->netdev) && member->may_enable; > + up = carrier && member->may_enable; > if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { > + char * msg; > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > - VLOG_INFO_RL(&rl, "member %s: link state %s", > - member->name, up ? "up" : "down"); > + if (!carrier) { > + msg = "carrier state"; > + } else { > + msg = "link state"; > + } > + VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name, > + member->name, msg, up ? "up" : "down"); > if (up == member->enabled) { > member->delay_expires = LLONG_MAX; > - VLOG_INFO_RL(&rl, "member %s: will not be %s", > - member->name, up ? "disabled" : "enabled"); > + VLOG_INFO_RL(&rl, "%s: member %s will not be %s", > + bond->name, member->name, > + up ? "disabled" : "enabled"); > } else { > int delay = up ? bond->updelay : bond->downdelay; > member->delay_expires = time_msec() + delay; > if (delay) { > - VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " > - "for %d ms", > - member->name, > + VLOG_INFO_RL(&rl, "%s: member %s will be %s" > + " if it stays %s for %d ms", > + bond->name, member->name, > up ? "enabled" : "disabled", > up ? "up" : "down", > delay); > @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > { > struct bond_entry *e; > int balance; > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > balance = bond->balance; > if (bond->lacp_status == LACP_CONFIGURED) { > /* LACP has been configured on this bond but negotiations were > - * unsuccussful. If lacp_fallback_ab is enabled use active- > + * unsuccessful. If lacp_fallback_ab is enabled use active- > * backup mode else drop all traffic. */ > if (!bond->lacp_fallback_ab) { > - return NULL; > + goto error; > } > balance = BM_AB; > } > @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > case BM_TCP: > if (bond->lacp_status != LACP_NEGOTIATED) { > /* Must have LACP negotiations for TCP balanced bonds. */ > - return NULL; > + goto error; > } > if (wc) { > flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP); > @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > default: > OVS_NOT_REACHED(); > } > +error: > + if (VLOG_IS_DBG_ENABLED()) { > + struct ds ds = DS_EMPTY_INITIALIZER; > + flow_format(&ds, flow, NULL); > + VLOG_DBG_RL(&rl, "%s: cannot choose output member " > + "for flow %s with LACP status %s", > + bond->name, > + ds_cstr(&ds), > + lacp_status_description(bond->lacp_status)); > + ds_destroy(&ds); > + } > + return NULL; > } > > static struct bond_member * > @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond) > bond->active_member = bond_choose_member(bond); > if (bond->active_member) { > if (bond->active_member->enabled) { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s", > + VLOG_INFO_RL(&rl, "%s: active member is now %s", > bond->name, bond->active_member->name); > } else { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " > + VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping " > "remaining %lld ms updelay (since no member was " > "enabled)", bond->name, bond->active_member->name, > bond->active_member->delay_expires - time_msec()); > @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond) > } > } else if (old_active_member) { > bond_active_member_changed(bond); > - VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); > + VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name); > } > } > Reviewed-by: David Marchand <david.marchand@redhat.com>
On 6/22/22 16:28, Mike Pattrick wrote: > Add additional logging for debug and info level with a focus on code > related to bond members coming up, go down, and changing. > > Several existing log messages were modified to handle sub 1kB log > messages with more grace. Instead of reporting 0kB of traffic load > shifting from one member to another, we now suppress these messages at > the INFO level and display exact byte count at the debug level. > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > > -- > > Since v4: > - Removed prefixes > - Removed unrequired conditional > > Since v3: > - Removed log if bond members were significantly deviant > - Normalized log prefixes, including LACP > - Added details to the member enable/disable log > > Since v2: > - Normalized log capitalization and periods > - Summarized info level hash transfers > - Added log if bond members were significantly deviant > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- Thanks for the patch, Mike! See some comments inline, besides the ones that David already made. Best regards, Ilya Maximets. > lib/lacp.c | 26 +++++++++--- > ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++---------------- > 2 files changed, 96 insertions(+), 40 deletions(-) > > diff --git a/lib/lacp.c b/lib/lacp.c > index 3252f17eb..bca347129 100644 > --- a/lib/lacp.c > +++ b/lib/lacp.c > @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > break; > case SUBTYPE_MARKER: > member->count_rx_pdus_marker++; > - VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); > + VLOG_DBG("%s: received a LACP marker PDU", lacp->name); > goto out; > case SUBTYPE_UNUSED: > default: > member->count_rx_pdus_bad++; > - VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", > + VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU", > lacp->name); > goto out; > } > @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > * trigger re-checking of L1 state. */ > if (!member->carrier_up) { > VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," > - " dropping received LACP PDU.", member->name); > + " dropping received LACP PDU", member->name); > seq_change(connectivity_seq_get()); > goto out; > } > > + if (member->status == LACP_DEFAULTED) { > + VLOG_INFO("%s: defaulted member %s reestablished connection with" > + " LACP partner", lacp->name, member->name); > + } > + > member->status = LACP_CURRENT; > tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; > timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); > @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_, > if (!lacp->key_member) { > lacp->key_member = member; > } > + > + VLOG_DBG("%s: member %s added", lacp->name, s->name); > } > > if (!member->name || strcmp(s->name, member->name)) { > + if (member->name) { > + VLOG_DBG("%s: member %s renamed from %s", lacp->name, > + s->name, member->name); > + } > + > free(member->name); > member->name = xstrdup(s->name); > } > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, > member->count_carrier_changed++; > } > > + VLOG_DBG("%s: member %s changed state to %s", lacp->name, > + member->name, carrier_up ? "up" : "down"); > + > out: > lacp_unlock(); > } > @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) > /* XXX: In the future allow users to configure the expected system ID. > * For now just special case loopback. */ > if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { > - VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " > - "connected to its own bond", member->name); > + VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. " > + "Interface is connected to its own bond", > + lacp->name, member->name); > continue; > } > > diff --git a/ofproto/bond.c b/ofproto/bond.c > index 845f69e21..13702950d 100644 > --- a/ofproto/bond.c > +++ b/ofproto/bond.c > @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to add post recirculation flow %s", err_s); > + VLOG_ERR("%s: failed to add post recirculation flow %s", > + bond->name, err_s); > free(err_s); > } > break; > @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to remove post recirculation flow %s", err_s); > + VLOG_ERR("%s: failed to remove post recirculation flow " > + "%s", bond->name, err_s); There seems to be enough space to keep the '%s' on a previous line. > free(err_s); > } > > @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) > if (s->use_lb_output_action && > !ovs_lb_output_action_supported(bond->ofproto)) { > VLOG_WARN("%s: Datapath does not support 'lb_output' action, " > - "disabled.", bond->name); > + "disabled", bond->name); > } else { > bond->use_lb_output_action = s->use_lb_output_action; > if (!bond->use_lb_output_action) { > @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void *member_, > ovs_rwlock_rdlock(&rwlock); > member = bond_member_lookup(bond, member_); > if (!member) { > + VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed", > + bond->name); > goto out; > } > > @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > * main thread to run LACP state machine and enable the member. */ > verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP; > if (!member->enabled && member->may_enable) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "main thread has not yet enabled member", > bond->name, bond->active_member->name); > } > @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > /* Drop all packets which arrive on backup members. This is similar to > * how Linux bonding handles active-backup bonds. */ > if (bond->active_member != member) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond received " > + VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received " > "packet on backup member destined for " ETH_ADDR_FMT, > bond->name, member->name, ETH_ADDR_ARGS(eth_dst)); > goto out; > @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > OVS_NOT_REACHED(); > out: > if (member && (verdict != BV_ACCEPT)) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "admissibility verdict is to drop pkt%s, " > "active member: %s, may_enable: %s, enabled: %s, " > "LACP status: %s", > @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > if (ds.length) { > ds_put_char(&ds, ','); > } > - ds_put_format(&ds, " %s %"PRIu64"kB", > - member->name, member->tx_bytes / 1024); > + if (member->tx_bytes > 1024) { > + ds_put_format(&ds, " %s %"PRIu64" kB", > + member->name, member->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, " %s %"PRIu64" B", Many interface names end with numbers, and things like "dpdk0 0 B" are barely readable, IMHO. Could you, please, remove the extra space between the number and 'kB'/'B' here and other places? > + member->name, member->tx_bytes); > + } > > if (!member->enabled) { > ds_put_cstr(&ds, " (disabled)"); > @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > if (&e->list_node != ovs_list_front(&member->entries)) { > ds_put_cstr(&ds, " + "); > } > - ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", > - e - bond->hash, e->tx_bytes / 1024); > + if (e->tx_bytes > 1024) { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB", > + e - bond->hash, e->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B", > + e - bond->hash, e->tx_bytes); > + } > } > ds_put_cstr(&ds, ")"); > } > } > - VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds)); > + if (ds.length) { > + VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds)); > + } > ds_destroy(&ds); > } > } > @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) > struct bond *bond = from->bond; > uint64_t delta = hash->tx_bytes; > > - VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") " > - "from %s to %s (now carrying %"PRIu64"kB and " > - "%"PRIu64"kB load, respectively)", > - bond->name, delta / 1024, hash - bond->hash, > - from->name, to->name, > - (from->tx_bytes - delta) / 1024, > - (to->tx_bytes + delta) / 1024); > - > /* Shift load away from 'from' to 'to'. */ > from->tx_bytes -= delta; > to->tx_bytes += delta; > @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond) > insert_bal(&bals, member); > } > } > + > log_bals(bond, &bals); > > /* Shift load from the most-loaded members to the least-loaded members. */ > @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond) > uint64_t overload; > > overload = from->tx_bytes - to->tx_bytes; > + > if (overload < to->tx_bytes >> 5 || overload < 100000) { > /* The extra load on 'from' (and all less-loaded members), compared > * to that of 'to' (the least-loaded member), is less than ~3%, or > @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond) > reinsert_bal(&bals, from); > reinsert_bal(&bals, to); > } > + VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64 > + " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)", > + bond->name, cnt, > + (overload + to->tx_bytes - from->tx_bytes) >> 1, It would be easier to understand if we will just divide by 2 here. > + from->name, from->tx_bytes, to->name, to->tx_bytes); These are in bytes, but reported as kB. > + > rebalanced = true; > } > > @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond *bond) > if (be_tx_k) { > ds_put_format(ds, " hash %d: %"PRIu64" kB load\n", > hash, be_tx_k); > + } else if (be->tx_bytes) { > + ds_put_format(ds, " hash %d: %"PRIu64" B load\n", > + hash, be->tx_bytes); > } > > /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ > @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn *conn, > if (bond->active_member != member) { > bond->bond_revalidate = true; > bond->active_member = member; > - VLOG_INFO("bond %s: active member is now %s", > + VLOG_INFO("%s: active member is now %s", > bond->name, member->name); > bond->send_learning_packets = true; > unixctl_command_reply(conn, "done"); > @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool enable) > } > ovs_mutex_unlock(&member->bond->mutex); > > - VLOG_INFO("member %s: %s", member->name, > + VLOG_INFO("%s: member %s %s", bond->name, member->name, > member->enabled ? "enabled" : "disabled"); > } > } > @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member) > { > struct bond *bond = member->bond; > bool up; > + bool carrier = netdev_get_carrier(member->netdev); Reverse xmass tree, please. It might make sense to initialize it on a separate line. > > - up = netdev_get_carrier(member->netdev) && member->may_enable; > + up = carrier && member->may_enable; > if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { > + char * msg; const char *msg; An empty line after the declaration would be nice. > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > - VLOG_INFO_RL(&rl, "member %s: link state %s", > - member->name, up ? "up" : "down"); > + if (!carrier) { > + msg = "carrier state"; > + } else { > + msg = "link state"; > + } > + VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name, > + member->name, msg, up ? "up" : "down"); > if (up == member->enabled) { > member->delay_expires = LLONG_MAX; > - VLOG_INFO_RL(&rl, "member %s: will not be %s", > - member->name, up ? "disabled" : "enabled"); > + VLOG_INFO_RL(&rl, "%s: member %s will not be %s", > + bond->name, member->name, > + up ? "disabled" : "enabled"); > } else { > int delay = up ? bond->updelay : bond->downdelay; > member->delay_expires = time_msec() + delay; > if (delay) { > - VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " > - "for %d ms", > - member->name, > + VLOG_INFO_RL(&rl, "%s: member %s will be %s" > + " if it stays %s for %d ms", > + bond->name, member->name, > up ? "enabled" : "disabled", > up ? "up" : "down", > delay); > @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > { > struct bond_entry *e; > int balance; > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); Reverse xmass tree. > > balance = bond->balance; > if (bond->lacp_status == LACP_CONFIGURED) { > /* LACP has been configured on this bond but negotiations were > - * unsuccussful. If lacp_fallback_ab is enabled use active- > + * unsuccessful. If lacp_fallback_ab is enabled use active- > * backup mode else drop all traffic. */ > if (!bond->lacp_fallback_ab) { > - return NULL; > + goto error; > } > balance = BM_AB; > } > @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > case BM_TCP: > if (bond->lacp_status != LACP_NEGOTIATED) { > /* Must have LACP negotiations for TCP balanced bonds. */ > - return NULL; > + goto error; > } > if (wc) { > flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP); > @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > default: > OVS_NOT_REACHED(); > } > +error: > + if (VLOG_IS_DBG_ENABLED()) { if (!VLOG_DROP_DBG(&rl)) { > + struct ds ds = DS_EMPTY_INITIALIZER; Empty line after the declaration. > + flow_format(&ds, flow, NULL); > + VLOG_DBG_RL(&rl, "%s: cannot choose output member " And remove the RL here. > + "for flow %s with LACP status %s", > + bond->name, > + ds_cstr(&ds), > + lacp_status_description(bond->lacp_status)); > + ds_destroy(&ds); > + } > + return NULL; > } > > static struct bond_member * > @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond) > bond->active_member = bond_choose_member(bond); > if (bond->active_member) { > if (bond->active_member->enabled) { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s", > + VLOG_INFO_RL(&rl, "%s: active member is now %s", > bond->name, bond->active_member->name); > } else { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " > + VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping " > "remaining %lld ms updelay (since no member was " > "enabled)", bond->name, bond->active_member->name, > bond->active_member->delay_expires - time_msec()); > @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond) > } > } else if (old_active_member) { > bond_active_member_changed(bond); > - VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); > + VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name); > } > } >
On Tue, Aug 30, 2022 at 8:52 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 6/22/22 16:28, Mike Pattrick wrote: > > Add additional logging for debug and info level with a focus on code > > related to bond members coming up, go down, and changing. > > > > Several existing log messages were modified to handle sub 1kB log > > messages with more grace. Instead of reporting 0kB of traffic load > > shifting from one member to another, we now suppress these messages at > > the INFO level and display exact byte count at the debug level. > > > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > > > > -- > > > > Since v4: > > - Removed prefixes > > - Removed unrequired conditional > > > > Since v3: > > - Removed log if bond members were significantly deviant > > - Normalized log prefixes, including LACP > > - Added details to the member enable/disable log > > > > Since v2: > > - Normalized log capitalization and periods > > - Summarized info level hash transfers > > - Added log if bond members were significantly deviant > > > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > > --- > > Thanks for the patch, Mike! > > See some comments inline, besides the ones that David already made. > > Best regards, Ilya Maximets. Thank you for the reviews, I'll send in a new version. Cheers, M > > > lib/lacp.c | 26 +++++++++--- > > ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++---------------- > > 2 files changed, 96 insertions(+), 40 deletions(-) > > > > diff --git a/lib/lacp.c b/lib/lacp.c > > index 3252f17eb..bca347129 100644 > > --- a/lib/lacp.c > > +++ b/lib/lacp.c > > @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > > break; > > case SUBTYPE_MARKER: > > member->count_rx_pdus_marker++; > > - VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); > > + VLOG_DBG("%s: received a LACP marker PDU", lacp->name); > > goto out; > > case SUBTYPE_UNUSED: > > default: > > member->count_rx_pdus_bad++; > > - VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", > > + VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU", > > lacp->name); > > goto out; > > } > > @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void *member_, > > * trigger re-checking of L1 state. */ > > if (!member->carrier_up) { > > VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," > > - " dropping received LACP PDU.", member->name); > > + " dropping received LACP PDU", member->name); > > seq_change(connectivity_seq_get()); > > goto out; > > } > > > > + if (member->status == LACP_DEFAULTED) { > > + VLOG_INFO("%s: defaulted member %s reestablished connection with" > > + " LACP partner", lacp->name, member->name); > > + } > > + > > member->status = LACP_CURRENT; > > tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; > > timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); > > @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_, > > if (!lacp->key_member) { > > lacp->key_member = member; > > } > > + > > + VLOG_DBG("%s: member %s added", lacp->name, s->name); > > } > > > > if (!member->name || strcmp(s->name, member->name)) { > > + if (member->name) { > > + VLOG_DBG("%s: member %s renamed from %s", lacp->name, > > + s->name, member->name); > > + } > > + > > free(member->name); > > member->name = xstrdup(s->name); > > } > > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, > > member->count_carrier_changed++; > > } > > > > + VLOG_DBG("%s: member %s changed state to %s", lacp->name, > > + member->name, carrier_up ? "up" : "down"); > > + > > out: > > lacp_unlock(); > > } > > @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) > > /* XXX: In the future allow users to configure the expected system ID. > > * For now just special case loopback. */ > > if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { > > - VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " > > - "connected to its own bond", member->name); > > + VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. " > > + "Interface is connected to its own bond", > > + lacp->name, member->name); > > continue; > > } > > > > diff --git a/ofproto/bond.c b/ofproto/bond.c > > index 845f69e21..13702950d 100644 > > --- a/ofproto/bond.c > > +++ b/ofproto/bond.c > > @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond) > > char *err_s = match_to_string(&pr_op->match, NULL, > > RECIRC_RULE_PRIORITY); > > > > - VLOG_ERR("failed to add post recirculation flow %s", err_s); > > + VLOG_ERR("%s: failed to add post recirculation flow %s", > > + bond->name, err_s); > > free(err_s); > > } > > break; > > @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond) > > char *err_s = match_to_string(&pr_op->match, NULL, > > RECIRC_RULE_PRIORITY); > > > > - VLOG_ERR("failed to remove post recirculation flow %s", err_s); > > + VLOG_ERR("%s: failed to remove post recirculation flow " > > + "%s", bond->name, err_s); > > There seems to be enough space to keep the '%s' on a previous line. > > > free(err_s); > > } > > > > @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) > > if (s->use_lb_output_action && > > !ovs_lb_output_action_supported(bond->ofproto)) { > > VLOG_WARN("%s: Datapath does not support 'lb_output' action, " > > - "disabled.", bond->name); > > + "disabled", bond->name); > > } else { > > bond->use_lb_output_action = s->use_lb_output_action; > > if (!bond->use_lb_output_action) { > > @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void *member_, > > ovs_rwlock_rdlock(&rwlock); > > member = bond_member_lookup(bond, member_); > > if (!member) { > > + VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed", > > + bond->name); > > goto out; > > } > > > > @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > > * main thread to run LACP state machine and enable the member. */ > > verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP; > > if (!member->enabled && member->may_enable) { > > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > > + VLOG_DBG_RL(&rl, "%s: member %s: " > > "main thread has not yet enabled member", > > bond->name, bond->active_member->name); > > } > > @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > > /* Drop all packets which arrive on backup members. This is similar to > > * how Linux bonding handles active-backup bonds. */ > > if (bond->active_member != member) { > > - VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond received " > > + VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received " > > "packet on backup member destined for " ETH_ADDR_FMT, > > bond->name, member->name, ETH_ADDR_ARGS(eth_dst)); > > goto out; > > @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, > > OVS_NOT_REACHED(); > > out: > > if (member && (verdict != BV_ACCEPT)) { > > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > > + VLOG_DBG_RL(&rl, "%s: member %s: " > > "admissibility verdict is to drop pkt%s, " > > "active member: %s, may_enable: %s, enabled: %s, " > > "LACP status: %s", > > @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > > if (ds.length) { > > ds_put_char(&ds, ','); > > } > > - ds_put_format(&ds, " %s %"PRIu64"kB", > > - member->name, member->tx_bytes / 1024); > > + if (member->tx_bytes > 1024) { > > + ds_put_format(&ds, " %s %"PRIu64" kB", > > + member->name, member->tx_bytes / 1024); > > + } else { > > + ds_put_format(&ds, " %s %"PRIu64" B", > > Many interface names end with numbers, and things like "dpdk0 0 B" > are barely readable, IMHO. Could you, please, remove the extra space > between the number and 'kB'/'B' here and other places? > > > + member->name, member->tx_bytes); > > + } > > > > if (!member->enabled) { > > ds_put_cstr(&ds, " (disabled)"); > > @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals) > > if (&e->list_node != ovs_list_front(&member->entries)) { > > ds_put_cstr(&ds, " + "); > > } > > - ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", > > - e - bond->hash, e->tx_bytes / 1024); > > + if (e->tx_bytes > 1024) { > > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB", > > + e - bond->hash, e->tx_bytes / 1024); > > + } else { > > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B", > > + e - bond->hash, e->tx_bytes); > > + } > > } > > ds_put_cstr(&ds, ")"); > > } > > } > > - VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds)); > > + if (ds.length) { > > + VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds)); > > + } > > ds_destroy(&ds); > > } > > } > > @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) > > struct bond *bond = from->bond; > > uint64_t delta = hash->tx_bytes; > > > > - VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") " > > - "from %s to %s (now carrying %"PRIu64"kB and " > > - "%"PRIu64"kB load, respectively)", > > - bond->name, delta / 1024, hash - bond->hash, > > - from->name, to->name, > > - (from->tx_bytes - delta) / 1024, > > - (to->tx_bytes + delta) / 1024); > > - > > /* Shift load away from 'from' to 'to'. */ > > from->tx_bytes -= delta; > > to->tx_bytes += delta; > > @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond) > > insert_bal(&bals, member); > > } > > } > > + > > log_bals(bond, &bals); > > > > /* Shift load from the most-loaded members to the least-loaded members. */ > > @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond) > > uint64_t overload; > > > > overload = from->tx_bytes - to->tx_bytes; > > + > > if (overload < to->tx_bytes >> 5 || overload < 100000) { > > /* The extra load on 'from' (and all less-loaded members), compared > > * to that of 'to' (the least-loaded member), is less than ~3%, or > > @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond) > > reinsert_bal(&bals, from); > > reinsert_bal(&bals, to); > > } > > + VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64 > > + " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)", > > + bond->name, cnt, > > + (overload + to->tx_bytes - from->tx_bytes) >> 1, > > It would be easier to understand if we will just divide by 2 here. > > > + from->name, from->tx_bytes, to->name, to->tx_bytes); > > These are in bytes, but reported as kB. > > > + > > rebalanced = true; > > } > > > > @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond *bond) > > if (be_tx_k) { > > ds_put_format(ds, " hash %d: %"PRIu64" kB load\n", > > hash, be_tx_k); > > + } else if (be->tx_bytes) { > > + ds_put_format(ds, " hash %d: %"PRIu64" B load\n", > > + hash, be->tx_bytes); > > } > > > > /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ > > @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn *conn, > > if (bond->active_member != member) { > > bond->bond_revalidate = true; > > bond->active_member = member; > > - VLOG_INFO("bond %s: active member is now %s", > > + VLOG_INFO("%s: active member is now %s", > > bond->name, member->name); > > bond->send_learning_packets = true; > > unixctl_command_reply(conn, "done"); > > @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool enable) > > } > > ovs_mutex_unlock(&member->bond->mutex); > > > > - VLOG_INFO("member %s: %s", member->name, > > + VLOG_INFO("%s: member %s %s", bond->name, member->name, > > member->enabled ? "enabled" : "disabled"); > > } > > } > > @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member) > > { > > struct bond *bond = member->bond; > > bool up; > > + bool carrier = netdev_get_carrier(member->netdev); > > Reverse xmass tree, please. It might make sense to initialize it > on a separate line. > > > > > - up = netdev_get_carrier(member->netdev) && member->may_enable; > > + up = carrier && member->may_enable; > > if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { > > + char * msg; > > const char *msg; > > An empty line after the declaration would be nice. > > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > - VLOG_INFO_RL(&rl, "member %s: link state %s", > > - member->name, up ? "up" : "down"); > > + if (!carrier) { > > + msg = "carrier state"; > > + } else { > > + msg = "link state"; > > + } > > + VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name, > > + member->name, msg, up ? "up" : "down"); > > if (up == member->enabled) { > > member->delay_expires = LLONG_MAX; > > - VLOG_INFO_RL(&rl, "member %s: will not be %s", > > - member->name, up ? "disabled" : "enabled"); > > + VLOG_INFO_RL(&rl, "%s: member %s will not be %s", > > + bond->name, member->name, > > + up ? "disabled" : "enabled"); > > } else { > > int delay = up ? bond->updelay : bond->downdelay; > > member->delay_expires = time_msec() + delay; > > if (delay) { > > - VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " > > - "for %d ms", > > - member->name, > > + VLOG_INFO_RL(&rl, "%s: member %s will be %s" > > + " if it stays %s for %d ms", > > + bond->name, member->name, > > up ? "enabled" : "disabled", > > up ? "up" : "down", > > delay); > > @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > > { > > struct bond_entry *e; > > int balance; > > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > Reverse xmass tree. > > > > > balance = bond->balance; > > if (bond->lacp_status == LACP_CONFIGURED) { > > /* LACP has been configured on this bond but negotiations were > > - * unsuccussful. If lacp_fallback_ab is enabled use active- > > + * unsuccessful. If lacp_fallback_ab is enabled use active- > > * backup mode else drop all traffic. */ > > if (!bond->lacp_fallback_ab) { > > - return NULL; > > + goto error; > > } > > balance = BM_AB; > > } > > @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > > case BM_TCP: > > if (bond->lacp_status != LACP_NEGOTIATED) { > > /* Must have LACP negotiations for TCP balanced bonds. */ > > - return NULL; > > + goto error; > > } > > if (wc) { > > flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP); > > @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const struct flow *flow, > > default: > > OVS_NOT_REACHED(); > > } > > +error: > > + if (VLOG_IS_DBG_ENABLED()) { > > if (!VLOG_DROP_DBG(&rl)) { > > > + struct ds ds = DS_EMPTY_INITIALIZER; > > Empty line after the declaration. > > > + flow_format(&ds, flow, NULL); > > + VLOG_DBG_RL(&rl, "%s: cannot choose output member " > > And remove the RL here. > > > + "for flow %s with LACP status %s", > > + bond->name, > > + ds_cstr(&ds), > > + lacp_status_description(bond->lacp_status)); > > + ds_destroy(&ds); > > + } > > + return NULL; > > } > > > > static struct bond_member * > > @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond) > > bond->active_member = bond_choose_member(bond); > > if (bond->active_member) { > > if (bond->active_member->enabled) { > > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s", > > + VLOG_INFO_RL(&rl, "%s: active member is now %s", > > bond->name, bond->active_member->name); > > } else { > > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " > > + VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping " > > "remaining %lld ms updelay (since no member was " > > "enabled)", bond->name, bond->active_member->name, > > bond->active_member->delay_expires - time_msec()); > > @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond) > > } > > } else if (old_active_member) { > > bond_active_member_changed(bond); > > - VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); > > + VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name); > > } > > } > > >
diff --git a/lib/lacp.c b/lib/lacp.c index 3252f17eb..bca347129 100644 --- a/lib/lacp.c +++ b/lib/lacp.c @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void *member_, break; case SUBTYPE_MARKER: member->count_rx_pdus_marker++; - VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); + VLOG_DBG("%s: received a LACP marker PDU", lacp->name); goto out; case SUBTYPE_UNUSED: default: member->count_rx_pdus_bad++; - VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", + VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU", lacp->name); goto out; } @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void *member_, * trigger re-checking of L1 state. */ if (!member->carrier_up) { VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," - " dropping received LACP PDU.", member->name); + " dropping received LACP PDU", member->name); seq_change(connectivity_seq_get()); goto out; } + if (member->status == LACP_DEFAULTED) { + VLOG_INFO("%s: defaulted member %s reestablished connection with" + " LACP partner", lacp->name, member->name); + } + member->status = LACP_CURRENT; tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_, if (!lacp->key_member) { lacp->key_member = member; } + + VLOG_DBG("%s: member %s added", lacp->name, s->name); } if (!member->name || strcmp(s->name, member->name)) { + if (member->name) { + VLOG_DBG("%s: member %s renamed from %s", lacp->name, + s->name, member->name); + } + free(member->name); member->name = xstrdup(s->name); } @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, member->count_carrier_changed++; } + VLOG_DBG("%s: member %s changed state to %s", lacp->name, + member->name, carrier_up ? "up" : "down"); + out: lacp_unlock(); } @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) /* XXX: In the future allow users to configure the expected system ID. * For now just special case loopback. */ if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { - VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " - "connected to its own bond", member->name); + VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. " + "Interface is connected to its own bond", + lacp->name, member->name); continue; } diff --git a/ofproto/bond.c b/ofproto/bond.c index 845f69e21..13702950d 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond) char *err_s = match_to_string(&pr_op->match, NULL, RECIRC_RULE_PRIORITY); - VLOG_ERR("failed to add post recirculation flow %s", err_s); + VLOG_ERR("%s: failed to add post recirculation flow %s", + bond->name, err_s); free(err_s); } break; @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond) char *err_s = match_to_string(&pr_op->match, NULL, RECIRC_RULE_PRIORITY); - VLOG_ERR("failed to remove post recirculation flow %s", err_s); + VLOG_ERR("%s: failed to remove post recirculation flow " + "%s", bond->name, err_s); free(err_s); } @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) if (s->use_lb_output_action && !ovs_lb_output_action_supported(bond->ofproto)) { VLOG_WARN("%s: Datapath does not support 'lb_output' action, " - "disabled.", bond->name); + "disabled", bond->name); } else { bond->use_lb_output_action = s->use_lb_output_action; if (!bond->use_lb_output_action) { @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void *member_, ovs_rwlock_rdlock(&rwlock); member = bond_member_lookup(bond, member_); if (!member) { + VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed", + bond->name); goto out; } @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, * main thread to run LACP state machine and enable the member. */ verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP; if (!member->enabled && member->may_enable) { - VLOG_DBG_RL(&rl, "bond %s: member %s: " + VLOG_DBG_RL(&rl, "%s: member %s: " "main thread has not yet enabled member", bond->name, bond->active_member->name); } @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, /* Drop all packets which arrive on backup members. This is similar to * how Linux bonding handles active-backup bonds. */ if (bond->active_member != member) { - VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond received " + VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received " "packet on backup member destined for " ETH_ADDR_FMT, bond->name, member->name, ETH_ADDR_ARGS(eth_dst)); goto out; @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void *member_, OVS_NOT_REACHED(); out: if (member && (verdict != BV_ACCEPT)) { - VLOG_DBG_RL(&rl, "bond %s: member %s: " + VLOG_DBG_RL(&rl, "%s: member %s: " "admissibility verdict is to drop pkt%s, " "active member: %s, may_enable: %s, enabled: %s, " "LACP status: %s", @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals) if (ds.length) { ds_put_char(&ds, ','); } - ds_put_format(&ds, " %s %"PRIu64"kB", - member->name, member->tx_bytes / 1024); + if (member->tx_bytes > 1024) { + ds_put_format(&ds, " %s %"PRIu64" kB", + member->name, member->tx_bytes / 1024); + } else { + ds_put_format(&ds, " %s %"PRIu64" B", + member->name, member->tx_bytes); + } if (!member->enabled) { ds_put_cstr(&ds, " (disabled)"); @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals) if (&e->list_node != ovs_list_front(&member->entries)) { ds_put_cstr(&ds, " + "); } - ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", - e - bond->hash, e->tx_bytes / 1024); + if (e->tx_bytes > 1024) { + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB", + e - bond->hash, e->tx_bytes / 1024); + } else { + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B", + e - bond->hash, e->tx_bytes); + } } ds_put_cstr(&ds, ")"); } } - VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds)); + if (ds.length) { + VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds)); + } ds_destroy(&ds); } } @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) struct bond *bond = from->bond; uint64_t delta = hash->tx_bytes; - VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") " - "from %s to %s (now carrying %"PRIu64"kB and " - "%"PRIu64"kB load, respectively)", - bond->name, delta / 1024, hash - bond->hash, - from->name, to->name, - (from->tx_bytes - delta) / 1024, - (to->tx_bytes + delta) / 1024); - /* Shift load away from 'from' to 'to'. */ from->tx_bytes -= delta; to->tx_bytes += delta; @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond) insert_bal(&bals, member); } } + log_bals(bond, &bals); /* Shift load from the most-loaded members to the least-loaded members. */ @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond) uint64_t overload; overload = from->tx_bytes - to->tx_bytes; + if (overload < to->tx_bytes >> 5 || overload < 100000) { /* The extra load on 'from' (and all less-loaded members), compared * to that of 'to' (the least-loaded member), is less than ~3%, or @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond) reinsert_bal(&bals, from); reinsert_bal(&bals, to); } + VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64 + " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)", + bond->name, cnt, + (overload + to->tx_bytes - from->tx_bytes) >> 1, + from->name, from->tx_bytes, to->name, to->tx_bytes); + rebalanced = true; } @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond *bond) if (be_tx_k) { ds_put_format(ds, " hash %d: %"PRIu64" kB load\n", hash, be_tx_k); + } else if (be->tx_bytes) { + ds_put_format(ds, " hash %d: %"PRIu64" B load\n", + hash, be->tx_bytes); } /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn *conn, if (bond->active_member != member) { bond->bond_revalidate = true; bond->active_member = member; - VLOG_INFO("bond %s: active member is now %s", + VLOG_INFO("%s: active member is now %s", bond->name, member->name); bond->send_learning_packets = true; unixctl_command_reply(conn, "done"); @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool enable) } ovs_mutex_unlock(&member->bond->mutex); - VLOG_INFO("member %s: %s", member->name, + VLOG_INFO("%s: member %s %s", bond->name, member->name, member->enabled ? "enabled" : "disabled"); } } @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member) { struct bond *bond = member->bond; bool up; + bool carrier = netdev_get_carrier(member->netdev); - up = netdev_get_carrier(member->netdev) && member->may_enable; + up = carrier && member->may_enable; if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { + char * msg; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); - VLOG_INFO_RL(&rl, "member %s: link state %s", - member->name, up ? "up" : "down"); + if (!carrier) { + msg = "carrier state"; + } else { + msg = "link state"; + } + VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name, + member->name, msg, up ? "up" : "down"); if (up == member->enabled) { member->delay_expires = LLONG_MAX; - VLOG_INFO_RL(&rl, "member %s: will not be %s", - member->name, up ? "disabled" : "enabled"); + VLOG_INFO_RL(&rl, "%s: member %s will not be %s", + bond->name, member->name, + up ? "disabled" : "enabled"); } else { int delay = up ? bond->updelay : bond->downdelay; member->delay_expires = time_msec() + delay; if (delay) { - VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " - "for %d ms", - member->name, + VLOG_INFO_RL(&rl, "%s: member %s will be %s" + " if it stays %s for %d ms", + bond->name, member->name, up ? "enabled" : "disabled", up ? "up" : "down", delay); @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const struct flow *flow, { struct bond_entry *e; int balance; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); balance = bond->balance; if (bond->lacp_status == LACP_CONFIGURED) { /* LACP has been configured on this bond but negotiations were - * unsuccussful. If lacp_fallback_ab is enabled use active- + * unsuccessful. If lacp_fallback_ab is enabled use active- * backup mode else drop all traffic. */ if (!bond->lacp_fallback_ab) { - return NULL; + goto error; } balance = BM_AB; } @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const struct flow *flow, case BM_TCP: if (bond->lacp_status != LACP_NEGOTIATED) { /* Must have LACP negotiations for TCP balanced bonds. */ - return NULL; + goto error; } if (wc) { flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP); @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const struct flow *flow, default: OVS_NOT_REACHED(); } +error: + if (VLOG_IS_DBG_ENABLED()) { + struct ds ds = DS_EMPTY_INITIALIZER; + flow_format(&ds, flow, NULL); + VLOG_DBG_RL(&rl, "%s: cannot choose output member " + "for flow %s with LACP status %s", + bond->name, + ds_cstr(&ds), + lacp_status_description(bond->lacp_status)); + ds_destroy(&ds); + } + return NULL; } static struct bond_member * @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond) bond->active_member = bond_choose_member(bond); if (bond->active_member) { if (bond->active_member->enabled) { - VLOG_INFO_RL(&rl, "bond %s: active member is now %s", + VLOG_INFO_RL(&rl, "%s: active member is now %s", bond->name, bond->active_member->name); } else { - VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " + VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping " "remaining %lld ms updelay (since no member was " "enabled)", bond->name, bond->active_member->name, bond->active_member->delay_expires - time_msec()); @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond) } } else if (old_active_member) { bond_active_member_changed(bond); - VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); + VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name); } }