Message ID | 3abdeea2a7124d62b1ec6f28fe48d0d14aac5315.1608128903.git.lorenzo.bianconi@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | introduce BFD support in ovn-controller | expand |
On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote: > > Introduce the capability to transmit BFD packets in ovn-controller. > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters > (e.g. min_tx, min_rx, ..) for ovn-controller. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> Hi Lorenzo, Please see below for a few comments. Thanks Numan > --- > controller/ovn-controller.c | 1 + > controller/pinctrl.c | 288 +++++++++++++++++++++++++++++++++++- > controller/pinctrl.h | 2 + > lib/ovn-l7.h | 19 +++ > northd/ovn-northd.c | 91 ++++++++++++ > ovn-nb.ovsschema | 24 ++- > ovn-nb.xml | 66 +++++++++ > ovn-sb.ovsschema | 27 +++- > ovn-sb.xml | 78 ++++++++++ > 9 files changed, 591 insertions(+), 5 deletions(-) > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index b5f0c1315..84b3f85ad 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[]) > ovnsb_idl_loop.idl), > sbrec_service_monitor_table_get( > ovnsb_idl_loop.idl), > + sbrec_bfd_table_get(ovnsb_idl_loop.idl), > br_int, chassis, > &runtime_data->local_datapaths, > &runtime_data->active_tunnels); > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index 7e3abf0a4..ae994b513 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, > static void notify_pinctrl_main(void); > static void notify_pinctrl_handler(void); > > +static bool bfd_monitor_should_inject(void); > +static void bfd_monitor_wait(long long int timeout); > +static void bfd_monitor_init(void); > +static void bfd_monitor_destroy(void); > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > + OVS_REQUIRES(pinctrl_mutex); > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > + const struct sbrec_chassis *chassis, > + const struct sset *active_tunnels) > + OVS_REQUIRES(pinctrl_mutex); > + > COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); > COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); > COVERAGE_DEFINE(pinctrl_drop_controller_event); > @@ -487,6 +499,7 @@ pinctrl_init(void) > ip_mcast_snoop_init(); > init_put_vport_bindings(); > init_svc_monitors(); > + bfd_monitor_init(); > pinctrl.br_int_name = NULL; > pinctrl_handler_seq = seq_create(); > pinctrl_main_seq = seq_create(); > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) > swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); > > while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { > + long long int bfd_time = LLONG_MAX; > + > ovs_mutex_lock(&pinctrl_mutex); > pinctrl_rconn_setup(swconn, pctrl->br_int_name); > ip_mcast_snoop_run(); > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) > send_ipv6_ras(swconn, &send_ipv6_ra_time); > send_ipv6_prefixd(swconn, &send_prefixd_time); > send_mac_binding_buffered_pkts(swconn); > + bfd_monitor_send_msg(swconn, &bfd_time); > ovs_mutex_unlock(&pinctrl_mutex); > > ip_mcast_querier_run(swconn, &send_mcast_query_time); > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) > ip_mcast_querier_wait(send_mcast_query_time); > svc_monitors_wait(svc_monitors_next_run_time); > ipv6_prefixd_wait(send_prefixd_time); > + bfd_monitor_wait(bfd_time); > > new_seq = seq_read(pinctrl_handler_seq); > seq_wait(pinctrl_handler_seq, new_seq); > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct sbrec_dns_table *dns_table, > const struct sbrec_controller_event_table *ce_table, > const struct sbrec_service_monitor_table *svc_mon_table, > + const struct sbrec_bfd_table *bfd_table, > const struct ovsrec_bridge *br_int, > const struct sbrec_chassis *chassis, > const struct hmap *local_datapaths, > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > local_datapaths); > sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, > chassis); > + bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, > + active_tunnels); > ovs_mutex_unlock(&pinctrl_mutex); > } > > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void) > destroy_dns_cache(); > ip_mcast_snoop_destroy(); > destroy_svc_monitors(); > + bfd_monitor_destroy(); > seq_destroy(pinctrl_main_seq); > seq_destroy(pinctrl_handler_seq); > } > @@ -5525,7 +5546,8 @@ may_inject_pkts(void) > !shash_is_empty(&send_garp_rarp_data) || > ipv6_prefixd_should_inject() || > !ovs_list_is_empty(&mcast_query_list) || > - !ovs_list_is_empty(&buffered_mac_bindings)); > + !ovs_list_is_empty(&buffered_mac_bindings) || > + bfd_monitor_should_inject()); > } > > static void > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, > > } > > +static struct hmap bfd_monitor_map; > + > +struct bfd_entry { > + struct hmap_node node; > + > + /* L2 source address */ > + struct eth_addr src_mac; > + /* IPv4 source address */ > + ovs_be32 ip_src; > + /* IPv4 destination address */ > + ovs_be32 ip_dst; > + /* RFC 5881 section 4 > + * The source port MUST be in the range 49152 through 65535. > + * The same UDP source port number MUST be used for all BFD > + * Control packets associated with a particular session. > + * The source port number SHOULD be unique among all BFD > + * sessions on the system > + */ > + uint16_t udp_src; > + ovs_be32 disc; > + > + int64_t port_key; > + int64_t metadata; > + > + long long int last_update; > + long long int next_tx; > +}; > + > +static void > +bfd_monitor_init(void) > +{ > + hmap_init(&bfd_monitor_map); > +} > + > +static void > +bfd_monitor_destroy(void) > +{ > + hmap_destroy(&bfd_monitor_map); I think you also need to remove the hmap nodes and free the 'struct bfd_entry' objects before destroying the hmap. > +} > + > +static struct bfd_entry * > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) > +{ > + struct bfd_entry *entry; > + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), > + &bfd_monitor_map) { > + if (entry->udp_src == port) { > + return entry; > + } > + } > + return NULL; > +} > + > +static bool > +bfd_monitor_should_inject(void) > +{ > + long long int cur_time = time_msec(); > + struct bfd_entry *entry; > + > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > + if (entry->next_tx < cur_time) { > + return true; > + } > + } > + return false; > +} > + > +static void > +bfd_monitor_wait(long long int timeout) > +{ > + if (!hmap_is_empty(&bfd_monitor_map)) { > + poll_timer_wait_until(timeout); > + } > +} > + > +static void > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) > +{ > + struct udp_header *udp; > + struct bfd_msg *msg; > + > + dp_packet_reserve(packet, 2); > + struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); > + eth->eth_dst = eth_addr_broadcast; > + eth->eth_src = entry->src_mac; > + eth->eth_type = htons(ETH_TYPE_IP); > + > + struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); > + ip->ip_ihl_ver = IP_IHL_VER(5, 4); > + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); > + ip->ip_ttl = MAXTTL; > + ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; > + ip->ip_proto = IPPROTO_UDP; > + put_16aligned_be32(&ip->ip_src, entry->ip_src); > + put_16aligned_be32(&ip->ip_dst, entry->ip_dst); > + /* Checksum has already been zeroed by put_zeros call. */ > + ip->ip_csum = csum(ip, sizeof *ip); > + > + udp = dp_packet_put_zeros(packet, sizeof *udp); > + udp->udp_src = htons(entry->udp_src); > + udp->udp_dst = htons(BFD_DEST_PORT); > + udp->udp_len = htons(sizeof *udp + sizeof *msg); > + > + msg = dp_packet_put_uninit(packet, sizeof *msg); > + msg->vers_diag = (BFD_VERSION << 5); > + msg->length = BFD_PACKET_LEN; > +} > + > +static void > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > + OVS_REQUIRES(pinctrl_mutex) > +{ > + long long int cur_time = time_msec(); > + struct bfd_entry *entry; > + > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > + if (cur_time < entry->next_tx) { > + goto next; > + } > + > + uint64_t packet_stub[256 / 8]; > + struct dp_packet packet; > + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); > + bfd_monitor_put_bfd_msg(entry, &packet); > + > + uint64_t ofpacts_stub[4096 / 8]; > + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > + > + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ > + uint32_t dp_key = entry->metadata; > + uint32_t port_key = entry->port_key; > + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); > + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); > + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); > + resubmit->in_port = OFPP_CONTROLLER; > + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; > + > + struct ofputil_packet_out po = { > + .packet = dp_packet_data(&packet), > + .packet_len = dp_packet_size(&packet), > + .buffer_id = UINT32_MAX, > + .ofpacts = ofpacts.data, > + .ofpacts_len = ofpacts.size, > + }; > + > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > + enum ofp_version version = rconn_get_version(swconn); > + enum ofputil_protocol proto = > + ofputil_protocol_from_ofp_version(version); > + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > + dp_packet_uninit(&packet); > + ofpbuf_uninit(&ofpacts); > + > + entry->next_tx = cur_time + 5000; > +next: > + if (*bfd_time > entry->next_tx) { > + *bfd_time = entry->next_tx; I think there is some problem here. I applied all the patches of this series and deployed fake multinode. When I create a BFD entry like below, ovn-controller pinctrl thread on ovn-gw-1 fake chassis, just wakes up 100% of the time. --- ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50 --- In the ovn-controller logs I see ---- 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due to 0-ms timeout at controller/pinctrl.c:6495 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due to 0-ms timeout at controller/pinctrl.c:6495 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due to 0-ms timeout at controller/pinctrl.c:6495 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due to 0-ms timeout at controller/pinctrl.c:6495 ... .. ---- Thanks Numan > + } > + } > +} > + > +#define BFD_MONITOR_STALE_TIMEOUT 180000LL > +static void > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > + const struct sbrec_chassis *chassis, > + const struct sset *active_tunnels) > + OVS_REQUIRES(pinctrl_mutex) > +{ > + long long int cur_time = time_msec(); > + bool changed = false; > + > + const struct sbrec_bfd *bt; > + SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { > + const struct sbrec_port_binding *pb > + = lport_lookup_by_name(sbrec_port_binding_by_name, > + bt->logical_port); > + if (!pb) { > + continue; > + } > + > + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port( > + bt->dst_ip, bt->src_port); > + if (!entry) { > + ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); > + struct eth_addr ea = eth_addr_zero; > + int i; > + > + if (!ip_parse(bt->dst_ip, &ip_dst)) { > + continue; > + } > + > + const char *peer_s = smap_get(&pb->options, "peer"); > + if (!peer_s) { > + continue; > + } > + > + const struct sbrec_port_binding *peer > + = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); > + if (!peer) { > + continue; > + } > + > + char *redirect_name = xasprintf("cr-%s", pb->logical_port); > + bool resident = lport_is_chassis_resident( > + sbrec_port_binding_by_name, chassis, active_tunnels, > + redirect_name); > + free(redirect_name); > + if (!resident && strcmp(pb->type, "l3gateway") && > + pb->chassis != chassis) { > + continue; > + } > + > + for (i = 0; i < pb->n_mac; i++) { > + struct lport_addresses laddrs; > + > + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { > + continue; > + } > + > + ea = laddrs.ea; > + if (laddrs.n_ipv4_addrs > 0) { > + ip_src = laddrs.ipv4_addrs[0].addr; > + destroy_lport_addresses(&laddrs); > + break; > + } > + destroy_lport_addresses(&laddrs); > + } > + > + if (eth_addr_is_zero(ea)) { > + continue; > + } > + > + entry = xzalloc(sizeof *entry); > + entry->src_mac = ea; > + entry->ip_src = ip_src; > + entry->ip_dst = ip_dst; > + entry->udp_src = bt->src_port; > + entry->disc = htonl(bt->disc); > + entry->next_tx = cur_time; > + entry->metadata = pb->datapath->tunnel_key; > + entry->port_key = pb->tunnel_key; > + > + uint32_t hash = hash_string(bt->dst_ip, 0); > + hmap_insert(&bfd_monitor_map, &entry->node, hash); > + changed = true; > + } > + entry->last_update = cur_time; > + } > + > + struct bfd_entry *entry, *next_entry; > + HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { > + if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) { > + hmap_remove(&bfd_monitor_map, &entry->node); > + free(entry); > + } > + } > + > + if (changed) { > + notify_pinctrl_handler(); > + } > +} > + > static uint16_t > get_random_src_port(void) > { > diff --git a/controller/pinctrl.h b/controller/pinctrl.h > index 4b101ec92..8555d983d 100644 > --- a/controller/pinctrl.h > +++ b/controller/pinctrl.h > @@ -31,6 +31,7 @@ struct sbrec_chassis; > struct sbrec_dns_table; > struct sbrec_controller_event_table; > struct sbrec_service_monitor_table; > +struct sbrec_bfd_table; > > void pinctrl_init(void); > void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct sbrec_dns_table *, > const struct sbrec_controller_event_table *, > const struct sbrec_service_monitor_table *, > + const struct sbrec_bfd_table *, > const struct ovsrec_bridge *, const struct sbrec_chassis *, > const struct hmap *local_datapaths, > const struct sset *active_tunnels); > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > index c84a0e7a9..d00982449 100644 > --- a/lib/ovn-l7.h > +++ b/lib/ovn-l7.h > @@ -26,6 +26,25 @@ > #include "hash.h" > #include "ovn/logical-fields.h" > > +#define BFD_PACKET_LEN 24 > +#define BFD_DEST_PORT 3784 > +#define BFD_VERSION 1 > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ > + > +struct bfd_msg { > + uint8_t vers_diag; > + uint8_t flags; > + uint8_t mult; > + uint8_t length; > + ovs_be32 my_disc; > + ovs_be32 your_disc; > + ovs_be32 min_tx; > + ovs_be32 min_rx; > + ovs_be32 min_rx_echo; > +}; > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); > + > /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */ > struct gen_opts_map { > struct hmap_node hmap_node; > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index b377dffa1..e8c7629e7 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, > } > } > > +struct bfd_entry { > + struct hmap_node hmap_node; > + > + const struct sbrec_bfd *sb_bt; > + bool found; > +}; > + > +static struct bfd_entry * > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, > + const char *dst_ip) > +{ > + struct bfd_entry *bfd_e; > + uint32_t hash; > + > + hash = hash_string(dst_ip, 0); > + hash = hash_string(logical_port, hash); > + HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { > + if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && > + !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { > + return bfd_e; > + } > + } > + return NULL; > +} > + > +static void > +build_bfd_table(struct northd_context *ctx) > +{ > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > + struct bfd_entry *bfd_e, *next_bfd_e; > + const struct sbrec_bfd *sb_bt; > + uint32_t hash; > + > + SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { > + bfd_e = xmalloc(sizeof *bfd_e); > + bfd_e->sb_bt = sb_bt; > + bfd_e->found = false; > + hash = hash_string(sb_bt->dst_ip, 0); > + hash = hash_string(sb_bt->logical_port, hash); > + hmap_insert(&sb_only, &bfd_e->hmap_node, hash); > + } > + > + const struct nbrec_bfd *nb_bt; > + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { > + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); > + if (!bfd_e) { > + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); > + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); > + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); > + sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); > + /* RFC 5881 section 4 > + * The source port MUST be in the range 49152 through 65535. > + * The same UDP source port number MUST be used for all BFD > + * Control packets associated with a particular session. > + * The source port number SHOULD be unique among all BFD > + * sessions on the system > + */ > + uint16_t udp_src = random_range(16383) + 49152; > + sbrec_bfd_set_src_port(sb_bt, udp_src); > + sbrec_bfd_set_status(sb_bt, "down"); > + sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx); > + sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx); > + sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult); > + } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { > + if (!strcmp(nb_bt->status, "admin_down") || > + !strcmp(bfd_e->sb_bt->status, "admin_down")) { > + sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); > + } else { > + nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); > + } > + } > + if (bfd_e) { > + bfd_e->found = true; > + } > + } > + > + HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { > + if (!bfd_e->found && bfd_e->sb_bt) { > + sbrec_bfd_delete(bfd_e->sb_bt); > + } > + hmap_remove(&sb_only, &bfd_e->hmap_node); > + free(bfd_e); > + } > + hmap_destroy(&sb_only); > +} > + > static void > sync_address_set(struct northd_context *ctx, const char *name, > const char **addrs, size_t n_addrs, > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx, > build_ip_mcast(ctx, datapaths); > build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); > build_meter_groups(ctx, &meter_groups); > + build_bfd_table(ctx); > build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, > &igmp_groups, &meter_groups, &lbs); > ovn_update_ipv6_prefix(ports); > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[]) > add_column_noalert(ovnsb_idl_loop.idl, > &sbrec_load_balancer_col_external_ids); > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); > + > struct ovsdb_idl_index *sbrec_chassis_by_name > = chassis_index_create(ovnsb_idl_loop.idl); > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > index b77a2308c..a880482a4 100644 > --- a/ovn-nb.ovsschema > +++ b/ovn-nb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Northbound", > - "version": "5.30.0", > - "cksum": "3273824429 27172", > + "version": "5.31.0", > + "cksum": "3663844734 28178", > "tables": { > "NB_Global": { > "columns": { > @@ -526,5 +526,25 @@ > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}}}, > "indexes": [["name"]], > + "isRoot": true}, > + "BFD": { > + "columns": { > + "logical_port": {"type": "string"}, > + "dst_ip": {"type": "string"}, > + "min_tx": {"type": {"key": {"type": "integer"}}}, > + "min_rx": {"type": {"key": {"type": "integer"}}}, > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > + "status": { > + "type": {"key": {"type": "string", > + "enum": ["set", ["down", "init", "up", > + "admin_down"]]}, > + "min": 0, "max": 1}}, > + "external_ids": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}, > + "options": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}}, > + "indexes": [["logical_port", "dst_ip"]], > "isRoot": true}} > } > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 0cf043790..4a28d3f0d 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -3728,4 +3728,70 @@ > </column> > </group> > </table> > + > + <table name="BFD"> > + <p> > + Contains BFD parameter for ovn-controller bfd configuration > + </p> > + > + <group title="Configuration"> > + <column name="logical_port"> > + OVN logical port when BFD engine is running. > + </column> > + > + <column name="dst_ip"> > + BFD peer IP address. > + </column> > + > + <column name="min_tx"> > + This is the minimum interval, in milliseconds, that the local > + system would like to use when transmitting BFD Control packets, > + less any jitter applied. The value zero is reserved. > + </column> > + > + <column name="min_rx"> > + This is the minimum interval, in milliseconds, between received > + BFD Control packets that this system is capable of supporting, > + less any jitter applied by the sender. If this value is zero, > + the transmitting system does not want the remote system to send > + any periodic BFD Control packets. > + </column> > + > + <column name="detect_mult"> > + Detection time multiplier. The negotiated transmit interval, > + multiplied by this value, provides the Detection Time for the > + receiving system in Asynchronous mode. > + </column> > + > + <column name="options"> > + Reserved for future use. > + </column> > + > + <column name="external_ids"> > + See <em>External IDs</em> at the beginning of this document. > + </column> > + </group> > + > + <group title="Status Reporting"> > + <column name="status"> > + <p> > + BFD port logical states. Possible values are: > + <ul> > + <li> > + <code>admin_down</code> > + </li> > + <li> > + <code>down</code> > + </li> > + <li> > + <code>init</code> > + </li> > + <li> > + <code>up</code> > + </li> > + </ul> > + </p> > + </column> > + </group> > + </table> > </database> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > index 5228839b8..97db6de39 100644 > --- a/ovn-sb.ovsschema > +++ b/ovn-sb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Southbound", > - "version": "20.12.0", > - "cksum": "3969471120 24441", > + "version": "20.13.0", > + "cksum": "3035725595 25676", > "tables": { > "SB_Global": { > "columns": { > @@ -484,6 +484,29 @@ > "external_ids": { > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}}}, > + "isRoot": true}, > + "BFD": { > + "columns": { > + "src_port": {"type": {"key": {"type": "integer", > + "minInteger": 49152, > + "maxInteger": 65535}}}, > + "disc": {"type": {"key": {"type": "integer"}}}, > + "logical_port": {"type": "string"}, > + "dst_ip": {"type": "string"}, > + "min_tx": {"type": {"key": {"type": "integer"}}}, > + "min_rx": {"type": {"key": {"type": "integer"}}}, > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > + "status": { > + "type": {"key": {"type": "string", > + "enum": ["set", ["down", "init", "up", > + "admin_down"]]}}}, > + "external_ids": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}, > + "options": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}}, > + "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], > "isRoot": true} > } > } > diff --git a/ovn-sb.xml b/ovn-sb.xml > index c13994848..ccfe2e415 100644 > --- a/ovn-sb.xml > +++ b/ovn-sb.xml > @@ -4231,4 +4231,82 @@ tcp.flags = RST; > </column> > </group> > </table> > + > + <table name="BFD"> > + <p> > + Contains BFD parameter for ovn-controller bfd configuration > + </p> > + > + <group title="Configuration"> > + <column name="src_port"> > + udp source port used in bfd control packets. > + The source port MUST be in the range 49152 through 65535 > + (RFC5881 section 4). > + </column> > + > + <column name="disc"> > + A unique, nonzero discriminator value generated by the transmitting > + system, used to demultiplex multiple BFD sessions between the same pair > + of systems. > + </column> > + > + <column name="logical_port"> > + OVN logical port when BFD engine is running. > + </column> > + > + <column name="dst_ip"> > + BFD peer IP address. > + </column> > + > + <column name="min_tx"> > + This is the minimum interval, in milliseconds, that the local > + system would like to use when transmitting BFD Control packets, > + less any jitter applied. The value zero is reserved. > + </column> > + > + <column name="min_rx"> > + This is the minimum interval, in milliseconds, between received > + BFD Control packets that this system is capable of supporting, > + less any jitter applied by the sender. If this value is zero, > + the transmitting system does not want the remote system to send > + any periodic BFD Control packets. > + </column> > + > + <column name="detect_mult"> > + Detection time multiplier. The negotiated transmit interval, > + multiplied by this value, provides the Detection Time for the > + receiving system in Asynchronous mode. > + </column> > + > + <column name="options"> > + Reserved for future use. > + </column> > + > + <column name="external_ids"> > + See <em>External IDs</em> at the beginning of this document. > + </column> > + </group> > + > + <group title="Status Reporting"> > + <column name="status"> > + <p> > + BFD port logical states. Possible values are: > + <ul> > + <li> > + <code>admin_down</code> > + </li> > + <li> > + <code>down</code> > + </li> > + <li> > + <code>init</code> > + </li> > + <li> > + <code>up</code> > + </li> > + </ul> > + </p> > + </column> > + </group> > + </table> > </database> > -- > 2.29.2 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Fri, Dec 18, 2020 at 7:00 PM Numan Siddique <numans@ovn.org> wrote: > > On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi > <lorenzo.bianconi@redhat.com> wrote: > > > > Introduce the capability to transmit BFD packets in ovn-controller. > > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters > > (e.g. min_tx, min_rx, ..) for ovn-controller. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > Hi Lorenzo, > > Please see below for a few comments. > > Thanks > Numan > > > --- > > controller/ovn-controller.c | 1 + > > controller/pinctrl.c | 288 +++++++++++++++++++++++++++++++++++- > > controller/pinctrl.h | 2 + > > lib/ovn-l7.h | 19 +++ > > northd/ovn-northd.c | 91 ++++++++++++ > > ovn-nb.ovsschema | 24 ++- > > ovn-nb.xml | 66 +++++++++ > > ovn-sb.ovsschema | 27 +++- > > ovn-sb.xml | 78 ++++++++++ > > 9 files changed, 591 insertions(+), 5 deletions(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index b5f0c1315..84b3f85ad 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[]) > > ovnsb_idl_loop.idl), > > sbrec_service_monitor_table_get( > > ovnsb_idl_loop.idl), > > + sbrec_bfd_table_get(ovnsb_idl_loop.idl), > > br_int, chassis, > > &runtime_data->local_datapaths, > > &runtime_data->active_tunnels); > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > index 7e3abf0a4..ae994b513 100644 > > --- a/controller/pinctrl.c > > +++ b/controller/pinctrl.c > > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, > > static void notify_pinctrl_main(void); > > static void notify_pinctrl_handler(void); > > > > +static bool bfd_monitor_should_inject(void); > > +static void bfd_monitor_wait(long long int timeout); > > +static void bfd_monitor_init(void); > > +static void bfd_monitor_destroy(void); > > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > + OVS_REQUIRES(pinctrl_mutex); > > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > + const struct sbrec_chassis *chassis, > > + const struct sset *active_tunnels) > > + OVS_REQUIRES(pinctrl_mutex); > > + > > COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); > > COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); > > COVERAGE_DEFINE(pinctrl_drop_controller_event); > > @@ -487,6 +499,7 @@ pinctrl_init(void) > > ip_mcast_snoop_init(); > > init_put_vport_bindings(); > > init_svc_monitors(); > > + bfd_monitor_init(); > > pinctrl.br_int_name = NULL; > > pinctrl_handler_seq = seq_create(); > > pinctrl_main_seq = seq_create(); > > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) > > swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); > > > > while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { > > + long long int bfd_time = LLONG_MAX; > > + > > ovs_mutex_lock(&pinctrl_mutex); > > pinctrl_rconn_setup(swconn, pctrl->br_int_name); > > ip_mcast_snoop_run(); > > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) > > send_ipv6_ras(swconn, &send_ipv6_ra_time); > > send_ipv6_prefixd(swconn, &send_prefixd_time); > > send_mac_binding_buffered_pkts(swconn); > > + bfd_monitor_send_msg(swconn, &bfd_time); > > ovs_mutex_unlock(&pinctrl_mutex); > > > > ip_mcast_querier_run(swconn, &send_mcast_query_time); > > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) > > ip_mcast_querier_wait(send_mcast_query_time); > > svc_monitors_wait(svc_monitors_next_run_time); > > ipv6_prefixd_wait(send_prefixd_time); > > + bfd_monitor_wait(bfd_time); > > > > new_seq = seq_read(pinctrl_handler_seq); > > seq_wait(pinctrl_handler_seq, new_seq); > > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > const struct sbrec_dns_table *dns_table, > > const struct sbrec_controller_event_table *ce_table, > > const struct sbrec_service_monitor_table *svc_mon_table, > > + const struct sbrec_bfd_table *bfd_table, > > const struct ovsrec_bridge *br_int, > > const struct sbrec_chassis *chassis, > > const struct hmap *local_datapaths, > > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > local_datapaths); > > sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, > > chassis); > > + bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, > > + active_tunnels); > > ovs_mutex_unlock(&pinctrl_mutex); > > } > > > > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void) > > destroy_dns_cache(); > > ip_mcast_snoop_destroy(); > > destroy_svc_monitors(); > > + bfd_monitor_destroy(); > > seq_destroy(pinctrl_main_seq); > > seq_destroy(pinctrl_handler_seq); > > } > > @@ -5525,7 +5546,8 @@ may_inject_pkts(void) > > !shash_is_empty(&send_garp_rarp_data) || > > ipv6_prefixd_should_inject() || > > !ovs_list_is_empty(&mcast_query_list) || > > - !ovs_list_is_empty(&buffered_mac_bindings)); > > + !ovs_list_is_empty(&buffered_mac_bindings) || > > + bfd_monitor_should_inject()); > > } > > > > static void > > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > > } > > > > +static struct hmap bfd_monitor_map; > > + > > +struct bfd_entry { > > + struct hmap_node node; > > + > > + /* L2 source address */ > > + struct eth_addr src_mac; > > + /* IPv4 source address */ > > + ovs_be32 ip_src; > > + /* IPv4 destination address */ > > + ovs_be32 ip_dst; > > + /* RFC 5881 section 4 > > + * The source port MUST be in the range 49152 through 65535. > > + * The same UDP source port number MUST be used for all BFD > > + * Control packets associated with a particular session. > > + * The source port number SHOULD be unique among all BFD > > + * sessions on the system > > + */ > > + uint16_t udp_src; > > + ovs_be32 disc; > > + > > + int64_t port_key; > > + int64_t metadata; > > + > > + long long int last_update; > > + long long int next_tx; > > +}; > > + > > +static void > > +bfd_monitor_init(void) > > +{ > > + hmap_init(&bfd_monitor_map); > > +} > > + > > +static void > > +bfd_monitor_destroy(void) > > +{ > > + hmap_destroy(&bfd_monitor_map); > > I think you also need to remove the hmap nodes and free the 'struct > bfd_entry' objects > before destroying the hmap. > > > > +} > > + > > +static struct bfd_entry * > > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) > > +{ > > + struct bfd_entry *entry; > > + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), > > + &bfd_monitor_map) { > > + if (entry->udp_src == port) { > > + return entry; > > + } > > + } > > + return NULL; > > +} > > + > > +static bool > > +bfd_monitor_should_inject(void) > > +{ > > + long long int cur_time = time_msec(); > > + struct bfd_entry *entry; > > + > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > + if (entry->next_tx < cur_time) { > > + return true; > > + } > > + } > > + return false; > > +} > > + > > +static void > > +bfd_monitor_wait(long long int timeout) > > +{ > > + if (!hmap_is_empty(&bfd_monitor_map)) { > > + poll_timer_wait_until(timeout); > > + } > > +} > > + > > +static void > > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) > > +{ > > + struct udp_header *udp; > > + struct bfd_msg *msg; > > + > > + dp_packet_reserve(packet, 2); I don't understand why reserving 2 bytes at the head is required ? Can you please put some comments on why it is required ? Thanks Numan > > + struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); > > + eth->eth_dst = eth_addr_broadcast; > > + eth->eth_src = entry->src_mac; > > + eth->eth_type = htons(ETH_TYPE_IP); > > + > > + struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); > > + ip->ip_ihl_ver = IP_IHL_VER(5, 4); > > + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); > > + ip->ip_ttl = MAXTTL; > > + ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; > > + ip->ip_proto = IPPROTO_UDP; > > + put_16aligned_be32(&ip->ip_src, entry->ip_src); > > + put_16aligned_be32(&ip->ip_dst, entry->ip_dst); > > + /* Checksum has already been zeroed by put_zeros call. */ > > + ip->ip_csum = csum(ip, sizeof *ip); > > + > > + udp = dp_packet_put_zeros(packet, sizeof *udp); > > + udp->udp_src = htons(entry->udp_src); > > + udp->udp_dst = htons(BFD_DEST_PORT); > > + udp->udp_len = htons(sizeof *udp + sizeof *msg); > > + > > + msg = dp_packet_put_uninit(packet, sizeof *msg); > > + msg->vers_diag = (BFD_VERSION << 5); > > + msg->length = BFD_PACKET_LEN; > > +} > > + > > +static void > > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > + OVS_REQUIRES(pinctrl_mutex) > > +{ > > + long long int cur_time = time_msec(); > > + struct bfd_entry *entry; > > + > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > + if (cur_time < entry->next_tx) { > > + goto next; > > + } > > + > > + uint64_t packet_stub[256 / 8]; > > + struct dp_packet packet; > > + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); > > + bfd_monitor_put_bfd_msg(entry, &packet); > > + > > + uint64_t ofpacts_stub[4096 / 8]; > > + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > > + > > + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ > > + uint32_t dp_key = entry->metadata; > > + uint32_t port_key = entry->port_key; > > + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); > > + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); > > + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); > > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); > > + resubmit->in_port = OFPP_CONTROLLER; > > + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; > > + > > + struct ofputil_packet_out po = { > > + .packet = dp_packet_data(&packet), > > + .packet_len = dp_packet_size(&packet), > > + .buffer_id = UINT32_MAX, > > + .ofpacts = ofpacts.data, > > + .ofpacts_len = ofpacts.size, > > + }; > > + > > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > > + enum ofp_version version = rconn_get_version(swconn); > > + enum ofputil_protocol proto = > > + ofputil_protocol_from_ofp_version(version); > > + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > > + dp_packet_uninit(&packet); > > + ofpbuf_uninit(&ofpacts); > > + > > + entry->next_tx = cur_time + 5000; > > +next: > > + if (*bfd_time > entry->next_tx) { > > + *bfd_time = entry->next_tx; > > I think there is some problem here. I applied all the patches of this > series and deployed > fake multinode. > > When I create a BFD entry like below, ovn-controller pinctrl thread on > ovn-gw-1 fake chassis, just wakes up > 100% of the time. > > --- > ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50 > --- > > In the ovn-controller logs I see > > ---- > 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > ... > .. > ---- > > Thanks > Numan > > > > + } > > + } > > +} > > + > > +#define BFD_MONITOR_STALE_TIMEOUT 180000LL > > +static void > > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > + const struct sbrec_chassis *chassis, > > + const struct sset *active_tunnels) > > + OVS_REQUIRES(pinctrl_mutex) > > +{ > > + long long int cur_time = time_msec(); > > + bool changed = false; > > + > > + const struct sbrec_bfd *bt; > > + SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { > > + const struct sbrec_port_binding *pb > > + = lport_lookup_by_name(sbrec_port_binding_by_name, > > + bt->logical_port); > > + if (!pb) { > > + continue; > > + } > > + > > + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port( > > + bt->dst_ip, bt->src_port); > > + if (!entry) { > > + ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); > > + struct eth_addr ea = eth_addr_zero; > > + int i; > > + > > + if (!ip_parse(bt->dst_ip, &ip_dst)) { > > + continue; > > + } > > + > > + const char *peer_s = smap_get(&pb->options, "peer"); > > + if (!peer_s) { > > + continue; > > + } > > + > > + const struct sbrec_port_binding *peer > > + = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); > > + if (!peer) { > > + continue; > > + } > > + > > + char *redirect_name = xasprintf("cr-%s", pb->logical_port); > > + bool resident = lport_is_chassis_resident( > > + sbrec_port_binding_by_name, chassis, active_tunnels, > > + redirect_name); > > + free(redirect_name); > > + if (!resident && strcmp(pb->type, "l3gateway") && > > + pb->chassis != chassis) { > > + continue; > > + } > > + > > + for (i = 0; i < pb->n_mac; i++) { > > + struct lport_addresses laddrs; > > + > > + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { > > + continue; > > + } > > + > > + ea = laddrs.ea; > > + if (laddrs.n_ipv4_addrs > 0) { > > + ip_src = laddrs.ipv4_addrs[0].addr; > > + destroy_lport_addresses(&laddrs); > > + break; > > + } > > + destroy_lport_addresses(&laddrs); > > + } > > + > > + if (eth_addr_is_zero(ea)) { > > + continue; > > + } > > + > > + entry = xzalloc(sizeof *entry); > > + entry->src_mac = ea; > > + entry->ip_src = ip_src; > > + entry->ip_dst = ip_dst; > > + entry->udp_src = bt->src_port; > > + entry->disc = htonl(bt->disc); > > + entry->next_tx = cur_time; > > + entry->metadata = pb->datapath->tunnel_key; > > + entry->port_key = pb->tunnel_key; > > + > > + uint32_t hash = hash_string(bt->dst_ip, 0); > > + hmap_insert(&bfd_monitor_map, &entry->node, hash); > > + changed = true; > > + } > > + entry->last_update = cur_time; > > + } > > + > > + struct bfd_entry *entry, *next_entry; > > + HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { > > + if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) { > > + hmap_remove(&bfd_monitor_map, &entry->node); > > + free(entry); > > + } > > + } > > + > > + if (changed) { > > + notify_pinctrl_handler(); > > + } > > +} > > + > > static uint16_t > > get_random_src_port(void) > > { > > diff --git a/controller/pinctrl.h b/controller/pinctrl.h > > index 4b101ec92..8555d983d 100644 > > --- a/controller/pinctrl.h > > +++ b/controller/pinctrl.h > > @@ -31,6 +31,7 @@ struct sbrec_chassis; > > struct sbrec_dns_table; > > struct sbrec_controller_event_table; > > struct sbrec_service_monitor_table; > > +struct sbrec_bfd_table; > > > > void pinctrl_init(void); > > void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > const struct sbrec_dns_table *, > > const struct sbrec_controller_event_table *, > > const struct sbrec_service_monitor_table *, > > + const struct sbrec_bfd_table *, > > const struct ovsrec_bridge *, const struct sbrec_chassis *, > > const struct hmap *local_datapaths, > > const struct sset *active_tunnels); > > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > > index c84a0e7a9..d00982449 100644 > > --- a/lib/ovn-l7.h > > +++ b/lib/ovn-l7.h > > @@ -26,6 +26,25 @@ > > #include "hash.h" > > #include "ovn/logical-fields.h" > > > > +#define BFD_PACKET_LEN 24 > > +#define BFD_DEST_PORT 3784 > > +#define BFD_VERSION 1 > > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ > > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ > > + > > +struct bfd_msg { > > + uint8_t vers_diag; > > + uint8_t flags; > > + uint8_t mult; > > + uint8_t length; > > + ovs_be32 my_disc; > > + ovs_be32 your_disc; > > + ovs_be32 min_tx; > > + ovs_be32 min_rx; > > + ovs_be32 min_rx_echo; > > +}; > > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); > > + > > /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */ > > struct gen_opts_map { > > struct hmap_node hmap_node; > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > index b377dffa1..e8c7629e7 100644 > > --- a/northd/ovn-northd.c > > +++ b/northd/ovn-northd.c > > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, > > } > > } > > > > +struct bfd_entry { > > + struct hmap_node hmap_node; > > + > > + const struct sbrec_bfd *sb_bt; > > + bool found; > > +}; > > + > > +static struct bfd_entry * > > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, > > + const char *dst_ip) > > +{ > > + struct bfd_entry *bfd_e; > > + uint32_t hash; > > + > > + hash = hash_string(dst_ip, 0); > > + hash = hash_string(logical_port, hash); > > + HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { > > + if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && > > + !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { > > + return bfd_e; > > + } > > + } > > + return NULL; > > +} > > + > > +static void > > +build_bfd_table(struct northd_context *ctx) > > +{ > > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > > + struct bfd_entry *bfd_e, *next_bfd_e; > > + const struct sbrec_bfd *sb_bt; > > + uint32_t hash; > > + > > + SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { > > + bfd_e = xmalloc(sizeof *bfd_e); > > + bfd_e->sb_bt = sb_bt; > > + bfd_e->found = false; > > + hash = hash_string(sb_bt->dst_ip, 0); > > + hash = hash_string(sb_bt->logical_port, hash); > > + hmap_insert(&sb_only, &bfd_e->hmap_node, hash); > > + } > > + > > + const struct nbrec_bfd *nb_bt; > > + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { > > + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); > > + if (!bfd_e) { > > + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); > > + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); > > + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); > > + sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); > > + /* RFC 5881 section 4 > > + * The source port MUST be in the range 49152 through 65535. > > + * The same UDP source port number MUST be used for all BFD > > + * Control packets associated with a particular session. > > + * The source port number SHOULD be unique among all BFD > > + * sessions on the system > > + */ > > + uint16_t udp_src = random_range(16383) + 49152; > > + sbrec_bfd_set_src_port(sb_bt, udp_src); > > + sbrec_bfd_set_status(sb_bt, "down"); > > + sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx); > > + sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx); > > + sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult); > > + } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { > > + if (!strcmp(nb_bt->status, "admin_down") || > > + !strcmp(bfd_e->sb_bt->status, "admin_down")) { > > + sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); > > + } else { > > + nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); > > + } > > + } > > + if (bfd_e) { > > + bfd_e->found = true; > > + } > > + } > > + > > + HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { > > + if (!bfd_e->found && bfd_e->sb_bt) { > > + sbrec_bfd_delete(bfd_e->sb_bt); > > + } > > + hmap_remove(&sb_only, &bfd_e->hmap_node); > > + free(bfd_e); > > + } > > + hmap_destroy(&sb_only); > > +} > > + > > static void > > sync_address_set(struct northd_context *ctx, const char *name, > > const char **addrs, size_t n_addrs, > > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx, > > build_ip_mcast(ctx, datapaths); > > build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); > > build_meter_groups(ctx, &meter_groups); > > + build_bfd_table(ctx); > > build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, > > &igmp_groups, &meter_groups, &lbs); > > ovn_update_ipv6_prefix(ports); > > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[]) > > add_column_noalert(ovnsb_idl_loop.idl, > > &sbrec_load_balancer_col_external_ids); > > > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); > > + > > struct ovsdb_idl_index *sbrec_chassis_by_name > > = chassis_index_create(ovnsb_idl_loop.idl); > > > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > > index b77a2308c..a880482a4 100644 > > --- a/ovn-nb.ovsschema > > +++ b/ovn-nb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Northbound", > > - "version": "5.30.0", > > - "cksum": "3273824429 27172", > > + "version": "5.31.0", > > + "cksum": "3663844734 28178", > > "tables": { > > "NB_Global": { > > "columns": { > > @@ -526,5 +526,25 @@ > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}}, > > "indexes": [["name"]], > > + "isRoot": true}, > > + "BFD": { > > + "columns": { > > + "logical_port": {"type": "string"}, > > + "dst_ip": {"type": "string"}, > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > + "status": { > > + "type": {"key": {"type": "string", > > + "enum": ["set", ["down", "init", "up", > > + "admin_down"]]}, > > + "min": 0, "max": 1}}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}, > > + "options": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > + "indexes": [["logical_port", "dst_ip"]], > > "isRoot": true}} > > } > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > index 0cf043790..4a28d3f0d 100644 > > --- a/ovn-nb.xml > > +++ b/ovn-nb.xml > > @@ -3728,4 +3728,70 @@ > > </column> > > </group> > > </table> > > + > > + <table name="BFD"> > > + <p> > > + Contains BFD parameter for ovn-controller bfd configuration > > + </p> > > + > > + <group title="Configuration"> > > + <column name="logical_port"> > > + OVN logical port when BFD engine is running. > > + </column> > > + > > + <column name="dst_ip"> > > + BFD peer IP address. > > + </column> > > + > > + <column name="min_tx"> > > + This is the minimum interval, in milliseconds, that the local > > + system would like to use when transmitting BFD Control packets, > > + less any jitter applied. The value zero is reserved. > > + </column> > > + > > + <column name="min_rx"> > > + This is the minimum interval, in milliseconds, between received > > + BFD Control packets that this system is capable of supporting, > > + less any jitter applied by the sender. If this value is zero, > > + the transmitting system does not want the remote system to send > > + any periodic BFD Control packets. > > + </column> > > + > > + <column name="detect_mult"> > > + Detection time multiplier. The negotiated transmit interval, > > + multiplied by this value, provides the Detection Time for the > > + receiving system in Asynchronous mode. > > + </column> > > + > > + <column name="options"> > > + Reserved for future use. > > + </column> > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </group> > > + > > + <group title="Status Reporting"> > > + <column name="status"> > > + <p> > > + BFD port logical states. Possible values are: > > + <ul> > > + <li> > > + <code>admin_down</code> > > + </li> > > + <li> > > + <code>down</code> > > + </li> > > + <li> > > + <code>init</code> > > + </li> > > + <li> > > + <code>up</code> > > + </li> > > + </ul> > > + </p> > > + </column> > > + </group> > > + </table> > > </database> > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > > index 5228839b8..97db6de39 100644 > > --- a/ovn-sb.ovsschema > > +++ b/ovn-sb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Southbound", > > - "version": "20.12.0", > > - "cksum": "3969471120 24441", > > + "version": "20.13.0", > > + "cksum": "3035725595 25676", > > "tables": { > > "SB_Global": { > > "columns": { > > @@ -484,6 +484,29 @@ > > "external_ids": { > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}}, > > + "isRoot": true}, > > + "BFD": { > > + "columns": { > > + "src_port": {"type": {"key": {"type": "integer", > > + "minInteger": 49152, > > + "maxInteger": 65535}}}, > > + "disc": {"type": {"key": {"type": "integer"}}}, > > + "logical_port": {"type": "string"}, > > + "dst_ip": {"type": "string"}, > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > + "status": { > > + "type": {"key": {"type": "string", > > + "enum": ["set", ["down", "init", "up", > > + "admin_down"]]}}}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}, > > + "options": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > + "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], > > "isRoot": true} > > } > > } > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > index c13994848..ccfe2e415 100644 > > --- a/ovn-sb.xml > > +++ b/ovn-sb.xml > > @@ -4231,4 +4231,82 @@ tcp.flags = RST; > > </column> > > </group> > > </table> > > + > > + <table name="BFD"> > > + <p> > > + Contains BFD parameter for ovn-controller bfd configuration > > + </p> > > + > > + <group title="Configuration"> > > + <column name="src_port"> > > + udp source port used in bfd control packets. > > + The source port MUST be in the range 49152 through 65535 > > + (RFC5881 section 4). > > + </column> > > + > > + <column name="disc"> > > + A unique, nonzero discriminator value generated by the transmitting > > + system, used to demultiplex multiple BFD sessions between the same pair > > + of systems. > > + </column> > > + > > + <column name="logical_port"> > > + OVN logical port when BFD engine is running. > > + </column> > > + > > + <column name="dst_ip"> > > + BFD peer IP address. > > + </column> > > + > > + <column name="min_tx"> > > + This is the minimum interval, in milliseconds, that the local > > + system would like to use when transmitting BFD Control packets, > > + less any jitter applied. The value zero is reserved. > > + </column> > > + > > + <column name="min_rx"> > > + This is the minimum interval, in milliseconds, between received > > + BFD Control packets that this system is capable of supporting, > > + less any jitter applied by the sender. If this value is zero, > > + the transmitting system does not want the remote system to send > > + any periodic BFD Control packets. > > + </column> > > + > > + <column name="detect_mult"> > > + Detection time multiplier. The negotiated transmit interval, > > + multiplied by this value, provides the Detection Time for the > > + receiving system in Asynchronous mode. > > + </column> > > + > > + <column name="options"> > > + Reserved for future use. > > + </column> > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </group> > > + > > + <group title="Status Reporting"> > > + <column name="status"> > > + <p> > > + BFD port logical states. Possible values are: > > + <ul> > > + <li> > > + <code>admin_down</code> > > + </li> > > + <li> > > + <code>down</code> > > + </li> > > + <li> > > + <code>init</code> > > + </li> > > + <li> > > + <code>up</code> > > + </li> > > + </ul> > > + </p> > > + </column> > > + </group> > > + </table> > > </database> > > -- > > 2.29.2 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >
On Dec 18, Numan Siddique wrote: > On Fri, Dec 18, 2020 at 7:00 PM Numan Siddique <numans@ovn.org> wrote: > > > > On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi > > <lorenzo.bianconi@redhat.com> wrote: > > > > > > Introduce the capability to transmit BFD packets in ovn-controller. > > > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters > > > (e.g. min_tx, min_rx, ..) for ovn-controller. > > > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > > > Hi Lorenzo, > > > > Please see below for a few comments. > > > > Thanks > > Numan > > > > > --- > > > controller/ovn-controller.c | 1 + > > > controller/pinctrl.c | 288 +++++++++++++++++++++++++++++++++++- > > > controller/pinctrl.h | 2 + > > > lib/ovn-l7.h | 19 +++ > > > northd/ovn-northd.c | 91 ++++++++++++ > > > ovn-nb.ovsschema | 24 ++- > > > ovn-nb.xml | 66 +++++++++ > > > ovn-sb.ovsschema | 27 +++- > > > ovn-sb.xml | 78 ++++++++++ > > > 9 files changed, 591 insertions(+), 5 deletions(-) > > > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > > index b5f0c1315..84b3f85ad 100644 > > > --- a/controller/ovn-controller.c > > > +++ b/controller/ovn-controller.c > > > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[]) > > > ovnsb_idl_loop.idl), > > > sbrec_service_monitor_table_get( > > > ovnsb_idl_loop.idl), > > > + sbrec_bfd_table_get(ovnsb_idl_loop.idl), > > > br_int, chassis, > > > &runtime_data->local_datapaths, > > > &runtime_data->active_tunnels); > > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > > index 7e3abf0a4..ae994b513 100644 > > > --- a/controller/pinctrl.c > > > +++ b/controller/pinctrl.c > > > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, > > > static void notify_pinctrl_main(void); > > > static void notify_pinctrl_handler(void); > > > > > > +static bool bfd_monitor_should_inject(void); > > > +static void bfd_monitor_wait(long long int timeout); > > > +static void bfd_monitor_init(void); > > > +static void bfd_monitor_destroy(void); > > > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > > + OVS_REQUIRES(pinctrl_mutex); > > > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > > + const struct sbrec_chassis *chassis, > > > + const struct sset *active_tunnels) > > > + OVS_REQUIRES(pinctrl_mutex); > > > + > > > COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); > > > COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); > > > COVERAGE_DEFINE(pinctrl_drop_controller_event); > > > @@ -487,6 +499,7 @@ pinctrl_init(void) > > > ip_mcast_snoop_init(); > > > init_put_vport_bindings(); > > > init_svc_monitors(); > > > + bfd_monitor_init(); > > > pinctrl.br_int_name = NULL; > > > pinctrl_handler_seq = seq_create(); > > > pinctrl_main_seq = seq_create(); > > > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) > > > swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); > > > > > > while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { > > > + long long int bfd_time = LLONG_MAX; > > > + > > > ovs_mutex_lock(&pinctrl_mutex); > > > pinctrl_rconn_setup(swconn, pctrl->br_int_name); > > > ip_mcast_snoop_run(); > > > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) > > > send_ipv6_ras(swconn, &send_ipv6_ra_time); > > > send_ipv6_prefixd(swconn, &send_prefixd_time); > > > send_mac_binding_buffered_pkts(swconn); > > > + bfd_monitor_send_msg(swconn, &bfd_time); > > > ovs_mutex_unlock(&pinctrl_mutex); > > > > > > ip_mcast_querier_run(swconn, &send_mcast_query_time); > > > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) > > > ip_mcast_querier_wait(send_mcast_query_time); > > > svc_monitors_wait(svc_monitors_next_run_time); > > > ipv6_prefixd_wait(send_prefixd_time); > > > + bfd_monitor_wait(bfd_time); > > > > > > new_seq = seq_read(pinctrl_handler_seq); > > > seq_wait(pinctrl_handler_seq, new_seq); > > > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > const struct sbrec_dns_table *dns_table, > > > const struct sbrec_controller_event_table *ce_table, > > > const struct sbrec_service_monitor_table *svc_mon_table, > > > + const struct sbrec_bfd_table *bfd_table, > > > const struct ovsrec_bridge *br_int, > > > const struct sbrec_chassis *chassis, > > > const struct hmap *local_datapaths, > > > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > local_datapaths); > > > sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, > > > chassis); > > > + bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, > > > + active_tunnels); > > > ovs_mutex_unlock(&pinctrl_mutex); > > > } > > > > > > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void) > > > destroy_dns_cache(); > > > ip_mcast_snoop_destroy(); > > > destroy_svc_monitors(); > > > + bfd_monitor_destroy(); > > > seq_destroy(pinctrl_main_seq); > > > seq_destroy(pinctrl_handler_seq); > > > } > > > @@ -5525,7 +5546,8 @@ may_inject_pkts(void) > > > !shash_is_empty(&send_garp_rarp_data) || > > > ipv6_prefixd_should_inject() || > > > !ovs_list_is_empty(&mcast_query_list) || > > > - !ovs_list_is_empty(&buffered_mac_bindings)); > > > + !ovs_list_is_empty(&buffered_mac_bindings) || > > > + bfd_monitor_should_inject()); > > > } > > > > > > static void > > > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > > > > } > > > > > > +static struct hmap bfd_monitor_map; > > > + > > > +struct bfd_entry { > > > + struct hmap_node node; > > > + > > > + /* L2 source address */ > > > + struct eth_addr src_mac; > > > + /* IPv4 source address */ > > > + ovs_be32 ip_src; > > > + /* IPv4 destination address */ > > > + ovs_be32 ip_dst; > > > + /* RFC 5881 section 4 > > > + * The source port MUST be in the range 49152 through 65535. > > > + * The same UDP source port number MUST be used for all BFD > > > + * Control packets associated with a particular session. > > > + * The source port number SHOULD be unique among all BFD > > > + * sessions on the system > > > + */ > > > + uint16_t udp_src; > > > + ovs_be32 disc; > > > + > > > + int64_t port_key; > > > + int64_t metadata; > > > + > > > + long long int last_update; > > > + long long int next_tx; > > > +}; > > > + > > > +static void > > > +bfd_monitor_init(void) > > > +{ > > > + hmap_init(&bfd_monitor_map); > > > +} > > > + > > > +static void > > > +bfd_monitor_destroy(void) > > > +{ > > > + hmap_destroy(&bfd_monitor_map); > > > > I think you also need to remove the hmap nodes and free the 'struct > > bfd_entry' objects > > before destroying the hmap. > > > > > > > +} > > > + > > > +static struct bfd_entry * > > > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) > > > +{ > > > + struct bfd_entry *entry; > > > + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), > > > + &bfd_monitor_map) { > > > + if (entry->udp_src == port) { > > > + return entry; > > > + } > > > + } > > > + return NULL; > > > +} > > > + > > > +static bool > > > +bfd_monitor_should_inject(void) > > > +{ > > > + long long int cur_time = time_msec(); > > > + struct bfd_entry *entry; > > > + > > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > > + if (entry->next_tx < cur_time) { > > > + return true; > > > + } > > > + } > > > + return false; > > > +} > > > + > > > +static void > > > +bfd_monitor_wait(long long int timeout) > > > +{ > > > + if (!hmap_is_empty(&bfd_monitor_map)) { > > > + poll_timer_wait_until(timeout); > > > + } > > > +} > > > + > > > +static void > > > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) > > > +{ > > > + struct udp_header *udp; > > > + struct bfd_msg *msg; > > > + > > > + dp_packet_reserve(packet, 2); > > I don't understand why reserving 2 bytes at the head is required ? > > Can you please put some comments on why it is required ? ack, will do..it is to align the IP header to 4byte-boundary. Regards, Lorenzo > > Thanks > Numan > > > > + struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); > > > + eth->eth_dst = eth_addr_broadcast; > > > + eth->eth_src = entry->src_mac; > > > + eth->eth_type = htons(ETH_TYPE_IP); > > > + > > > + struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); > > > + ip->ip_ihl_ver = IP_IHL_VER(5, 4); > > > + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); > > > + ip->ip_ttl = MAXTTL; > > > + ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; > > > + ip->ip_proto = IPPROTO_UDP; > > > + put_16aligned_be32(&ip->ip_src, entry->ip_src); > > > + put_16aligned_be32(&ip->ip_dst, entry->ip_dst); > > > + /* Checksum has already been zeroed by put_zeros call. */ > > > + ip->ip_csum = csum(ip, sizeof *ip); > > > + > > > + udp = dp_packet_put_zeros(packet, sizeof *udp); > > > + udp->udp_src = htons(entry->udp_src); > > > + udp->udp_dst = htons(BFD_DEST_PORT); > > > + udp->udp_len = htons(sizeof *udp + sizeof *msg); > > > + > > > + msg = dp_packet_put_uninit(packet, sizeof *msg); > > > + msg->vers_diag = (BFD_VERSION << 5); > > > + msg->length = BFD_PACKET_LEN; > > > +} > > > + > > > +static void > > > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > > + OVS_REQUIRES(pinctrl_mutex) > > > +{ > > > + long long int cur_time = time_msec(); > > > + struct bfd_entry *entry; > > > + > > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > > + if (cur_time < entry->next_tx) { > > > + goto next; > > > + } > > > + > > > + uint64_t packet_stub[256 / 8]; > > > + struct dp_packet packet; > > > + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); > > > + bfd_monitor_put_bfd_msg(entry, &packet); > > > + > > > + uint64_t ofpacts_stub[4096 / 8]; > > > + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > > > + > > > + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ > > > + uint32_t dp_key = entry->metadata; > > > + uint32_t port_key = entry->port_key; > > > + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); > > > + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); > > > + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); > > > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); > > > + resubmit->in_port = OFPP_CONTROLLER; > > > + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; > > > + > > > + struct ofputil_packet_out po = { > > > + .packet = dp_packet_data(&packet), > > > + .packet_len = dp_packet_size(&packet), > > > + .buffer_id = UINT32_MAX, > > > + .ofpacts = ofpacts.data, > > > + .ofpacts_len = ofpacts.size, > > > + }; > > > + > > > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > > > + enum ofp_version version = rconn_get_version(swconn); > > > + enum ofputil_protocol proto = > > > + ofputil_protocol_from_ofp_version(version); > > > + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > > > + dp_packet_uninit(&packet); > > > + ofpbuf_uninit(&ofpacts); > > > + > > > + entry->next_tx = cur_time + 5000; > > > +next: > > > + if (*bfd_time > entry->next_tx) { > > > + *bfd_time = entry->next_tx; > > > > I think there is some problem here. I applied all the patches of this > > series and deployed > > fake multinode. > > > > When I create a BFD entry like below, ovn-controller pinctrl thread on > > ovn-gw-1 fake chassis, just wakes up > > 100% of the time. > > > > --- > > ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50 > > --- > > > > In the ovn-controller logs I see > > > > ---- > > 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due > > to 0-ms timeout at controller/pinctrl.c:6495 > > 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due > > to 0-ms timeout at controller/pinctrl.c:6495 > > 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due > > to 0-ms timeout at controller/pinctrl.c:6495 > > 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due > > to 0-ms timeout at controller/pinctrl.c:6495 > > ... > > .. > > ---- > > > > Thanks > > Numan > > > > > > > + } > > > + } > > > +} > > > + > > > +#define BFD_MONITOR_STALE_TIMEOUT 180000LL > > > +static void > > > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > > + const struct sbrec_chassis *chassis, > > > + const struct sset *active_tunnels) > > > + OVS_REQUIRES(pinctrl_mutex) > > > +{ > > > + long long int cur_time = time_msec(); > > > + bool changed = false; > > > + > > > + const struct sbrec_bfd *bt; > > > + SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { > > > + const struct sbrec_port_binding *pb > > > + = lport_lookup_by_name(sbrec_port_binding_by_name, > > > + bt->logical_port); > > > + if (!pb) { > > > + continue; > > > + } > > > + > > > + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port( > > > + bt->dst_ip, bt->src_port); > > > + if (!entry) { > > > + ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); > > > + struct eth_addr ea = eth_addr_zero; > > > + int i; > > > + > > > + if (!ip_parse(bt->dst_ip, &ip_dst)) { > > > + continue; > > > + } > > > + > > > + const char *peer_s = smap_get(&pb->options, "peer"); > > > + if (!peer_s) { > > > + continue; > > > + } > > > + > > > + const struct sbrec_port_binding *peer > > > + = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); > > > + if (!peer) { > > > + continue; > > > + } > > > + > > > + char *redirect_name = xasprintf("cr-%s", pb->logical_port); > > > + bool resident = lport_is_chassis_resident( > > > + sbrec_port_binding_by_name, chassis, active_tunnels, > > > + redirect_name); > > > + free(redirect_name); > > > + if (!resident && strcmp(pb->type, "l3gateway") && > > > + pb->chassis != chassis) { > > > + continue; > > > + } > > > + > > > + for (i = 0; i < pb->n_mac; i++) { > > > + struct lport_addresses laddrs; > > > + > > > + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { > > > + continue; > > > + } > > > + > > > + ea = laddrs.ea; > > > + if (laddrs.n_ipv4_addrs > 0) { > > > + ip_src = laddrs.ipv4_addrs[0].addr; > > > + destroy_lport_addresses(&laddrs); > > > + break; > > > + } > > > + destroy_lport_addresses(&laddrs); > > > + } > > > + > > > + if (eth_addr_is_zero(ea)) { > > > + continue; > > > + } > > > + > > > + entry = xzalloc(sizeof *entry); > > > + entry->src_mac = ea; > > > + entry->ip_src = ip_src; > > > + entry->ip_dst = ip_dst; > > > + entry->udp_src = bt->src_port; > > > + entry->disc = htonl(bt->disc); > > > + entry->next_tx = cur_time; > > > + entry->metadata = pb->datapath->tunnel_key; > > > + entry->port_key = pb->tunnel_key; > > > + > > > + uint32_t hash = hash_string(bt->dst_ip, 0); > > > + hmap_insert(&bfd_monitor_map, &entry->node, hash); > > > + changed = true; > > > + } > > > + entry->last_update = cur_time; > > > + } > > > + > > > + struct bfd_entry *entry, *next_entry; > > > + HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { > > > + if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) { > > > + hmap_remove(&bfd_monitor_map, &entry->node); > > > + free(entry); > > > + } > > > + } > > > + > > > + if (changed) { > > > + notify_pinctrl_handler(); > > > + } > > > +} > > > + > > > static uint16_t > > > get_random_src_port(void) > > > { > > > diff --git a/controller/pinctrl.h b/controller/pinctrl.h > > > index 4b101ec92..8555d983d 100644 > > > --- a/controller/pinctrl.h > > > +++ b/controller/pinctrl.h > > > @@ -31,6 +31,7 @@ struct sbrec_chassis; > > > struct sbrec_dns_table; > > > struct sbrec_controller_event_table; > > > struct sbrec_service_monitor_table; > > > +struct sbrec_bfd_table; > > > > > > void pinctrl_init(void); > > > void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > const struct sbrec_dns_table *, > > > const struct sbrec_controller_event_table *, > > > const struct sbrec_service_monitor_table *, > > > + const struct sbrec_bfd_table *, > > > const struct ovsrec_bridge *, const struct sbrec_chassis *, > > > const struct hmap *local_datapaths, > > > const struct sset *active_tunnels); > > > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > > > index c84a0e7a9..d00982449 100644 > > > --- a/lib/ovn-l7.h > > > +++ b/lib/ovn-l7.h > > > @@ -26,6 +26,25 @@ > > > #include "hash.h" > > > #include "ovn/logical-fields.h" > > > > > > +#define BFD_PACKET_LEN 24 > > > +#define BFD_DEST_PORT 3784 > > > +#define BFD_VERSION 1 > > > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ > > > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ > > > + > > > +struct bfd_msg { > > > + uint8_t vers_diag; > > > + uint8_t flags; > > > + uint8_t mult; > > > + uint8_t length; > > > + ovs_be32 my_disc; > > > + ovs_be32 your_disc; > > > + ovs_be32 min_tx; > > > + ovs_be32 min_rx; > > > + ovs_be32 min_rx_echo; > > > +}; > > > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); > > > + > > > /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */ > > > struct gen_opts_map { > > > struct hmap_node hmap_node; > > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > > index b377dffa1..e8c7629e7 100644 > > > --- a/northd/ovn-northd.c > > > +++ b/northd/ovn-northd.c > > > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, > > > } > > > } > > > > > > +struct bfd_entry { > > > + struct hmap_node hmap_node; > > > + > > > + const struct sbrec_bfd *sb_bt; > > > + bool found; > > > +}; > > > + > > > +static struct bfd_entry * > > > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, > > > + const char *dst_ip) > > > +{ > > > + struct bfd_entry *bfd_e; > > > + uint32_t hash; > > > + > > > + hash = hash_string(dst_ip, 0); > > > + hash = hash_string(logical_port, hash); > > > + HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { > > > + if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && > > > + !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { > > > + return bfd_e; > > > + } > > > + } > > > + return NULL; > > > +} > > > + > > > +static void > > > +build_bfd_table(struct northd_context *ctx) > > > +{ > > > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > > > + struct bfd_entry *bfd_e, *next_bfd_e; > > > + const struct sbrec_bfd *sb_bt; > > > + uint32_t hash; > > > + > > > + SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { > > > + bfd_e = xmalloc(sizeof *bfd_e); > > > + bfd_e->sb_bt = sb_bt; > > > + bfd_e->found = false; > > > + hash = hash_string(sb_bt->dst_ip, 0); > > > + hash = hash_string(sb_bt->logical_port, hash); > > > + hmap_insert(&sb_only, &bfd_e->hmap_node, hash); > > > + } > > > + > > > + const struct nbrec_bfd *nb_bt; > > > + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { > > > + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); > > > + if (!bfd_e) { > > > + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); > > > + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); > > > + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); > > > + sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); > > > + /* RFC 5881 section 4 > > > + * The source port MUST be in the range 49152 through 65535. > > > + * The same UDP source port number MUST be used for all BFD > > > + * Control packets associated with a particular session. > > > + * The source port number SHOULD be unique among all BFD > > > + * sessions on the system > > > + */ > > > + uint16_t udp_src = random_range(16383) + 49152; > > > + sbrec_bfd_set_src_port(sb_bt, udp_src); > > > + sbrec_bfd_set_status(sb_bt, "down"); > > > + sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx); > > > + sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx); > > > + sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult); > > > + } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { > > > + if (!strcmp(nb_bt->status, "admin_down") || > > > + !strcmp(bfd_e->sb_bt->status, "admin_down")) { > > > + sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); > > > + } else { > > > + nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); > > > + } > > > + } > > > + if (bfd_e) { > > > + bfd_e->found = true; > > > + } > > > + } > > > + > > > + HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { > > > + if (!bfd_e->found && bfd_e->sb_bt) { > > > + sbrec_bfd_delete(bfd_e->sb_bt); > > > + } > > > + hmap_remove(&sb_only, &bfd_e->hmap_node); > > > + free(bfd_e); > > > + } > > > + hmap_destroy(&sb_only); > > > +} > > > + > > > static void > > > sync_address_set(struct northd_context *ctx, const char *name, > > > const char **addrs, size_t n_addrs, > > > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx, > > > build_ip_mcast(ctx, datapaths); > > > build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); > > > build_meter_groups(ctx, &meter_groups); > > > + build_bfd_table(ctx); > > > build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, > > > &igmp_groups, &meter_groups, &lbs); > > > ovn_update_ipv6_prefix(ports); > > > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[]) > > > add_column_noalert(ovnsb_idl_loop.idl, > > > &sbrec_load_balancer_col_external_ids); > > > > > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); > > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); > > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); > > > + > > > struct ovsdb_idl_index *sbrec_chassis_by_name > > > = chassis_index_create(ovnsb_idl_loop.idl); > > > > > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > > > index b77a2308c..a880482a4 100644 > > > --- a/ovn-nb.ovsschema > > > +++ b/ovn-nb.ovsschema > > > @@ -1,7 +1,7 @@ > > > { > > > "name": "OVN_Northbound", > > > - "version": "5.30.0", > > > - "cksum": "3273824429 27172", > > > + "version": "5.31.0", > > > + "cksum": "3663844734 28178", > > > "tables": { > > > "NB_Global": { > > > "columns": { > > > @@ -526,5 +526,25 @@ > > > "type": {"key": "string", "value": "string", > > > "min": 0, "max": "unlimited"}}}, > > > "indexes": [["name"]], > > > + "isRoot": true}, > > > + "BFD": { > > > + "columns": { > > > + "logical_port": {"type": "string"}, > > > + "dst_ip": {"type": "string"}, > > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > > + "status": { > > > + "type": {"key": {"type": "string", > > > + "enum": ["set", ["down", "init", "up", > > > + "admin_down"]]}, > > > + "min": 0, "max": 1}}, > > > + "external_ids": { > > > + "type": {"key": "string", "value": "string", > > > + "min": 0, "max": "unlimited"}}, > > > + "options": { > > > + "type": {"key": "string", "value": "string", > > > + "min": 0, "max": "unlimited"}}}, > > > + "indexes": [["logical_port", "dst_ip"]], > > > "isRoot": true}} > > > } > > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > > index 0cf043790..4a28d3f0d 100644 > > > --- a/ovn-nb.xml > > > +++ b/ovn-nb.xml > > > @@ -3728,4 +3728,70 @@ > > > </column> > > > </group> > > > </table> > > > + > > > + <table name="BFD"> > > > + <p> > > > + Contains BFD parameter for ovn-controller bfd configuration > > > + </p> > > > + > > > + <group title="Configuration"> > > > + <column name="logical_port"> > > > + OVN logical port when BFD engine is running. > > > + </column> > > > + > > > + <column name="dst_ip"> > > > + BFD peer IP address. > > > + </column> > > > + > > > + <column name="min_tx"> > > > + This is the minimum interval, in milliseconds, that the local > > > + system would like to use when transmitting BFD Control packets, > > > + less any jitter applied. The value zero is reserved. > > > + </column> > > > + > > > + <column name="min_rx"> > > > + This is the minimum interval, in milliseconds, between received > > > + BFD Control packets that this system is capable of supporting, > > > + less any jitter applied by the sender. If this value is zero, > > > + the transmitting system does not want the remote system to send > > > + any periodic BFD Control packets. > > > + </column> > > > + > > > + <column name="detect_mult"> > > > + Detection time multiplier. The negotiated transmit interval, > > > + multiplied by this value, provides the Detection Time for the > > > + receiving system in Asynchronous mode. > > > + </column> > > > + > > > + <column name="options"> > > > + Reserved for future use. > > > + </column> > > > + > > > + <column name="external_ids"> > > > + See <em>External IDs</em> at the beginning of this document. > > > + </column> > > > + </group> > > > + > > > + <group title="Status Reporting"> > > > + <column name="status"> > > > + <p> > > > + BFD port logical states. Possible values are: > > > + <ul> > > > + <li> > > > + <code>admin_down</code> > > > + </li> > > > + <li> > > > + <code>down</code> > > > + </li> > > > + <li> > > > + <code>init</code> > > > + </li> > > > + <li> > > > + <code>up</code> > > > + </li> > > > + </ul> > > > + </p> > > > + </column> > > > + </group> > > > + </table> > > > </database> > > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > > > index 5228839b8..97db6de39 100644 > > > --- a/ovn-sb.ovsschema > > > +++ b/ovn-sb.ovsschema > > > @@ -1,7 +1,7 @@ > > > { > > > "name": "OVN_Southbound", > > > - "version": "20.12.0", > > > - "cksum": "3969471120 24441", > > > + "version": "20.13.0", > > > + "cksum": "3035725595 25676", > > > "tables": { > > > "SB_Global": { > > > "columns": { > > > @@ -484,6 +484,29 @@ > > > "external_ids": { > > > "type": {"key": "string", "value": "string", > > > "min": 0, "max": "unlimited"}}}, > > > + "isRoot": true}, > > > + "BFD": { > > > + "columns": { > > > + "src_port": {"type": {"key": {"type": "integer", > > > + "minInteger": 49152, > > > + "maxInteger": 65535}}}, > > > + "disc": {"type": {"key": {"type": "integer"}}}, > > > + "logical_port": {"type": "string"}, > > > + "dst_ip": {"type": "string"}, > > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > > + "status": { > > > + "type": {"key": {"type": "string", > > > + "enum": ["set", ["down", "init", "up", > > > + "admin_down"]]}}}, > > > + "external_ids": { > > > + "type": {"key": "string", "value": "string", > > > + "min": 0, "max": "unlimited"}}, > > > + "options": { > > > + "type": {"key": "string", "value": "string", > > > + "min": 0, "max": "unlimited"}}}, > > > + "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], > > > "isRoot": true} > > > } > > > } > > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > > index c13994848..ccfe2e415 100644 > > > --- a/ovn-sb.xml > > > +++ b/ovn-sb.xml > > > @@ -4231,4 +4231,82 @@ tcp.flags = RST; > > > </column> > > > </group> > > > </table> > > > + > > > + <table name="BFD"> > > > + <p> > > > + Contains BFD parameter for ovn-controller bfd configuration > > > + </p> > > > + > > > + <group title="Configuration"> > > > + <column name="src_port"> > > > + udp source port used in bfd control packets. > > > + The source port MUST be in the range 49152 through 65535 > > > + (RFC5881 section 4). > > > + </column> > > > + > > > + <column name="disc"> > > > + A unique, nonzero discriminator value generated by the transmitting > > > + system, used to demultiplex multiple BFD sessions between the same pair > > > + of systems. > > > + </column> > > > + > > > + <column name="logical_port"> > > > + OVN logical port when BFD engine is running. > > > + </column> > > > + > > > + <column name="dst_ip"> > > > + BFD peer IP address. > > > + </column> > > > + > > > + <column name="min_tx"> > > > + This is the minimum interval, in milliseconds, that the local > > > + system would like to use when transmitting BFD Control packets, > > > + less any jitter applied. The value zero is reserved. > > > + </column> > > > + > > > + <column name="min_rx"> > > > + This is the minimum interval, in milliseconds, between received > > > + BFD Control packets that this system is capable of supporting, > > > + less any jitter applied by the sender. If this value is zero, > > > + the transmitting system does not want the remote system to send > > > + any periodic BFD Control packets. > > > + </column> > > > + > > > + <column name="detect_mult"> > > > + Detection time multiplier. The negotiated transmit interval, > > > + multiplied by this value, provides the Detection Time for the > > > + receiving system in Asynchronous mode. > > > + </column> > > > + > > > + <column name="options"> > > > + Reserved for future use. > > > + </column> > > > + > > > + <column name="external_ids"> > > > + See <em>External IDs</em> at the beginning of this document. > > > + </column> > > > + </group> > > > + > > > + <group title="Status Reporting"> > > > + <column name="status"> > > > + <p> > > > + BFD port logical states. Possible values are: > > > + <ul> > > > + <li> > > > + <code>admin_down</code> > > > + </li> > > > + <li> > > > + <code>down</code> > > > + </li> > > > + <li> > > > + <code>init</code> > > > + </li> > > > + <li> > > > + <code>up</code> > > > + </li> > > > + </ul> > > > + </p> > > > + </column> > > > + </group> > > > + </table> > > > </database> > > > -- > > > 2.29.2 > > > > > > _______________________________________________ > > > dev mailing list > > > dev@openvswitch.org > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > >
On Dec 18, Numan Siddique wrote: > On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi > <lorenzo.bianconi@redhat.com> wrote: > > > > Introduce the capability to transmit BFD packets in ovn-controller. > > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters > > (e.g. min_tx, min_rx, ..) for ovn-controller. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > Hi Lorenzo, > > Please see below for a few comments. Hi Numan, thx for the review. > > Thanks > Numan > > > --- > > controller/ovn-controller.c | 1 + > > controller/pinctrl.c | 288 +++++++++++++++++++++++++++++++++++- > > controller/pinctrl.h | 2 + > > lib/ovn-l7.h | 19 +++ > > northd/ovn-northd.c | 91 ++++++++++++ > > ovn-nb.ovsschema | 24 ++- > > ovn-nb.xml | 66 +++++++++ > > ovn-sb.ovsschema | 27 +++- > > ovn-sb.xml | 78 ++++++++++ > > 9 files changed, 591 insertions(+), 5 deletions(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index b5f0c1315..84b3f85ad 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[]) > > ovnsb_idl_loop.idl), > > sbrec_service_monitor_table_get( > > ovnsb_idl_loop.idl), > > + sbrec_bfd_table_get(ovnsb_idl_loop.idl), > > br_int, chassis, > > &runtime_data->local_datapaths, > > &runtime_data->active_tunnels); > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > index 7e3abf0a4..ae994b513 100644 > > --- a/controller/pinctrl.c > > +++ b/controller/pinctrl.c > > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, > > static void notify_pinctrl_main(void); > > static void notify_pinctrl_handler(void); > > > > +static bool bfd_monitor_should_inject(void); > > +static void bfd_monitor_wait(long long int timeout); > > +static void bfd_monitor_init(void); > > +static void bfd_monitor_destroy(void); > > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > + OVS_REQUIRES(pinctrl_mutex); > > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > + const struct sbrec_chassis *chassis, > > + const struct sset *active_tunnels) > > + OVS_REQUIRES(pinctrl_mutex); > > + > > COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); > > COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); > > COVERAGE_DEFINE(pinctrl_drop_controller_event); > > @@ -487,6 +499,7 @@ pinctrl_init(void) > > ip_mcast_snoop_init(); > > init_put_vport_bindings(); > > init_svc_monitors(); > > + bfd_monitor_init(); > > pinctrl.br_int_name = NULL; > > pinctrl_handler_seq = seq_create(); > > pinctrl_main_seq = seq_create(); > > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) > > swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); > > > > while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { > > + long long int bfd_time = LLONG_MAX; > > + > > ovs_mutex_lock(&pinctrl_mutex); > > pinctrl_rconn_setup(swconn, pctrl->br_int_name); > > ip_mcast_snoop_run(); > > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) > > send_ipv6_ras(swconn, &send_ipv6_ra_time); > > send_ipv6_prefixd(swconn, &send_prefixd_time); > > send_mac_binding_buffered_pkts(swconn); > > + bfd_monitor_send_msg(swconn, &bfd_time); > > ovs_mutex_unlock(&pinctrl_mutex); > > > > ip_mcast_querier_run(swconn, &send_mcast_query_time); > > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) > > ip_mcast_querier_wait(send_mcast_query_time); > > svc_monitors_wait(svc_monitors_next_run_time); > > ipv6_prefixd_wait(send_prefixd_time); > > + bfd_monitor_wait(bfd_time); > > > > new_seq = seq_read(pinctrl_handler_seq); > > seq_wait(pinctrl_handler_seq, new_seq); > > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > const struct sbrec_dns_table *dns_table, > > const struct sbrec_controller_event_table *ce_table, > > const struct sbrec_service_monitor_table *svc_mon_table, > > + const struct sbrec_bfd_table *bfd_table, > > const struct ovsrec_bridge *br_int, > > const struct sbrec_chassis *chassis, > > const struct hmap *local_datapaths, > > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > local_datapaths); > > sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, > > chassis); > > + bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, > > + active_tunnels); > > ovs_mutex_unlock(&pinctrl_mutex); > > } > > > > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void) > > destroy_dns_cache(); > > ip_mcast_snoop_destroy(); > > destroy_svc_monitors(); > > + bfd_monitor_destroy(); > > seq_destroy(pinctrl_main_seq); > > seq_destroy(pinctrl_handler_seq); > > } > > @@ -5525,7 +5546,8 @@ may_inject_pkts(void) > > !shash_is_empty(&send_garp_rarp_data) || > > ipv6_prefixd_should_inject() || > > !ovs_list_is_empty(&mcast_query_list) || > > - !ovs_list_is_empty(&buffered_mac_bindings)); > > + !ovs_list_is_empty(&buffered_mac_bindings) || > > + bfd_monitor_should_inject()); > > } > > > > static void > > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, > > > > } > > > > +static struct hmap bfd_monitor_map; > > + > > +struct bfd_entry { > > + struct hmap_node node; > > + > > + /* L2 source address */ > > + struct eth_addr src_mac; > > + /* IPv4 source address */ > > + ovs_be32 ip_src; > > + /* IPv4 destination address */ > > + ovs_be32 ip_dst; > > + /* RFC 5881 section 4 > > + * The source port MUST be in the range 49152 through 65535. > > + * The same UDP source port number MUST be used for all BFD > > + * Control packets associated with a particular session. > > + * The source port number SHOULD be unique among all BFD > > + * sessions on the system > > + */ > > + uint16_t udp_src; > > + ovs_be32 disc; > > + > > + int64_t port_key; > > + int64_t metadata; > > + > > + long long int last_update; > > + long long int next_tx; > > +}; > > + > > +static void > > +bfd_monitor_init(void) > > +{ > > + hmap_init(&bfd_monitor_map); > > +} > > + > > +static void > > +bfd_monitor_destroy(void) > > +{ > > + hmap_destroy(&bfd_monitor_map); > > I think you also need to remove the hmap nodes and free the 'struct > bfd_entry' objects > before destroying the hmap. ack, I will fix it in v4 > > > > +} > > + > > +static struct bfd_entry * > > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) > > +{ > > + struct bfd_entry *entry; > > + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), > > + &bfd_monitor_map) { > > + if (entry->udp_src == port) { > > + return entry; > > + } > > + } > > + return NULL; > > +} > > + > > +static bool > > +bfd_monitor_should_inject(void) > > +{ > > + long long int cur_time = time_msec(); > > + struct bfd_entry *entry; > > + > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > + if (entry->next_tx < cur_time) { > > + return true; > > + } > > + } > > + return false; > > +} > > + > > +static void > > +bfd_monitor_wait(long long int timeout) > > +{ > > + if (!hmap_is_empty(&bfd_monitor_map)) { > > + poll_timer_wait_until(timeout); > > + } > > +} > > + > > +static void > > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) > > +{ > > + struct udp_header *udp; > > + struct bfd_msg *msg; > > + > > + dp_packet_reserve(packet, 2); > > + struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); > > + eth->eth_dst = eth_addr_broadcast; > > + eth->eth_src = entry->src_mac; > > + eth->eth_type = htons(ETH_TYPE_IP); > > + > > + struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); > > + ip->ip_ihl_ver = IP_IHL_VER(5, 4); > > + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); > > + ip->ip_ttl = MAXTTL; > > + ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; > > + ip->ip_proto = IPPROTO_UDP; > > + put_16aligned_be32(&ip->ip_src, entry->ip_src); > > + put_16aligned_be32(&ip->ip_dst, entry->ip_dst); > > + /* Checksum has already been zeroed by put_zeros call. */ > > + ip->ip_csum = csum(ip, sizeof *ip); > > + > > + udp = dp_packet_put_zeros(packet, sizeof *udp); > > + udp->udp_src = htons(entry->udp_src); > > + udp->udp_dst = htons(BFD_DEST_PORT); > > + udp->udp_len = htons(sizeof *udp + sizeof *msg); > > + > > + msg = dp_packet_put_uninit(packet, sizeof *msg); > > + msg->vers_diag = (BFD_VERSION << 5); > > + msg->length = BFD_PACKET_LEN; > > +} > > + > > +static void > > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) > > + OVS_REQUIRES(pinctrl_mutex) > > +{ > > + long long int cur_time = time_msec(); > > + struct bfd_entry *entry; > > + > > + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { > > + if (cur_time < entry->next_tx) { > > + goto next; > > + } > > + > > + uint64_t packet_stub[256 / 8]; > > + struct dp_packet packet; > > + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); > > + bfd_monitor_put_bfd_msg(entry, &packet); > > + > > + uint64_t ofpacts_stub[4096 / 8]; > > + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > > + > > + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ > > + uint32_t dp_key = entry->metadata; > > + uint32_t port_key = entry->port_key; > > + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); > > + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); > > + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); > > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); > > + resubmit->in_port = OFPP_CONTROLLER; > > + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; > > + > > + struct ofputil_packet_out po = { > > + .packet = dp_packet_data(&packet), > > + .packet_len = dp_packet_size(&packet), > > + .buffer_id = UINT32_MAX, > > + .ofpacts = ofpacts.data, > > + .ofpacts_len = ofpacts.size, > > + }; > > + > > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); > > + enum ofp_version version = rconn_get_version(swconn); > > + enum ofputil_protocol proto = > > + ofputil_protocol_from_ofp_version(version); > > + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); > > + dp_packet_uninit(&packet); > > + ofpbuf_uninit(&ofpacts); > > + > > + entry->next_tx = cur_time + 5000; > > +next: > > + if (*bfd_time > entry->next_tx) { > > + *bfd_time = entry->next_tx; > > I think there is some problem here. I applied all the patches of this > series and deployed > fake multinode. > > When I create a BFD entry like below, ovn-controller pinctrl thread on > ovn-gw-1 fake chassis, just wakes up > 100% of the time. ack, I spotted the issue. Using the following command: $ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50 I will not provide a value fpr min_tx and min_rx. This will immediately trigger a thread wakeup. I will fix it in v4 Regards, Lorenzo > > --- > ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50 > --- > > In the ovn-controller logs I see > > ---- > 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due > to 0-ms timeout at controller/pinctrl.c:6495 > ... > .. > ---- > > Thanks > Numan > > > > + } > > + } > > +} > > + > > +#define BFD_MONITOR_STALE_TIMEOUT 180000LL > > +static void > > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, > > + struct ovsdb_idl_index *sbrec_port_binding_by_name, > > + const struct sbrec_chassis *chassis, > > + const struct sset *active_tunnels) > > + OVS_REQUIRES(pinctrl_mutex) > > +{ > > + long long int cur_time = time_msec(); > > + bool changed = false; > > + > > + const struct sbrec_bfd *bt; > > + SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { > > + const struct sbrec_port_binding *pb > > + = lport_lookup_by_name(sbrec_port_binding_by_name, > > + bt->logical_port); > > + if (!pb) { > > + continue; > > + } > > + > > + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port( > > + bt->dst_ip, bt->src_port); > > + if (!entry) { > > + ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); > > + struct eth_addr ea = eth_addr_zero; > > + int i; > > + > > + if (!ip_parse(bt->dst_ip, &ip_dst)) { > > + continue; > > + } > > + > > + const char *peer_s = smap_get(&pb->options, "peer"); > > + if (!peer_s) { > > + continue; > > + } > > + > > + const struct sbrec_port_binding *peer > > + = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); > > + if (!peer) { > > + continue; > > + } > > + > > + char *redirect_name = xasprintf("cr-%s", pb->logical_port); > > + bool resident = lport_is_chassis_resident( > > + sbrec_port_binding_by_name, chassis, active_tunnels, > > + redirect_name); > > + free(redirect_name); > > + if (!resident && strcmp(pb->type, "l3gateway") && > > + pb->chassis != chassis) { > > + continue; > > + } > > + > > + for (i = 0; i < pb->n_mac; i++) { > > + struct lport_addresses laddrs; > > + > > + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { > > + continue; > > + } > > + > > + ea = laddrs.ea; > > + if (laddrs.n_ipv4_addrs > 0) { > > + ip_src = laddrs.ipv4_addrs[0].addr; > > + destroy_lport_addresses(&laddrs); > > + break; > > + } > > + destroy_lport_addresses(&laddrs); > > + } > > + > > + if (eth_addr_is_zero(ea)) { > > + continue; > > + } > > + > > + entry = xzalloc(sizeof *entry); > > + entry->src_mac = ea; > > + entry->ip_src = ip_src; > > + entry->ip_dst = ip_dst; > > + entry->udp_src = bt->src_port; > > + entry->disc = htonl(bt->disc); > > + entry->next_tx = cur_time; > > + entry->metadata = pb->datapath->tunnel_key; > > + entry->port_key = pb->tunnel_key; > > + > > + uint32_t hash = hash_string(bt->dst_ip, 0); > > + hmap_insert(&bfd_monitor_map, &entry->node, hash); > > + changed = true; > > + } > > + entry->last_update = cur_time; > > + } > > + > > + struct bfd_entry *entry, *next_entry; > > + HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { > > + if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) { > > + hmap_remove(&bfd_monitor_map, &entry->node); > > + free(entry); > > + } > > + } > > + > > + if (changed) { > > + notify_pinctrl_handler(); > > + } > > +} > > + > > static uint16_t > > get_random_src_port(void) > > { > > diff --git a/controller/pinctrl.h b/controller/pinctrl.h > > index 4b101ec92..8555d983d 100644 > > --- a/controller/pinctrl.h > > +++ b/controller/pinctrl.h > > @@ -31,6 +31,7 @@ struct sbrec_chassis; > > struct sbrec_dns_table; > > struct sbrec_controller_event_table; > > struct sbrec_service_monitor_table; > > +struct sbrec_bfd_table; > > > > void pinctrl_init(void); > > void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > > const struct sbrec_dns_table *, > > const struct sbrec_controller_event_table *, > > const struct sbrec_service_monitor_table *, > > + const struct sbrec_bfd_table *, > > const struct ovsrec_bridge *, const struct sbrec_chassis *, > > const struct hmap *local_datapaths, > > const struct sset *active_tunnels); > > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > > index c84a0e7a9..d00982449 100644 > > --- a/lib/ovn-l7.h > > +++ b/lib/ovn-l7.h > > @@ -26,6 +26,25 @@ > > #include "hash.h" > > #include "ovn/logical-fields.h" > > > > +#define BFD_PACKET_LEN 24 > > +#define BFD_DEST_PORT 3784 > > +#define BFD_VERSION 1 > > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ > > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ > > + > > +struct bfd_msg { > > + uint8_t vers_diag; > > + uint8_t flags; > > + uint8_t mult; > > + uint8_t length; > > + ovs_be32 my_disc; > > + ovs_be32 your_disc; > > + ovs_be32 min_tx; > > + ovs_be32 min_rx; > > + ovs_be32 min_rx_echo; > > +}; > > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); > > + > > /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */ > > struct gen_opts_map { > > struct hmap_node hmap_node; > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > index b377dffa1..e8c7629e7 100644 > > --- a/northd/ovn-northd.c > > +++ b/northd/ovn-northd.c > > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, > > } > > } > > > > +struct bfd_entry { > > + struct hmap_node hmap_node; > > + > > + const struct sbrec_bfd *sb_bt; > > + bool found; > > +}; > > + > > +static struct bfd_entry * > > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, > > + const char *dst_ip) > > +{ > > + struct bfd_entry *bfd_e; > > + uint32_t hash; > > + > > + hash = hash_string(dst_ip, 0); > > + hash = hash_string(logical_port, hash); > > + HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { > > + if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && > > + !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { > > + return bfd_e; > > + } > > + } > > + return NULL; > > +} > > + > > +static void > > +build_bfd_table(struct northd_context *ctx) > > +{ > > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > > + struct bfd_entry *bfd_e, *next_bfd_e; > > + const struct sbrec_bfd *sb_bt; > > + uint32_t hash; > > + > > + SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { > > + bfd_e = xmalloc(sizeof *bfd_e); > > + bfd_e->sb_bt = sb_bt; > > + bfd_e->found = false; > > + hash = hash_string(sb_bt->dst_ip, 0); > > + hash = hash_string(sb_bt->logical_port, hash); > > + hmap_insert(&sb_only, &bfd_e->hmap_node, hash); > > + } > > + > > + const struct nbrec_bfd *nb_bt; > > + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { > > + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); > > + if (!bfd_e) { > > + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); > > + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); > > + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); > > + sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); > > + /* RFC 5881 section 4 > > + * The source port MUST be in the range 49152 through 65535. > > + * The same UDP source port number MUST be used for all BFD > > + * Control packets associated with a particular session. > > + * The source port number SHOULD be unique among all BFD > > + * sessions on the system > > + */ > > + uint16_t udp_src = random_range(16383) + 49152; > > + sbrec_bfd_set_src_port(sb_bt, udp_src); > > + sbrec_bfd_set_status(sb_bt, "down"); > > + sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx); > > + sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx); > > + sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult); > > + } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { > > + if (!strcmp(nb_bt->status, "admin_down") || > > + !strcmp(bfd_e->sb_bt->status, "admin_down")) { > > + sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); > > + } else { > > + nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); > > + } > > + } > > + if (bfd_e) { > > + bfd_e->found = true; > > + } > > + } > > + > > + HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { > > + if (!bfd_e->found && bfd_e->sb_bt) { > > + sbrec_bfd_delete(bfd_e->sb_bt); > > + } > > + hmap_remove(&sb_only, &bfd_e->hmap_node); > > + free(bfd_e); > > + } > > + hmap_destroy(&sb_only); > > +} > > + > > static void > > sync_address_set(struct northd_context *ctx, const char *name, > > const char **addrs, size_t n_addrs, > > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx, > > build_ip_mcast(ctx, datapaths); > > build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); > > build_meter_groups(ctx, &meter_groups); > > + build_bfd_table(ctx); > > build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, > > &igmp_groups, &meter_groups, &lbs); > > ovn_update_ipv6_prefix(ports); > > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[]) > > add_column_noalert(ovnsb_idl_loop.idl, > > &sbrec_load_balancer_col_external_ids); > > > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); > > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); > > + > > struct ovsdb_idl_index *sbrec_chassis_by_name > > = chassis_index_create(ovnsb_idl_loop.idl); > > > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > > index b77a2308c..a880482a4 100644 > > --- a/ovn-nb.ovsschema > > +++ b/ovn-nb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Northbound", > > - "version": "5.30.0", > > - "cksum": "3273824429 27172", > > + "version": "5.31.0", > > + "cksum": "3663844734 28178", > > "tables": { > > "NB_Global": { > > "columns": { > > @@ -526,5 +526,25 @@ > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}}, > > "indexes": [["name"]], > > + "isRoot": true}, > > + "BFD": { > > + "columns": { > > + "logical_port": {"type": "string"}, > > + "dst_ip": {"type": "string"}, > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > + "status": { > > + "type": {"key": {"type": "string", > > + "enum": ["set", ["down", "init", "up", > > + "admin_down"]]}, > > + "min": 0, "max": 1}}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}, > > + "options": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > + "indexes": [["logical_port", "dst_ip"]], > > "isRoot": true}} > > } > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > index 0cf043790..4a28d3f0d 100644 > > --- a/ovn-nb.xml > > +++ b/ovn-nb.xml > > @@ -3728,4 +3728,70 @@ > > </column> > > </group> > > </table> > > + > > + <table name="BFD"> > > + <p> > > + Contains BFD parameter for ovn-controller bfd configuration > > + </p> > > + > > + <group title="Configuration"> > > + <column name="logical_port"> > > + OVN logical port when BFD engine is running. > > + </column> > > + > > + <column name="dst_ip"> > > + BFD peer IP address. > > + </column> > > + > > + <column name="min_tx"> > > + This is the minimum interval, in milliseconds, that the local > > + system would like to use when transmitting BFD Control packets, > > + less any jitter applied. The value zero is reserved. > > + </column> > > + > > + <column name="min_rx"> > > + This is the minimum interval, in milliseconds, between received > > + BFD Control packets that this system is capable of supporting, > > + less any jitter applied by the sender. If this value is zero, > > + the transmitting system does not want the remote system to send > > + any periodic BFD Control packets. > > + </column> > > + > > + <column name="detect_mult"> > > + Detection time multiplier. The negotiated transmit interval, > > + multiplied by this value, provides the Detection Time for the > > + receiving system in Asynchronous mode. > > + </column> > > + > > + <column name="options"> > > + Reserved for future use. > > + </column> > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </group> > > + > > + <group title="Status Reporting"> > > + <column name="status"> > > + <p> > > + BFD port logical states. Possible values are: > > + <ul> > > + <li> > > + <code>admin_down</code> > > + </li> > > + <li> > > + <code>down</code> > > + </li> > > + <li> > > + <code>init</code> > > + </li> > > + <li> > > + <code>up</code> > > + </li> > > + </ul> > > + </p> > > + </column> > > + </group> > > + </table> > > </database> > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > > index 5228839b8..97db6de39 100644 > > --- a/ovn-sb.ovsschema > > +++ b/ovn-sb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Southbound", > > - "version": "20.12.0", > > - "cksum": "3969471120 24441", > > + "version": "20.13.0", > > + "cksum": "3035725595 25676", > > "tables": { > > "SB_Global": { > > "columns": { > > @@ -484,6 +484,29 @@ > > "external_ids": { > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}}, > > + "isRoot": true}, > > + "BFD": { > > + "columns": { > > + "src_port": {"type": {"key": {"type": "integer", > > + "minInteger": 49152, > > + "maxInteger": 65535}}}, > > + "disc": {"type": {"key": {"type": "integer"}}}, > > + "logical_port": {"type": "string"}, > > + "dst_ip": {"type": "string"}, > > + "min_tx": {"type": {"key": {"type": "integer"}}}, > > + "min_rx": {"type": {"key": {"type": "integer"}}}, > > + "detect_mult": {"type": {"key": {"type": "integer"}}}, > > + "status": { > > + "type": {"key": {"type": "string", > > + "enum": ["set", ["down", "init", "up", > > + "admin_down"]]}}}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}, > > + "options": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > + "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], > > "isRoot": true} > > } > > } > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > index c13994848..ccfe2e415 100644 > > --- a/ovn-sb.xml > > +++ b/ovn-sb.xml > > @@ -4231,4 +4231,82 @@ tcp.flags = RST; > > </column> > > </group> > > </table> > > + > > + <table name="BFD"> > > + <p> > > + Contains BFD parameter for ovn-controller bfd configuration > > + </p> > > + > > + <group title="Configuration"> > > + <column name="src_port"> > > + udp source port used in bfd control packets. > > + The source port MUST be in the range 49152 through 65535 > > + (RFC5881 section 4). > > + </column> > > + > > + <column name="disc"> > > + A unique, nonzero discriminator value generated by the transmitting > > + system, used to demultiplex multiple BFD sessions between the same pair > > + of systems. > > + </column> > > + > > + <column name="logical_port"> > > + OVN logical port when BFD engine is running. > > + </column> > > + > > + <column name="dst_ip"> > > + BFD peer IP address. > > + </column> > > + > > + <column name="min_tx"> > > + This is the minimum interval, in milliseconds, that the local > > + system would like to use when transmitting BFD Control packets, > > + less any jitter applied. The value zero is reserved. > > + </column> > > + > > + <column name="min_rx"> > > + This is the minimum interval, in milliseconds, between received > > + BFD Control packets that this system is capable of supporting, > > + less any jitter applied by the sender. If this value is zero, > > + the transmitting system does not want the remote system to send > > + any periodic BFD Control packets. > > + </column> > > + > > + <column name="detect_mult"> > > + Detection time multiplier. The negotiated transmit interval, > > + multiplied by this value, provides the Detection Time for the > > + receiving system in Asynchronous mode. > > + </column> > > + > > + <column name="options"> > > + Reserved for future use. > > + </column> > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </group> > > + > > + <group title="Status Reporting"> > > + <column name="status"> > > + <p> > > + BFD port logical states. Possible values are: > > + <ul> > > + <li> > > + <code>admin_down</code> > > + </li> > > + <li> > > + <code>down</code> > > + </li> > > + <li> > > + <code>init</code> > > + </li> > > + <li> > > + <code>up</code> > > + </li> > > + </ul> > > + </p> > > + </column> > > + </group> > > + </table> > > </database> > > -- > > 2.29.2 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > >
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index b5f0c1315..84b3f85ad 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -2864,6 +2864,7 @@ main(int argc, char *argv[]) ovnsb_idl_loop.idl), sbrec_service_monitor_table_get( ovnsb_idl_loop.idl), + sbrec_bfd_table_get(ovnsb_idl_loop.idl), br_int, chassis, &runtime_data->local_datapaths, &runtime_data->active_tunnels); diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 7e3abf0a4..ae994b513 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, static void notify_pinctrl_main(void); static void notify_pinctrl_handler(void); +static bool bfd_monitor_should_inject(void); +static void bfd_monitor_wait(long long int timeout); +static void bfd_monitor_init(void); +static void bfd_monitor_destroy(void); +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + OVS_REQUIRES(pinctrl_mutex); +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels) + OVS_REQUIRES(pinctrl_mutex); + COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); COVERAGE_DEFINE(pinctrl_drop_controller_event); @@ -487,6 +499,7 @@ pinctrl_init(void) ip_mcast_snoop_init(); init_put_vport_bindings(); init_svc_monitors(); + bfd_monitor_init(); pinctrl.br_int_name = NULL; pinctrl_handler_seq = seq_create(); pinctrl_main_seq = seq_create(); @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { + long long int bfd_time = LLONG_MAX; + ovs_mutex_lock(&pinctrl_mutex); pinctrl_rconn_setup(swconn, pctrl->br_int_name); ip_mcast_snoop_run(); @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) send_ipv6_ras(swconn, &send_ipv6_ra_time); send_ipv6_prefixd(swconn, &send_prefixd_time); send_mac_binding_buffered_pkts(swconn); + bfd_monitor_send_msg(swconn, &bfd_time); ovs_mutex_unlock(&pinctrl_mutex); ip_mcast_querier_run(swconn, &send_mcast_query_time); @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) ip_mcast_querier_wait(send_mcast_query_time); svc_monitors_wait(svc_monitors_next_run_time); ipv6_prefixd_wait(send_prefixd_time); + bfd_monitor_wait(bfd_time); new_seq = seq_read(pinctrl_handler_seq); seq_wait(pinctrl_handler_seq, new_seq); @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_dns_table *dns_table, const struct sbrec_controller_event_table *ce_table, const struct sbrec_service_monitor_table *svc_mon_table, + const struct sbrec_bfd_table *bfd_table, const struct ovsrec_bridge *br_int, const struct sbrec_chassis *chassis, const struct hmap *local_datapaths, @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, local_datapaths); sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, chassis); + bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, + active_tunnels); ovs_mutex_unlock(&pinctrl_mutex); } @@ -3722,6 +3742,7 @@ pinctrl_destroy(void) destroy_dns_cache(); ip_mcast_snoop_destroy(); destroy_svc_monitors(); + bfd_monitor_destroy(); seq_destroy(pinctrl_main_seq); seq_destroy(pinctrl_handler_seq); } @@ -5525,7 +5546,8 @@ may_inject_pkts(void) !shash_is_empty(&send_garp_rarp_data) || ipv6_prefixd_should_inject() || !ovs_list_is_empty(&mcast_query_list) || - !ovs_list_is_empty(&buffered_mac_bindings)); + !ovs_list_is_empty(&buffered_mac_bindings) || + bfd_monitor_should_inject()); } static void @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, } +static struct hmap bfd_monitor_map; + +struct bfd_entry { + struct hmap_node node; + + /* L2 source address */ + struct eth_addr src_mac; + /* IPv4 source address */ + ovs_be32 ip_src; + /* IPv4 destination address */ + ovs_be32 ip_dst; + /* RFC 5881 section 4 + * The source port MUST be in the range 49152 through 65535. + * The same UDP source port number MUST be used for all BFD + * Control packets associated with a particular session. + * The source port number SHOULD be unique among all BFD + * sessions on the system + */ + uint16_t udp_src; + ovs_be32 disc; + + int64_t port_key; + int64_t metadata; + + long long int last_update; + long long int next_tx; +}; + +static void +bfd_monitor_init(void) +{ + hmap_init(&bfd_monitor_map); +} + +static void +bfd_monitor_destroy(void) +{ + hmap_destroy(&bfd_monitor_map); +} + +static struct bfd_entry * +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) +{ + struct bfd_entry *entry; + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), + &bfd_monitor_map) { + if (entry->udp_src == port) { + return entry; + } + } + return NULL; +} + +static bool +bfd_monitor_should_inject(void) +{ + long long int cur_time = time_msec(); + struct bfd_entry *entry; + + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { + if (entry->next_tx < cur_time) { + return true; + } + } + return false; +} + +static void +bfd_monitor_wait(long long int timeout) +{ + if (!hmap_is_empty(&bfd_monitor_map)) { + poll_timer_wait_until(timeout); + } +} + +static void +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) +{ + struct udp_header *udp; + struct bfd_msg *msg; + + dp_packet_reserve(packet, 2); + struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); + eth->eth_dst = eth_addr_broadcast; + eth->eth_src = entry->src_mac; + eth->eth_type = htons(ETH_TYPE_IP); + + struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); + ip->ip_ihl_ver = IP_IHL_VER(5, 4); + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); + ip->ip_ttl = MAXTTL; + ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; + ip->ip_proto = IPPROTO_UDP; + put_16aligned_be32(&ip->ip_src, entry->ip_src); + put_16aligned_be32(&ip->ip_dst, entry->ip_dst); + /* Checksum has already been zeroed by put_zeros call. */ + ip->ip_csum = csum(ip, sizeof *ip); + + udp = dp_packet_put_zeros(packet, sizeof *udp); + udp->udp_src = htons(entry->udp_src); + udp->udp_dst = htons(BFD_DEST_PORT); + udp->udp_len = htons(sizeof *udp + sizeof *msg); + + msg = dp_packet_put_uninit(packet, sizeof *msg); + msg->vers_diag = (BFD_VERSION << 5); + msg->length = BFD_PACKET_LEN; +} + +static void +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + OVS_REQUIRES(pinctrl_mutex) +{ + long long int cur_time = time_msec(); + struct bfd_entry *entry; + + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { + if (cur_time < entry->next_tx) { + goto next; + } + + uint64_t packet_stub[256 / 8]; + struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); + bfd_monitor_put_bfd_msg(entry, &packet); + + uint64_t ofpacts_stub[4096 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); + + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ + uint32_t dp_key = entry->metadata; + uint32_t port_key = entry->port_key; + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(&packet), + .packet_len = dp_packet_size(&packet), + .buffer_id = UINT32_MAX, + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = + ofputil_protocol_from_ofp_version(version); + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); + + entry->next_tx = cur_time + 5000; +next: + if (*bfd_time > entry->next_tx) { + *bfd_time = entry->next_tx; + } + } +} + +#define BFD_MONITOR_STALE_TIMEOUT 180000LL +static void +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels) + OVS_REQUIRES(pinctrl_mutex) +{ + long long int cur_time = time_msec(); + bool changed = false; + + const struct sbrec_bfd *bt; + SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { + const struct sbrec_port_binding *pb + = lport_lookup_by_name(sbrec_port_binding_by_name, + bt->logical_port); + if (!pb) { + continue; + } + + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port( + bt->dst_ip, bt->src_port); + if (!entry) { + ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); + struct eth_addr ea = eth_addr_zero; + int i; + + if (!ip_parse(bt->dst_ip, &ip_dst)) { + continue; + } + + const char *peer_s = smap_get(&pb->options, "peer"); + if (!peer_s) { + continue; + } + + const struct sbrec_port_binding *peer + = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); + if (!peer) { + continue; + } + + char *redirect_name = xasprintf("cr-%s", pb->logical_port); + bool resident = lport_is_chassis_resident( + sbrec_port_binding_by_name, chassis, active_tunnels, + redirect_name); + free(redirect_name); + if (!resident && strcmp(pb->type, "l3gateway") && + pb->chassis != chassis) { + continue; + } + + for (i = 0; i < pb->n_mac; i++) { + struct lport_addresses laddrs; + + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { + continue; + } + + ea = laddrs.ea; + if (laddrs.n_ipv4_addrs > 0) { + ip_src = laddrs.ipv4_addrs[0].addr; + destroy_lport_addresses(&laddrs); + break; + } + destroy_lport_addresses(&laddrs); + } + + if (eth_addr_is_zero(ea)) { + continue; + } + + entry = xzalloc(sizeof *entry); + entry->src_mac = ea; + entry->ip_src = ip_src; + entry->ip_dst = ip_dst; + entry->udp_src = bt->src_port; + entry->disc = htonl(bt->disc); + entry->next_tx = cur_time; + entry->metadata = pb->datapath->tunnel_key; + entry->port_key = pb->tunnel_key; + + uint32_t hash = hash_string(bt->dst_ip, 0); + hmap_insert(&bfd_monitor_map, &entry->node, hash); + changed = true; + } + entry->last_update = cur_time; + } + + struct bfd_entry *entry, *next_entry; + HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { + if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) { + hmap_remove(&bfd_monitor_map, &entry->node); + free(entry); + } + } + + if (changed) { + notify_pinctrl_handler(); + } +} + static uint16_t get_random_src_port(void) { diff --git a/controller/pinctrl.h b/controller/pinctrl.h index 4b101ec92..8555d983d 100644 --- a/controller/pinctrl.h +++ b/controller/pinctrl.h @@ -31,6 +31,7 @@ struct sbrec_chassis; struct sbrec_dns_table; struct sbrec_controller_event_table; struct sbrec_service_monitor_table; +struct sbrec_bfd_table; void pinctrl_init(void); void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_dns_table *, const struct sbrec_controller_event_table *, const struct sbrec_service_monitor_table *, + const struct sbrec_bfd_table *, const struct ovsrec_bridge *, const struct sbrec_chassis *, const struct hmap *local_datapaths, const struct sset *active_tunnels); diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h index c84a0e7a9..d00982449 100644 --- a/lib/ovn-l7.h +++ b/lib/ovn-l7.h @@ -26,6 +26,25 @@ #include "hash.h" #include "ovn/logical-fields.h" +#define BFD_PACKET_LEN 24 +#define BFD_DEST_PORT 3784 +#define BFD_VERSION 1 +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ + +struct bfd_msg { + uint8_t vers_diag; + uint8_t flags; + uint8_t mult; + uint8_t length; + ovs_be32 my_disc; + ovs_be32 your_disc; + ovs_be32 min_tx; + ovs_be32 min_rx; + ovs_be32 min_rx_echo; +}; +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); + /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */ struct gen_opts_map { struct hmap_node hmap_node; diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index b377dffa1..e8c7629e7 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, } } +struct bfd_entry { + struct hmap_node hmap_node; + + const struct sbrec_bfd *sb_bt; + bool found; +}; + +static struct bfd_entry * +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, + const char *dst_ip) +{ + struct bfd_entry *bfd_e; + uint32_t hash; + + hash = hash_string(dst_ip, 0); + hash = hash_string(logical_port, hash); + HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { + if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && + !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { + return bfd_e; + } + } + return NULL; +} + +static void +build_bfd_table(struct northd_context *ctx) +{ + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); + struct bfd_entry *bfd_e, *next_bfd_e; + const struct sbrec_bfd *sb_bt; + uint32_t hash; + + SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { + bfd_e = xmalloc(sizeof *bfd_e); + bfd_e->sb_bt = sb_bt; + bfd_e->found = false; + hash = hash_string(sb_bt->dst_ip, 0); + hash = hash_string(sb_bt->logical_port, hash); + hmap_insert(&sb_only, &bfd_e->hmap_node, hash); + } + + const struct nbrec_bfd *nb_bt; + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); + if (!bfd_e) { + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); + sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); + /* RFC 5881 section 4 + * The source port MUST be in the range 49152 through 65535. + * The same UDP source port number MUST be used for all BFD + * Control packets associated with a particular session. + * The source port number SHOULD be unique among all BFD + * sessions on the system + */ + uint16_t udp_src = random_range(16383) + 49152; + sbrec_bfd_set_src_port(sb_bt, udp_src); + sbrec_bfd_set_status(sb_bt, "down"); + sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx); + sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx); + sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult); + } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { + if (!strcmp(nb_bt->status, "admin_down") || + !strcmp(bfd_e->sb_bt->status, "admin_down")) { + sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); + } else { + nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); + } + } + if (bfd_e) { + bfd_e->found = true; + } + } + + HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { + if (!bfd_e->found && bfd_e->sb_bt) { + sbrec_bfd_delete(bfd_e->sb_bt); + } + hmap_remove(&sb_only, &bfd_e->hmap_node); + free(bfd_e); + } + hmap_destroy(&sb_only); +} + static void sync_address_set(struct northd_context *ctx, const char *name, const char **addrs, size_t n_addrs, @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx, build_ip_mcast(ctx, datapaths); build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); build_meter_groups(ctx, &meter_groups); + build_bfd_table(ctx); build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, &igmp_groups, &meter_groups, &lbs); ovn_update_ipv6_prefix(ports); @@ -13463,6 +13550,10 @@ main(int argc, char *argv[]) add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_external_ids); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); + struct ovsdb_idl_index *sbrec_chassis_by_name = chassis_index_create(ovnsb_idl_loop.idl); diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema index b77a2308c..a880482a4 100644 --- a/ovn-nb.ovsschema +++ b/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", - "version": "5.30.0", - "cksum": "3273824429 27172", + "version": "5.31.0", + "cksum": "3663844734 28178", "tables": { "NB_Global": { "columns": { @@ -526,5 +526,25 @@ "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "indexes": [["name"]], + "isRoot": true}, + "BFD": { + "columns": { + "logical_port": {"type": "string"}, + "dst_ip": {"type": "string"}, + "min_tx": {"type": {"key": {"type": "integer"}}}, + "min_rx": {"type": {"key": {"type": "integer"}}}, + "detect_mult": {"type": {"key": {"type": "integer"}}}, + "status": { + "type": {"key": {"type": "string", + "enum": ["set", ["down", "init", "up", + "admin_down"]]}, + "min": 0, "max": 1}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, + "options": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "indexes": [["logical_port", "dst_ip"]], "isRoot": true}} } diff --git a/ovn-nb.xml b/ovn-nb.xml index 0cf043790..4a28d3f0d 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3728,4 +3728,70 @@ </column> </group> </table> + + <table name="BFD"> + <p> + Contains BFD parameter for ovn-controller bfd configuration + </p> + + <group title="Configuration"> + <column name="logical_port"> + OVN logical port when BFD engine is running. + </column> + + <column name="dst_ip"> + BFD peer IP address. + </column> + + <column name="min_tx"> + This is the minimum interval, in milliseconds, that the local + system would like to use when transmitting BFD Control packets, + less any jitter applied. The value zero is reserved. + </column> + + <column name="min_rx"> + This is the minimum interval, in milliseconds, between received + BFD Control packets that this system is capable of supporting, + less any jitter applied by the sender. If this value is zero, + the transmitting system does not want the remote system to send + any periodic BFD Control packets. + </column> + + <column name="detect_mult"> + Detection time multiplier. The negotiated transmit interval, + multiplied by this value, provides the Detection Time for the + receiving system in Asynchronous mode. + </column> + + <column name="options"> + Reserved for future use. + </column> + + <column name="external_ids"> + See <em>External IDs</em> at the beginning of this document. + </column> + </group> + + <group title="Status Reporting"> + <column name="status"> + <p> + BFD port logical states. Possible values are: + <ul> + <li> + <code>admin_down</code> + </li> + <li> + <code>down</code> + </li> + <li> + <code>init</code> + </li> + <li> + <code>up</code> + </li> + </ul> + </p> + </column> + </group> + </table> </database> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema index 5228839b8..97db6de39 100644 --- a/ovn-sb.ovsschema +++ b/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "20.12.0", - "cksum": "3969471120 24441", + "version": "20.13.0", + "cksum": "3035725595 25676", "tables": { "SB_Global": { "columns": { @@ -484,6 +484,29 @@ "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, + "isRoot": true}, + "BFD": { + "columns": { + "src_port": {"type": {"key": {"type": "integer", + "minInteger": 49152, + "maxInteger": 65535}}}, + "disc": {"type": {"key": {"type": "integer"}}}, + "logical_port": {"type": "string"}, + "dst_ip": {"type": "string"}, + "min_tx": {"type": {"key": {"type": "integer"}}}, + "min_rx": {"type": {"key": {"type": "integer"}}}, + "detect_mult": {"type": {"key": {"type": "integer"}}}, + "status": { + "type": {"key": {"type": "string", + "enum": ["set", ["down", "init", "up", + "admin_down"]]}}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, + "options": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], "isRoot": true} } } diff --git a/ovn-sb.xml b/ovn-sb.xml index c13994848..ccfe2e415 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -4231,4 +4231,82 @@ tcp.flags = RST; </column> </group> </table> + + <table name="BFD"> + <p> + Contains BFD parameter for ovn-controller bfd configuration + </p> + + <group title="Configuration"> + <column name="src_port"> + udp source port used in bfd control packets. + The source port MUST be in the range 49152 through 65535 + (RFC5881 section 4). + </column> + + <column name="disc"> + A unique, nonzero discriminator value generated by the transmitting + system, used to demultiplex multiple BFD sessions between the same pair + of systems. + </column> + + <column name="logical_port"> + OVN logical port when BFD engine is running. + </column> + + <column name="dst_ip"> + BFD peer IP address. + </column> + + <column name="min_tx"> + This is the minimum interval, in milliseconds, that the local + system would like to use when transmitting BFD Control packets, + less any jitter applied. The value zero is reserved. + </column> + + <column name="min_rx"> + This is the minimum interval, in milliseconds, between received + BFD Control packets that this system is capable of supporting, + less any jitter applied by the sender. If this value is zero, + the transmitting system does not want the remote system to send + any periodic BFD Control packets. + </column> + + <column name="detect_mult"> + Detection time multiplier. The negotiated transmit interval, + multiplied by this value, provides the Detection Time for the + receiving system in Asynchronous mode. + </column> + + <column name="options"> + Reserved for future use. + </column> + + <column name="external_ids"> + See <em>External IDs</em> at the beginning of this document. + </column> + </group> + + <group title="Status Reporting"> + <column name="status"> + <p> + BFD port logical states. Possible values are: + <ul> + <li> + <code>admin_down</code> + </li> + <li> + <code>down</code> + </li> + <li> + <code>init</code> + </li> + <li> + <code>up</code> + </li> + </ul> + </p> + </column> + </group> + </table> </database>
Introduce the capability to transmit BFD packets in ovn-controller. Introduce BFD tables in nb/sb dbs in order to configure BFD parameters (e.g. min_tx, min_rx, ..) for ovn-controller. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- controller/ovn-controller.c | 1 + controller/pinctrl.c | 288 +++++++++++++++++++++++++++++++++++- controller/pinctrl.h | 2 + lib/ovn-l7.h | 19 +++ northd/ovn-northd.c | 91 ++++++++++++ ovn-nb.ovsschema | 24 ++- ovn-nb.xml | 66 +++++++++ ovn-sb.ovsschema | 27 +++- ovn-sb.xml | 78 ++++++++++ 9 files changed, 591 insertions(+), 5 deletions(-)