diff mbox series

[ovs-dev,v3,1/5] controller: introduce BFD tx path in ovn-controller

Message ID 3abdeea2a7124d62b1ec6f28fe48d0d14aac5315.1608128903.git.lorenzo.bianconi@redhat.com
State Superseded
Headers show
Series introduce BFD support in ovn-controller | expand

Commit Message

Lorenzo Bianconi Dec. 16, 2020, 2:41 p.m. UTC
Introduce the capability to transmit BFD packets in ovn-controller.
Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
(e.g. min_tx, min_rx, ..) for ovn-controller.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 controller/ovn-controller.c |   1 +
 controller/pinctrl.c        | 288 +++++++++++++++++++++++++++++++++++-
 controller/pinctrl.h        |   2 +
 lib/ovn-l7.h                |  19 +++
 northd/ovn-northd.c         |  91 ++++++++++++
 ovn-nb.ovsschema            |  24 ++-
 ovn-nb.xml                  |  66 +++++++++
 ovn-sb.ovsschema            |  27 +++-
 ovn-sb.xml                  |  78 ++++++++++
 9 files changed, 591 insertions(+), 5 deletions(-)

Comments

Numan Siddique Dec. 18, 2020, 1:30 p.m. UTC | #1
On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi
<lorenzo.bianconi@redhat.com> wrote:
>
> Introduce the capability to transmit BFD packets in ovn-controller.
> Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
> (e.g. min_tx, min_rx, ..) for ovn-controller.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>

Hi Lorenzo,

Please see below for a few comments.

Thanks
Numan

> ---
>  controller/ovn-controller.c |   1 +
>  controller/pinctrl.c        | 288 +++++++++++++++++++++++++++++++++++-
>  controller/pinctrl.h        |   2 +
>  lib/ovn-l7.h                |  19 +++
>  northd/ovn-northd.c         |  91 ++++++++++++
>  ovn-nb.ovsschema            |  24 ++-
>  ovn-nb.xml                  |  66 +++++++++
>  ovn-sb.ovsschema            |  27 +++-
>  ovn-sb.xml                  |  78 ++++++++++
>  9 files changed, 591 insertions(+), 5 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index b5f0c1315..84b3f85ad 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -2864,6 +2864,7 @@ main(int argc, char *argv[])
>                                          ovnsb_idl_loop.idl),
>                                      sbrec_service_monitor_table_get(
>                                          ovnsb_idl_loop.idl),
> +                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
>                                      br_int, chassis,
>                                      &runtime_data->local_datapaths,
>                                      &runtime_data->active_tunnels);
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 7e3abf0a4..ae994b513 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>  static void notify_pinctrl_main(void);
>  static void notify_pinctrl_handler(void);
>
> +static bool bfd_monitor_should_inject(void);
> +static void bfd_monitor_wait(long long int timeout);
> +static void bfd_monitor_init(void);
> +static void bfd_monitor_destroy(void);
> +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> +                                 OVS_REQUIRES(pinctrl_mutex);
> +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> +                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                            const struct sbrec_chassis *chassis,
> +                            const struct sset *active_tunnels)
> +                            OVS_REQUIRES(pinctrl_mutex);
> +
>  COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
>  COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
>  COVERAGE_DEFINE(pinctrl_drop_controller_event);
> @@ -487,6 +499,7 @@ pinctrl_init(void)
>      ip_mcast_snoop_init();
>      init_put_vport_bindings();
>      init_svc_monitors();
> +    bfd_monitor_init();
>      pinctrl.br_int_name = NULL;
>      pinctrl_handler_seq = seq_create();
>      pinctrl_main_seq = seq_create();
> @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_)
>      swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
>
>      while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
> +        long long int bfd_time = LLONG_MAX;
> +
>          ovs_mutex_lock(&pinctrl_mutex);
>          pinctrl_rconn_setup(swconn, pctrl->br_int_name);
>          ip_mcast_snoop_run();
> @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_)
>                  send_ipv6_ras(swconn, &send_ipv6_ra_time);
>                  send_ipv6_prefixd(swconn, &send_prefixd_time);
>                  send_mac_binding_buffered_pkts(swconn);
> +                bfd_monitor_send_msg(swconn, &bfd_time);
>                  ovs_mutex_unlock(&pinctrl_mutex);
>
>                  ip_mcast_querier_run(swconn, &send_mcast_query_time);
> @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_)
>          ip_mcast_querier_wait(send_mcast_query_time);
>          svc_monitors_wait(svc_monitors_next_run_time);
>          ipv6_prefixd_wait(send_prefixd_time);
> +        bfd_monitor_wait(bfd_time);
>
>          new_seq = seq_read(pinctrl_handler_seq);
>          seq_wait(pinctrl_handler_seq, new_seq);
> @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>              const struct sbrec_dns_table *dns_table,
>              const struct sbrec_controller_event_table *ce_table,
>              const struct sbrec_service_monitor_table *svc_mon_table,
> +            const struct sbrec_bfd_table *bfd_table,
>              const struct ovsrec_bridge *br_int,
>              const struct sbrec_chassis *chassis,
>              const struct hmap *local_datapaths,
> @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                           local_datapaths);
>      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
>                        chassis);
> +    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
> +                    active_tunnels);
>      ovs_mutex_unlock(&pinctrl_mutex);
>  }
>
> @@ -3722,6 +3742,7 @@ pinctrl_destroy(void)
>      destroy_dns_cache();
>      ip_mcast_snoop_destroy();
>      destroy_svc_monitors();
> +    bfd_monitor_destroy();
>      seq_destroy(pinctrl_main_seq);
>      seq_destroy(pinctrl_handler_seq);
>  }
> @@ -5525,7 +5546,8 @@ may_inject_pkts(void)
>              !shash_is_empty(&send_garp_rarp_data) ||
>              ipv6_prefixd_should_inject() ||
>              !ovs_list_is_empty(&mcast_query_list) ||
> -            !ovs_list_is_empty(&buffered_mac_bindings));
> +            !ovs_list_is_empty(&buffered_mac_bindings) ||
> +            bfd_monitor_should_inject());
>  }
>
>  static void
> @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
>  }
>
> +static struct hmap bfd_monitor_map;
> +
> +struct bfd_entry {
> +    struct hmap_node node;
> +
> +    /* L2 source address */
> +    struct eth_addr src_mac;
> +    /* IPv4 source address */
> +    ovs_be32 ip_src;
> +    /* IPv4 destination address */
> +    ovs_be32 ip_dst;
> +    /* RFC 5881 section 4
> +     * The source port MUST be in the range 49152 through 65535.
> +     * The same UDP source port number MUST be used for all BFD
> +     * Control packets associated with a particular session.
> +     * The source port number SHOULD be unique among all BFD
> +     * sessions on the system
> +     */
> +    uint16_t udp_src;
> +    ovs_be32 disc;
> +
> +    int64_t port_key;
> +    int64_t metadata;
> +
> +    long long int last_update;
> +    long long int next_tx;
> +};
> +
> +static void
> +bfd_monitor_init(void)
> +{
> +    hmap_init(&bfd_monitor_map);
> +}
> +
> +static void
> +bfd_monitor_destroy(void)
> +{
> +    hmap_destroy(&bfd_monitor_map);

I think you also need to remove the hmap nodes  and free the 'struct
bfd_entry' objects
before destroying the hmap.


> +}
> +
> +static struct bfd_entry *
> +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
> +{
> +    struct bfd_entry *entry;
> +    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
> +                             &bfd_monitor_map) {
> +        if (entry->udp_src == port) {
> +            return entry;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static bool
> +bfd_monitor_should_inject(void)
> +{
> +    long long int cur_time = time_msec();
> +    struct bfd_entry *entry;
> +
> +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> +        if (entry->next_tx < cur_time) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +static void
> +bfd_monitor_wait(long long int timeout)
> +{
> +    if (!hmap_is_empty(&bfd_monitor_map)) {
> +        poll_timer_wait_until(timeout);
> +    }
> +}
> +
> +static void
> +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
> +{
> +    struct udp_header *udp;
> +    struct bfd_msg *msg;
> +
> +    dp_packet_reserve(packet, 2);
> +    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
> +    eth->eth_dst = eth_addr_broadcast;
> +    eth->eth_src = entry->src_mac;
> +    eth->eth_type = htons(ETH_TYPE_IP);
> +
> +    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
> +    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
> +    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
> +    ip->ip_ttl = MAXTTL;
> +    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
> +    ip->ip_proto = IPPROTO_UDP;
> +    put_16aligned_be32(&ip->ip_src, entry->ip_src);
> +    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
> +    /* Checksum has already been zeroed by put_zeros call. */
> +    ip->ip_csum = csum(ip, sizeof *ip);
> +
> +    udp = dp_packet_put_zeros(packet, sizeof *udp);
> +    udp->udp_src = htons(entry->udp_src);
> +    udp->udp_dst = htons(BFD_DEST_PORT);
> +    udp->udp_len = htons(sizeof *udp + sizeof *msg);
> +
> +    msg = dp_packet_put_uninit(packet, sizeof *msg);
> +    msg->vers_diag = (BFD_VERSION << 5);
> +    msg->length = BFD_PACKET_LEN;
> +}
> +
> +static void
> +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    long long int cur_time = time_msec();
> +    struct bfd_entry *entry;
> +
> +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> +        if (cur_time < entry->next_tx) {
> +            goto next;
> +        }
> +
> +        uint64_t packet_stub[256 / 8];
> +        struct dp_packet packet;
> +        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> +        bfd_monitor_put_bfd_msg(entry, &packet);
> +
> +        uint64_t ofpacts_stub[4096 / 8];
> +        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> +
> +        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> +        uint32_t dp_key = entry->metadata;
> +        uint32_t port_key = entry->port_key;
> +        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> +        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> +        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
> +        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> +        resubmit->in_port = OFPP_CONTROLLER;
> +        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> +
> +        struct ofputil_packet_out po = {
> +            .packet = dp_packet_data(&packet),
> +            .packet_len = dp_packet_size(&packet),
> +            .buffer_id = UINT32_MAX,
> +            .ofpacts = ofpacts.data,
> +            .ofpacts_len = ofpacts.size,
> +        };
> +
> +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> +        enum ofp_version version = rconn_get_version(swconn);
> +        enum ofputil_protocol proto =
> +            ofputil_protocol_from_ofp_version(version);
> +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> +        dp_packet_uninit(&packet);
> +        ofpbuf_uninit(&ofpacts);
> +
> +        entry->next_tx = cur_time + 5000;
> +next:
> +        if (*bfd_time > entry->next_tx) {
> +            *bfd_time = entry->next_tx;

I think there is  some problem here. I applied all the patches of this
series and deployed
fake multinode.

When I create a BFD entry like below, ovn-controller pinctrl thread on
ovn-gw-1 fake chassis, just wakes up
100% of the time.

---
ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50
---

In the ovn-controller logs I see

----
2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due
to 0-ms timeout at controller/pinctrl.c:6495
2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due
to 0-ms timeout at controller/pinctrl.c:6495
2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due
to 0-ms timeout at controller/pinctrl.c:6495
2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due
to 0-ms timeout at controller/pinctrl.c:6495
...
..
----

Thanks
Numan


> +        }
> +    }
> +}
> +
> +#define BFD_MONITOR_STALE_TIMEOUT  180000LL
> +static void
> +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> +                struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                const struct sbrec_chassis *chassis,
> +                const struct sset *active_tunnels)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    long long int cur_time = time_msec();
> +    bool changed = false;
> +
> +    const struct sbrec_bfd *bt;
> +    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
> +        const struct sbrec_port_binding *pb
> +            = lport_lookup_by_name(sbrec_port_binding_by_name,
> +                                   bt->logical_port);
> +        if (!pb) {
> +            continue;
> +        }
> +
> +        struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port(
> +                bt->dst_ip, bt->src_port);
> +        if (!entry) {
> +            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
> +            struct eth_addr ea = eth_addr_zero;
> +            int i;
> +
> +            if (!ip_parse(bt->dst_ip, &ip_dst)) {
> +                continue;
> +            }
> +
> +            const char *peer_s = smap_get(&pb->options, "peer");
> +            if (!peer_s) {
> +                continue;
> +            }
> +
> +            const struct sbrec_port_binding *peer
> +                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
> +            if (!peer) {
> +                continue;
> +            }
> +
> +            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> +            bool resident = lport_is_chassis_resident(
> +                    sbrec_port_binding_by_name, chassis, active_tunnels,
> +                    redirect_name);
> +            free(redirect_name);
> +            if (!resident && strcmp(pb->type, "l3gateway") &&
> +                pb->chassis != chassis) {
> +                continue;
> +            }
> +
> +            for (i = 0; i < pb->n_mac; i++) {
> +                struct lport_addresses laddrs;
> +
> +                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
> +                    continue;
> +                }
> +
> +                ea = laddrs.ea;
> +                if (laddrs.n_ipv4_addrs > 0) {
> +                    ip_src = laddrs.ipv4_addrs[0].addr;
> +                    destroy_lport_addresses(&laddrs);
> +                    break;
> +                }
> +                destroy_lport_addresses(&laddrs);
> +            }
> +
> +            if (eth_addr_is_zero(ea)) {
> +                continue;
> +            }
> +
> +            entry = xzalloc(sizeof *entry);
> +            entry->src_mac = ea;
> +            entry->ip_src = ip_src;
> +            entry->ip_dst = ip_dst;
> +            entry->udp_src = bt->src_port;
> +            entry->disc = htonl(bt->disc);
> +            entry->next_tx = cur_time;
> +            entry->metadata = pb->datapath->tunnel_key;
> +            entry->port_key = pb->tunnel_key;
> +
> +            uint32_t hash = hash_string(bt->dst_ip, 0);
> +            hmap_insert(&bfd_monitor_map, &entry->node, hash);
> +            changed = true;
> +        }
> +        entry->last_update = cur_time;
> +    }
> +
> +    struct bfd_entry *entry, *next_entry;
> +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> +        if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) {
> +            hmap_remove(&bfd_monitor_map, &entry->node);
> +            free(entry);
> +        }
> +    }
> +
> +    if (changed) {
> +        notify_pinctrl_handler();
> +    }
> +}
> +
>  static uint16_t
>  get_random_src_port(void)
>  {
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 4b101ec92..8555d983d 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -31,6 +31,7 @@ struct sbrec_chassis;
>  struct sbrec_dns_table;
>  struct sbrec_controller_event_table;
>  struct sbrec_service_monitor_table;
> +struct sbrec_bfd_table;
>
>  void pinctrl_init(void);
>  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                   const struct sbrec_dns_table *,
>                   const struct sbrec_controller_event_table *,
>                   const struct sbrec_service_monitor_table *,
> +                 const struct sbrec_bfd_table *,
>                   const struct ovsrec_bridge *, const struct sbrec_chassis *,
>                   const struct hmap *local_datapaths,
>                   const struct sset *active_tunnels);
> diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> index c84a0e7a9..d00982449 100644
> --- a/lib/ovn-l7.h
> +++ b/lib/ovn-l7.h
> @@ -26,6 +26,25 @@
>  #include "hash.h"
>  #include "ovn/logical-fields.h"
>
> +#define BFD_PACKET_LEN  24
> +#define BFD_DEST_PORT   3784
> +#define BFD_VERSION     1
> +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
> +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
> +
> +struct bfd_msg {
> +    uint8_t vers_diag;
> +    uint8_t flags;
> +    uint8_t mult;
> +    uint8_t length;
> +    ovs_be32 my_disc;
> +    ovs_be32 your_disc;
> +    ovs_be32 min_tx;
> +    ovs_be32 min_rx;
> +    ovs_be32 min_rx_echo;
> +};
> +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
> +
>  /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
>  struct gen_opts_map {
>      struct hmap_node hmap_node;
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index b377dffa1..e8c7629e7 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
>      }
>  }
>
> +struct bfd_entry {
> +    struct hmap_node hmap_node;
> +
> +    const struct sbrec_bfd *sb_bt;
> +    bool found;
> +};
> +
> +static struct bfd_entry *
> +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
> +                const char *dst_ip)
> +{
> +    struct bfd_entry *bfd_e;
> +    uint32_t hash;
> +
> +    hash = hash_string(dst_ip, 0);
> +    hash = hash_string(logical_port, hash);
> +    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
> +        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
> +            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
> +            return bfd_e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static void
> +build_bfd_table(struct northd_context *ctx)
> +{
> +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> +    struct bfd_entry *bfd_e, *next_bfd_e;
> +    const struct sbrec_bfd *sb_bt;
> +    uint32_t hash;
> +
> +    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
> +        bfd_e = xmalloc(sizeof *bfd_e);
> +        bfd_e->sb_bt = sb_bt;
> +        bfd_e->found = false;
> +        hash = hash_string(sb_bt->dst_ip, 0);
> +        hash = hash_string(sb_bt->logical_port, hash);
> +        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
> +    }
> +
> +    const struct nbrec_bfd *nb_bt;
> +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> +        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
> +        if (!bfd_e) {
> +            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
> +            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> +            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> +            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
> +            /* RFC 5881 section 4
> +             * The source port MUST be in the range 49152 through 65535.
> +             * The same UDP source port number MUST be used for all BFD
> +             * Control packets associated with a particular session.
> +             * The source port number SHOULD be unique among all BFD
> +             * sessions on the system
> +             */
> +            uint16_t udp_src = random_range(16383) + 49152;
> +            sbrec_bfd_set_src_port(sb_bt, udp_src);
> +            sbrec_bfd_set_status(sb_bt, "down");
> +            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> +            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> +            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> +        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
> +            if (!strcmp(nb_bt->status, "admin_down") ||
> +                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
> +                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
> +            } else {
> +                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
> +            }
> +        }
> +        if (bfd_e) {
> +            bfd_e->found = true;
> +        }
> +    }
> +
> +    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
> +        if (!bfd_e->found && bfd_e->sb_bt) {
> +            sbrec_bfd_delete(bfd_e->sb_bt);
> +        }
> +        hmap_remove(&sb_only, &bfd_e->hmap_node);
> +        free(bfd_e);
> +    }
> +    hmap_destroy(&sb_only);
> +}
> +
>  static void
>  sync_address_set(struct northd_context *ctx, const char *name,
>                   const char **addrs, size_t n_addrs,
> @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx,
>      build_ip_mcast(ctx, datapaths);
>      build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
>      build_meter_groups(ctx, &meter_groups);
> +    build_bfd_table(ctx);
>      build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
>                   &igmp_groups, &meter_groups, &lbs);
>      ovn_update_ipv6_prefix(ports);
> @@ -13463,6 +13550,10 @@ main(int argc, char *argv[])
>      add_column_noalert(ovnsb_idl_loop.idl,
>                         &sbrec_load_balancer_col_external_ids);
>
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
> +
>      struct ovsdb_idl_index *sbrec_chassis_by_name
>          = chassis_index_create(ovnsb_idl_loop.idl);
>
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b77a2308c..a880482a4 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "5.30.0",
> -    "cksum": "3273824429 27172",
> +    "version": "5.31.0",
> +    "cksum": "3663844734 28178",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -526,5 +526,25 @@
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
>              "indexes": [["name"]],
> +            "isRoot": true},
> +        "BFD": {
> +            "columns": {
> +                "logical_port": {"type": "string"},
> +                "dst_ip": {"type": "string"},
> +                "min_tx": {"type": {"key": {"type": "integer"}}},
> +                "min_rx": {"type": {"key": {"type": "integer"}}},
> +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> +                "status": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set", ["down", "init", "up",
> +                                              "admin_down"]]},
> +                             "min": 0, "max": 1}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["logical_port", "dst_ip"]],
>              "isRoot": true}}
>      }
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 0cf043790..4a28d3f0d 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -3728,4 +3728,70 @@
>        </column>
>      </group>
>    </table>
> +
> +  <table name="BFD">
> +    <p>
> +      Contains BFD parameter for ovn-controller bfd configuration
> +    </p>
> +
> +    <group title="Configuration">
> +      <column name="logical_port">
> +        OVN logical port when BFD engine is running.
> +      </column>
> +
> +      <column name="dst_ip">
> +        BFD peer IP address.
> +      </column>
> +
> +      <column name="min_tx">
> +        This is the minimum interval, in milliseconds, that the local
> +        system would like to use when transmitting BFD Control packets,
> +        less any jitter applied. The value zero is reserved.
> +      </column>
> +
> +      <column name="min_rx">
> +        This is the minimum interval, in milliseconds, between received
> +        BFD Control packets that this system is capable of supporting,
> +        less any jitter applied by the sender. If this value is zero,
> +        the transmitting system does not want the remote system to send
> +        any periodic BFD Control packets.
> +      </column>
> +
> +      <column name="detect_mult">
> +        Detection time multiplier.  The negotiated transmit interval,
> +        multiplied by this value, provides the Detection Time for the
> +        receiving system in Asynchronous mode.
> +      </column>
> +
> +      <column name="options">
> +        Reserved for future use.
> +      </column>
> +
> +      <column name="external_ids">
> +        See <em>External IDs</em> at the beginning of this document.
> +      </column>
> +    </group>
> +
> +    <group title="Status Reporting">
> +      <column name="status">
> +        <p>
> +          BFD port logical states. Possible values are:
> +          <ul>
> +            <li>
> +              <code>admin_down</code>
> +            </li>
> +            <li>
> +              <code>down</code>
> +            </li>
> +            <li>
> +              <code>init</code>
> +            </li>
> +            <li>
> +              <code>up</code>
> +            </li>
> +          </ul>
> +        </p>
> +      </column>
> +    </group>
> +  </table>
>  </database>
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 5228839b8..97db6de39 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.12.0",
> -    "cksum": "3969471120 24441",
> +    "version": "20.13.0",
> +    "cksum": "3035725595 25676",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -484,6 +484,29 @@
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> +            "isRoot": true},
> +        "BFD": {
> +            "columns": {
> +                "src_port": {"type": {"key": {"type": "integer",
> +                                          "minInteger": 49152,
> +                                          "maxInteger": 65535}}},
> +                "disc": {"type": {"key": {"type": "integer"}}},
> +                "logical_port": {"type": "string"},
> +                "dst_ip": {"type": "string"},
> +                "min_tx": {"type": {"key": {"type": "integer"}}},
> +                "min_rx": {"type": {"key": {"type": "integer"}}},
> +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> +                "status": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set", ["down", "init", "up",
> +                                              "admin_down"]]}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
>              "isRoot": true}
>      }
>  }
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index c13994848..ccfe2e415 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -4231,4 +4231,82 @@ tcp.flags = RST;
>        </column>
>      </group>
>    </table>
> +
> +  <table name="BFD">
> +    <p>
> +      Contains BFD parameter for ovn-controller bfd configuration
> +    </p>
> +
> +    <group title="Configuration">
> +      <column name="src_port">
> +        udp source port used in bfd control packets.
> +        The source port MUST be in the range 49152 through 65535
> +        (RFC5881 section 4).
> +      </column>
> +
> +      <column name="disc">
> +        A unique, nonzero discriminator value generated by the transmitting
> +        system, used to demultiplex multiple BFD sessions between the same pair
> +        of systems.
> +      </column>
> +
> +      <column name="logical_port">
> +        OVN logical port when BFD engine is running.
> +      </column>
> +
> +      <column name="dst_ip">
> +        BFD peer IP address.
> +      </column>
> +
> +      <column name="min_tx">
> +        This is the minimum interval, in milliseconds, that the local
> +        system would like to use when transmitting BFD Control packets,
> +        less any jitter applied. The value zero is reserved.
> +      </column>
> +
> +      <column name="min_rx">
> +        This is the minimum interval, in milliseconds, between received
> +        BFD Control packets that this system is capable of supporting,
> +        less any jitter applied by the sender. If this value is zero,
> +        the transmitting system does not want the remote system to send
> +        any periodic BFD Control packets.
> +      </column>
> +
> +      <column name="detect_mult">
> +        Detection time multiplier.  The negotiated transmit interval,
> +        multiplied by this value, provides the Detection Time for the
> +        receiving system in Asynchronous mode.
> +      </column>
> +
> +      <column name="options">
> +        Reserved for future use.
> +      </column>
> +
> +      <column name="external_ids">
> +        See <em>External IDs</em> at the beginning of this document.
> +      </column>
> +    </group>
> +
> +    <group title="Status Reporting">
> +      <column name="status">
> +        <p>
> +          BFD port logical states. Possible values are:
> +          <ul>
> +            <li>
> +              <code>admin_down</code>
> +            </li>
> +            <li>
> +              <code>down</code>
> +            </li>
> +            <li>
> +              <code>init</code>
> +            </li>
> +            <li>
> +              <code>up</code>
> +            </li>
> +          </ul>
> +        </p>
> +      </column>
> +    </group>
> +  </table>
>  </database>
> --
> 2.29.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Numan Siddique Dec. 18, 2020, 1:31 p.m. UTC | #2
On Fri, Dec 18, 2020 at 7:00 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
> >
> > Introduce the capability to transmit BFD packets in ovn-controller.
> > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
> > (e.g. min_tx, min_rx, ..) for ovn-controller.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>
> Hi Lorenzo,
>
> Please see below for a few comments.
>
> Thanks
> Numan
>
> > ---
> >  controller/ovn-controller.c |   1 +
> >  controller/pinctrl.c        | 288 +++++++++++++++++++++++++++++++++++-
> >  controller/pinctrl.h        |   2 +
> >  lib/ovn-l7.h                |  19 +++
> >  northd/ovn-northd.c         |  91 ++++++++++++
> >  ovn-nb.ovsschema            |  24 ++-
> >  ovn-nb.xml                  |  66 +++++++++
> >  ovn-sb.ovsschema            |  27 +++-
> >  ovn-sb.xml                  |  78 ++++++++++
> >  9 files changed, 591 insertions(+), 5 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index b5f0c1315..84b3f85ad 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[])
> >                                          ovnsb_idl_loop.idl),
> >                                      sbrec_service_monitor_table_get(
> >                                          ovnsb_idl_loop.idl),
> > +                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
> >                                      br_int, chassis,
> >                                      &runtime_data->local_datapaths,
> >                                      &runtime_data->active_tunnels);
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 7e3abf0a4..ae994b513 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
> >  static void notify_pinctrl_main(void);
> >  static void notify_pinctrl_handler(void);
> >
> > +static bool bfd_monitor_should_inject(void);
> > +static void bfd_monitor_wait(long long int timeout);
> > +static void bfd_monitor_init(void);
> > +static void bfd_monitor_destroy(void);
> > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > +                                 OVS_REQUIRES(pinctrl_mutex);
> > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > +                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +                            const struct sbrec_chassis *chassis,
> > +                            const struct sset *active_tunnels)
> > +                            OVS_REQUIRES(pinctrl_mutex);
> > +
> >  COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
> >  COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
> >  COVERAGE_DEFINE(pinctrl_drop_controller_event);
> > @@ -487,6 +499,7 @@ pinctrl_init(void)
> >      ip_mcast_snoop_init();
> >      init_put_vport_bindings();
> >      init_svc_monitors();
> > +    bfd_monitor_init();
> >      pinctrl.br_int_name = NULL;
> >      pinctrl_handler_seq = seq_create();
> >      pinctrl_main_seq = seq_create();
> > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_)
> >      swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
> >
> >      while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
> > +        long long int bfd_time = LLONG_MAX;
> > +
> >          ovs_mutex_lock(&pinctrl_mutex);
> >          pinctrl_rconn_setup(swconn, pctrl->br_int_name);
> >          ip_mcast_snoop_run();
> > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_)
> >                  send_ipv6_ras(swconn, &send_ipv6_ra_time);
> >                  send_ipv6_prefixd(swconn, &send_prefixd_time);
> >                  send_mac_binding_buffered_pkts(swconn);
> > +                bfd_monitor_send_msg(swconn, &bfd_time);
> >                  ovs_mutex_unlock(&pinctrl_mutex);
> >
> >                  ip_mcast_querier_run(swconn, &send_mcast_query_time);
> > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_)
> >          ip_mcast_querier_wait(send_mcast_query_time);
> >          svc_monitors_wait(svc_monitors_next_run_time);
> >          ipv6_prefixd_wait(send_prefixd_time);
> > +        bfd_monitor_wait(bfd_time);
> >
> >          new_seq = seq_read(pinctrl_handler_seq);
> >          seq_wait(pinctrl_handler_seq, new_seq);
> > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >              const struct sbrec_dns_table *dns_table,
> >              const struct sbrec_controller_event_table *ce_table,
> >              const struct sbrec_service_monitor_table *svc_mon_table,
> > +            const struct sbrec_bfd_table *bfd_table,
> >              const struct ovsrec_bridge *br_int,
> >              const struct sbrec_chassis *chassis,
> >              const struct hmap *local_datapaths,
> > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >                           local_datapaths);
> >      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
> >                        chassis);
> > +    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
> > +                    active_tunnels);
> >      ovs_mutex_unlock(&pinctrl_mutex);
> >  }
> >
> > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void)
> >      destroy_dns_cache();
> >      ip_mcast_snoop_destroy();
> >      destroy_svc_monitors();
> > +    bfd_monitor_destroy();
> >      seq_destroy(pinctrl_main_seq);
> >      seq_destroy(pinctrl_handler_seq);
> >  }
> > @@ -5525,7 +5546,8 @@ may_inject_pkts(void)
> >              !shash_is_empty(&send_garp_rarp_data) ||
> >              ipv6_prefixd_should_inject() ||
> >              !ovs_list_is_empty(&mcast_query_list) ||
> > -            !ovs_list_is_empty(&buffered_mac_bindings));
> > +            !ovs_list_is_empty(&buffered_mac_bindings) ||
> > +            bfd_monitor_should_inject());
> >  }
> >
> >  static void
> > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >
> >  }
> >
> > +static struct hmap bfd_monitor_map;
> > +
> > +struct bfd_entry {
> > +    struct hmap_node node;
> > +
> > +    /* L2 source address */
> > +    struct eth_addr src_mac;
> > +    /* IPv4 source address */
> > +    ovs_be32 ip_src;
> > +    /* IPv4 destination address */
> > +    ovs_be32 ip_dst;
> > +    /* RFC 5881 section 4
> > +     * The source port MUST be in the range 49152 through 65535.
> > +     * The same UDP source port number MUST be used for all BFD
> > +     * Control packets associated with a particular session.
> > +     * The source port number SHOULD be unique among all BFD
> > +     * sessions on the system
> > +     */
> > +    uint16_t udp_src;
> > +    ovs_be32 disc;
> > +
> > +    int64_t port_key;
> > +    int64_t metadata;
> > +
> > +    long long int last_update;
> > +    long long int next_tx;
> > +};
> > +
> > +static void
> > +bfd_monitor_init(void)
> > +{
> > +    hmap_init(&bfd_monitor_map);
> > +}
> > +
> > +static void
> > +bfd_monitor_destroy(void)
> > +{
> > +    hmap_destroy(&bfd_monitor_map);
>
> I think you also need to remove the hmap nodes  and free the 'struct
> bfd_entry' objects
> before destroying the hmap.
>
>
> > +}
> > +
> > +static struct bfd_entry *
> > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
> > +{
> > +    struct bfd_entry *entry;
> > +    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
> > +                             &bfd_monitor_map) {
> > +        if (entry->udp_src == port) {
> > +            return entry;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static bool
> > +bfd_monitor_should_inject(void)
> > +{
> > +    long long int cur_time = time_msec();
> > +    struct bfd_entry *entry;
> > +
> > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > +        if (entry->next_tx < cur_time) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +static void
> > +bfd_monitor_wait(long long int timeout)
> > +{
> > +    if (!hmap_is_empty(&bfd_monitor_map)) {
> > +        poll_timer_wait_until(timeout);
> > +    }
> > +}
> > +
> > +static void
> > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
> > +{
> > +    struct udp_header *udp;
> > +    struct bfd_msg *msg;
> > +
> > +    dp_packet_reserve(packet, 2);

I don't understand why reserving 2 bytes at the head is required ?

Can you please put some comments on why it is required ?

Thanks
Numan

> > +    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
> > +    eth->eth_dst = eth_addr_broadcast;
> > +    eth->eth_src = entry->src_mac;
> > +    eth->eth_type = htons(ETH_TYPE_IP);
> > +
> > +    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
> > +    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
> > +    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
> > +    ip->ip_ttl = MAXTTL;
> > +    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
> > +    ip->ip_proto = IPPROTO_UDP;
> > +    put_16aligned_be32(&ip->ip_src, entry->ip_src);
> > +    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
> > +    /* Checksum has already been zeroed by put_zeros call. */
> > +    ip->ip_csum = csum(ip, sizeof *ip);
> > +
> > +    udp = dp_packet_put_zeros(packet, sizeof *udp);
> > +    udp->udp_src = htons(entry->udp_src);
> > +    udp->udp_dst = htons(BFD_DEST_PORT);
> > +    udp->udp_len = htons(sizeof *udp + sizeof *msg);
> > +
> > +    msg = dp_packet_put_uninit(packet, sizeof *msg);
> > +    msg->vers_diag = (BFD_VERSION << 5);
> > +    msg->length = BFD_PACKET_LEN;
> > +}
> > +
> > +static void
> > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    long long int cur_time = time_msec();
> > +    struct bfd_entry *entry;
> > +
> > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > +        if (cur_time < entry->next_tx) {
> > +            goto next;
> > +        }
> > +
> > +        uint64_t packet_stub[256 / 8];
> > +        struct dp_packet packet;
> > +        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> > +        bfd_monitor_put_bfd_msg(entry, &packet);
> > +
> > +        uint64_t ofpacts_stub[4096 / 8];
> > +        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> > +
> > +        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> > +        uint32_t dp_key = entry->metadata;
> > +        uint32_t port_key = entry->port_key;
> > +        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> > +        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> > +        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
> > +        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > +        resubmit->in_port = OFPP_CONTROLLER;
> > +        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> > +
> > +        struct ofputil_packet_out po = {
> > +            .packet = dp_packet_data(&packet),
> > +            .packet_len = dp_packet_size(&packet),
> > +            .buffer_id = UINT32_MAX,
> > +            .ofpacts = ofpacts.data,
> > +            .ofpacts_len = ofpacts.size,
> > +        };
> > +
> > +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > +        enum ofp_version version = rconn_get_version(swconn);
> > +        enum ofputil_protocol proto =
> > +            ofputil_protocol_from_ofp_version(version);
> > +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > +        dp_packet_uninit(&packet);
> > +        ofpbuf_uninit(&ofpacts);
> > +
> > +        entry->next_tx = cur_time + 5000;
> > +next:
> > +        if (*bfd_time > entry->next_tx) {
> > +            *bfd_time = entry->next_tx;
>
> I think there is  some problem here. I applied all the patches of this
> series and deployed
> fake multinode.
>
> When I create a BFD entry like below, ovn-controller pinctrl thread on
> ovn-gw-1 fake chassis, just wakes up
> 100% of the time.
>
> ---
> ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50
> ---
>
> In the ovn-controller logs I see
>
> ----
> 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> ...
> ..
> ----
>
> Thanks
> Numan
>
>
> > +        }
> > +    }
> > +}
> > +
> > +#define BFD_MONITOR_STALE_TIMEOUT  180000LL
> > +static void
> > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > +                struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +                const struct sbrec_chassis *chassis,
> > +                const struct sset *active_tunnels)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    long long int cur_time = time_msec();
> > +    bool changed = false;
> > +
> > +    const struct sbrec_bfd *bt;
> > +    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
> > +        const struct sbrec_port_binding *pb
> > +            = lport_lookup_by_name(sbrec_port_binding_by_name,
> > +                                   bt->logical_port);
> > +        if (!pb) {
> > +            continue;
> > +        }
> > +
> > +        struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port(
> > +                bt->dst_ip, bt->src_port);
> > +        if (!entry) {
> > +            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
> > +            struct eth_addr ea = eth_addr_zero;
> > +            int i;
> > +
> > +            if (!ip_parse(bt->dst_ip, &ip_dst)) {
> > +                continue;
> > +            }
> > +
> > +            const char *peer_s = smap_get(&pb->options, "peer");
> > +            if (!peer_s) {
> > +                continue;
> > +            }
> > +
> > +            const struct sbrec_port_binding *peer
> > +                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
> > +            if (!peer) {
> > +                continue;
> > +            }
> > +
> > +            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> > +            bool resident = lport_is_chassis_resident(
> > +                    sbrec_port_binding_by_name, chassis, active_tunnels,
> > +                    redirect_name);
> > +            free(redirect_name);
> > +            if (!resident && strcmp(pb->type, "l3gateway") &&
> > +                pb->chassis != chassis) {
> > +                continue;
> > +            }
> > +
> > +            for (i = 0; i < pb->n_mac; i++) {
> > +                struct lport_addresses laddrs;
> > +
> > +                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
> > +                    continue;
> > +                }
> > +
> > +                ea = laddrs.ea;
> > +                if (laddrs.n_ipv4_addrs > 0) {
> > +                    ip_src = laddrs.ipv4_addrs[0].addr;
> > +                    destroy_lport_addresses(&laddrs);
> > +                    break;
> > +                }
> > +                destroy_lport_addresses(&laddrs);
> > +            }
> > +
> > +            if (eth_addr_is_zero(ea)) {
> > +                continue;
> > +            }
> > +
> > +            entry = xzalloc(sizeof *entry);
> > +            entry->src_mac = ea;
> > +            entry->ip_src = ip_src;
> > +            entry->ip_dst = ip_dst;
> > +            entry->udp_src = bt->src_port;
> > +            entry->disc = htonl(bt->disc);
> > +            entry->next_tx = cur_time;
> > +            entry->metadata = pb->datapath->tunnel_key;
> > +            entry->port_key = pb->tunnel_key;
> > +
> > +            uint32_t hash = hash_string(bt->dst_ip, 0);
> > +            hmap_insert(&bfd_monitor_map, &entry->node, hash);
> > +            changed = true;
> > +        }
> > +        entry->last_update = cur_time;
> > +    }
> > +
> > +    struct bfd_entry *entry, *next_entry;
> > +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> > +        if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) {
> > +            hmap_remove(&bfd_monitor_map, &entry->node);
> > +            free(entry);
> > +        }
> > +    }
> > +
> > +    if (changed) {
> > +        notify_pinctrl_handler();
> > +    }
> > +}
> > +
> >  static uint16_t
> >  get_random_src_port(void)
> >  {
> > diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> > index 4b101ec92..8555d983d 100644
> > --- a/controller/pinctrl.h
> > +++ b/controller/pinctrl.h
> > @@ -31,6 +31,7 @@ struct sbrec_chassis;
> >  struct sbrec_dns_table;
> >  struct sbrec_controller_event_table;
> >  struct sbrec_service_monitor_table;
> > +struct sbrec_bfd_table;
> >
> >  void pinctrl_init(void);
> >  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >                   const struct sbrec_dns_table *,
> >                   const struct sbrec_controller_event_table *,
> >                   const struct sbrec_service_monitor_table *,
> > +                 const struct sbrec_bfd_table *,
> >                   const struct ovsrec_bridge *, const struct sbrec_chassis *,
> >                   const struct hmap *local_datapaths,
> >                   const struct sset *active_tunnels);
> > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> > index c84a0e7a9..d00982449 100644
> > --- a/lib/ovn-l7.h
> > +++ b/lib/ovn-l7.h
> > @@ -26,6 +26,25 @@
> >  #include "hash.h"
> >  #include "ovn/logical-fields.h"
> >
> > +#define BFD_PACKET_LEN  24
> > +#define BFD_DEST_PORT   3784
> > +#define BFD_VERSION     1
> > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
> > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
> > +
> > +struct bfd_msg {
> > +    uint8_t vers_diag;
> > +    uint8_t flags;
> > +    uint8_t mult;
> > +    uint8_t length;
> > +    ovs_be32 my_disc;
> > +    ovs_be32 your_disc;
> > +    ovs_be32 min_tx;
> > +    ovs_be32 min_rx;
> > +    ovs_be32 min_rx_echo;
> > +};
> > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
> > +
> >  /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
> >  struct gen_opts_map {
> >      struct hmap_node hmap_node;
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index b377dffa1..e8c7629e7 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
> >      }
> >  }
> >
> > +struct bfd_entry {
> > +    struct hmap_node hmap_node;
> > +
> > +    const struct sbrec_bfd *sb_bt;
> > +    bool found;
> > +};
> > +
> > +static struct bfd_entry *
> > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
> > +                const char *dst_ip)
> > +{
> > +    struct bfd_entry *bfd_e;
> > +    uint32_t hash;
> > +
> > +    hash = hash_string(dst_ip, 0);
> > +    hash = hash_string(logical_port, hash);
> > +    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
> > +        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
> > +            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
> > +            return bfd_e;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static void
> > +build_bfd_table(struct northd_context *ctx)
> > +{
> > +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> > +    struct bfd_entry *bfd_e, *next_bfd_e;
> > +    const struct sbrec_bfd *sb_bt;
> > +    uint32_t hash;
> > +
> > +    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
> > +        bfd_e = xmalloc(sizeof *bfd_e);
> > +        bfd_e->sb_bt = sb_bt;
> > +        bfd_e->found = false;
> > +        hash = hash_string(sb_bt->dst_ip, 0);
> > +        hash = hash_string(sb_bt->logical_port, hash);
> > +        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
> > +    }
> > +
> > +    const struct nbrec_bfd *nb_bt;
> > +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> > +        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
> > +        if (!bfd_e) {
> > +            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
> > +            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> > +            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> > +            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
> > +            /* RFC 5881 section 4
> > +             * The source port MUST be in the range 49152 through 65535.
> > +             * The same UDP source port number MUST be used for all BFD
> > +             * Control packets associated with a particular session.
> > +             * The source port number SHOULD be unique among all BFD
> > +             * sessions on the system
> > +             */
> > +            uint16_t udp_src = random_range(16383) + 49152;
> > +            sbrec_bfd_set_src_port(sb_bt, udp_src);
> > +            sbrec_bfd_set_status(sb_bt, "down");
> > +            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> > +            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> > +            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> > +        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
> > +            if (!strcmp(nb_bt->status, "admin_down") ||
> > +                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
> > +                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
> > +            } else {
> > +                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
> > +            }
> > +        }
> > +        if (bfd_e) {
> > +            bfd_e->found = true;
> > +        }
> > +    }
> > +
> > +    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
> > +        if (!bfd_e->found && bfd_e->sb_bt) {
> > +            sbrec_bfd_delete(bfd_e->sb_bt);
> > +        }
> > +        hmap_remove(&sb_only, &bfd_e->hmap_node);
> > +        free(bfd_e);
> > +    }
> > +    hmap_destroy(&sb_only);
> > +}
> > +
> >  static void
> >  sync_address_set(struct northd_context *ctx, const char *name,
> >                   const char **addrs, size_t n_addrs,
> > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx,
> >      build_ip_mcast(ctx, datapaths);
> >      build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
> >      build_meter_groups(ctx, &meter_groups);
> > +    build_bfd_table(ctx);
> >      build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
> >                   &igmp_groups, &meter_groups, &lbs);
> >      ovn_update_ipv6_prefix(ports);
> > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[])
> >      add_column_noalert(ovnsb_idl_loop.idl,
> >                         &sbrec_load_balancer_col_external_ids);
> >
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
> > +
> >      struct ovsdb_idl_index *sbrec_chassis_by_name
> >          = chassis_index_create(ovnsb_idl_loop.idl);
> >
> > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > index b77a2308c..a880482a4 100644
> > --- a/ovn-nb.ovsschema
> > +++ b/ovn-nb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Northbound",
> > -    "version": "5.30.0",
> > -    "cksum": "3273824429 27172",
> > +    "version": "5.31.0",
> > +    "cksum": "3663844734 28178",
> >      "tables": {
> >          "NB_Global": {
> >              "columns": {
> > @@ -526,5 +526,25 @@
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}}},
> >              "indexes": [["name"]],
> > +            "isRoot": true},
> > +        "BFD": {
> > +            "columns": {
> > +                "logical_port": {"type": "string"},
> > +                "dst_ip": {"type": "string"},
> > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > +                "status": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set", ["down", "init", "up",
> > +                                              "admin_down"]]},
> > +                             "min": 0, "max": 1}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["logical_port", "dst_ip"]],
> >              "isRoot": true}}
> >      }
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index 0cf043790..4a28d3f0d 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -3728,4 +3728,70 @@
> >        </column>
> >      </group>
> >    </table>
> > +
> > +  <table name="BFD">
> > +    <p>
> > +      Contains BFD parameter for ovn-controller bfd configuration
> > +    </p>
> > +
> > +    <group title="Configuration">
> > +      <column name="logical_port">
> > +        OVN logical port when BFD engine is running.
> > +      </column>
> > +
> > +      <column name="dst_ip">
> > +        BFD peer IP address.
> > +      </column>
> > +
> > +      <column name="min_tx">
> > +        This is the minimum interval, in milliseconds, that the local
> > +        system would like to use when transmitting BFD Control packets,
> > +        less any jitter applied. The value zero is reserved.
> > +      </column>
> > +
> > +      <column name="min_rx">
> > +        This is the minimum interval, in milliseconds, between received
> > +        BFD Control packets that this system is capable of supporting,
> > +        less any jitter applied by the sender. If this value is zero,
> > +        the transmitting system does not want the remote system to send
> > +        any periodic BFD Control packets.
> > +      </column>
> > +
> > +      <column name="detect_mult">
> > +        Detection time multiplier.  The negotiated transmit interval,
> > +        multiplied by this value, provides the Detection Time for the
> > +        receiving system in Asynchronous mode.
> > +      </column>
> > +
> > +      <column name="options">
> > +        Reserved for future use.
> > +      </column>
> > +
> > +      <column name="external_ids">
> > +        See <em>External IDs</em> at the beginning of this document.
> > +      </column>
> > +    </group>
> > +
> > +    <group title="Status Reporting">
> > +      <column name="status">
> > +        <p>
> > +          BFD port logical states. Possible values are:
> > +          <ul>
> > +            <li>
> > +              <code>admin_down</code>
> > +            </li>
> > +            <li>
> > +              <code>down</code>
> > +            </li>
> > +            <li>
> > +              <code>init</code>
> > +            </li>
> > +            <li>
> > +              <code>up</code>
> > +            </li>
> > +          </ul>
> > +        </p>
> > +      </column>
> > +    </group>
> > +  </table>
> >  </database>
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index 5228839b8..97db6de39 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.12.0",
> > -    "cksum": "3969471120 24441",
> > +    "version": "20.13.0",
> > +    "cksum": "3035725595 25676",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -484,6 +484,29 @@
> >                  "external_ids": {
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}}},
> > +            "isRoot": true},
> > +        "BFD": {
> > +            "columns": {
> > +                "src_port": {"type": {"key": {"type": "integer",
> > +                                          "minInteger": 49152,
> > +                                          "maxInteger": 65535}}},
> > +                "disc": {"type": {"key": {"type": "integer"}}},
> > +                "logical_port": {"type": "string"},
> > +                "dst_ip": {"type": "string"},
> > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > +                "status": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set", ["down", "init", "up",
> > +                                              "admin_down"]]}}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
> >              "isRoot": true}
> >      }
> >  }
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index c13994848..ccfe2e415 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -4231,4 +4231,82 @@ tcp.flags = RST;
> >        </column>
> >      </group>
> >    </table>
> > +
> > +  <table name="BFD">
> > +    <p>
> > +      Contains BFD parameter for ovn-controller bfd configuration
> > +    </p>
> > +
> > +    <group title="Configuration">
> > +      <column name="src_port">
> > +        udp source port used in bfd control packets.
> > +        The source port MUST be in the range 49152 through 65535
> > +        (RFC5881 section 4).
> > +      </column>
> > +
> > +      <column name="disc">
> > +        A unique, nonzero discriminator value generated by the transmitting
> > +        system, used to demultiplex multiple BFD sessions between the same pair
> > +        of systems.
> > +      </column>
> > +
> > +      <column name="logical_port">
> > +        OVN logical port when BFD engine is running.
> > +      </column>
> > +
> > +      <column name="dst_ip">
> > +        BFD peer IP address.
> > +      </column>
> > +
> > +      <column name="min_tx">
> > +        This is the minimum interval, in milliseconds, that the local
> > +        system would like to use when transmitting BFD Control packets,
> > +        less any jitter applied. The value zero is reserved.
> > +      </column>
> > +
> > +      <column name="min_rx">
> > +        This is the minimum interval, in milliseconds, between received
> > +        BFD Control packets that this system is capable of supporting,
> > +        less any jitter applied by the sender. If this value is zero,
> > +        the transmitting system does not want the remote system to send
> > +        any periodic BFD Control packets.
> > +      </column>
> > +
> > +      <column name="detect_mult">
> > +        Detection time multiplier.  The negotiated transmit interval,
> > +        multiplied by this value, provides the Detection Time for the
> > +        receiving system in Asynchronous mode.
> > +      </column>
> > +
> > +      <column name="options">
> > +        Reserved for future use.
> > +      </column>
> > +
> > +      <column name="external_ids">
> > +        See <em>External IDs</em> at the beginning of this document.
> > +      </column>
> > +    </group>
> > +
> > +    <group title="Status Reporting">
> > +      <column name="status">
> > +        <p>
> > +          BFD port logical states. Possible values are:
> > +          <ul>
> > +            <li>
> > +              <code>admin_down</code>
> > +            </li>
> > +            <li>
> > +              <code>down</code>
> > +            </li>
> > +            <li>
> > +              <code>init</code>
> > +            </li>
> > +            <li>
> > +              <code>up</code>
> > +            </li>
> > +          </ul>
> > +        </p>
> > +      </column>
> > +    </group>
> > +  </table>
> >  </database>
> > --
> > 2.29.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
Lorenzo Bianconi Dec. 18, 2020, 6:57 p.m. UTC | #3
On Dec 18, Numan Siddique wrote:
> On Fri, Dec 18, 2020 at 7:00 PM Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi
> > <lorenzo.bianconi@redhat.com> wrote:
> > >
> > > Introduce the capability to transmit BFD packets in ovn-controller.
> > > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
> > > (e.g. min_tx, min_rx, ..) for ovn-controller.
> > >
> > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> >
> > Hi Lorenzo,
> >
> > Please see below for a few comments.
> >
> > Thanks
> > Numan
> >
> > > ---
> > >  controller/ovn-controller.c |   1 +
> > >  controller/pinctrl.c        | 288 +++++++++++++++++++++++++++++++++++-
> > >  controller/pinctrl.h        |   2 +
> > >  lib/ovn-l7.h                |  19 +++
> > >  northd/ovn-northd.c         |  91 ++++++++++++
> > >  ovn-nb.ovsschema            |  24 ++-
> > >  ovn-nb.xml                  |  66 +++++++++
> > >  ovn-sb.ovsschema            |  27 +++-
> > >  ovn-sb.xml                  |  78 ++++++++++
> > >  9 files changed, 591 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index b5f0c1315..84b3f85ad 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[])
> > >                                          ovnsb_idl_loop.idl),
> > >                                      sbrec_service_monitor_table_get(
> > >                                          ovnsb_idl_loop.idl),
> > > +                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
> > >                                      br_int, chassis,
> > >                                      &runtime_data->local_datapaths,
> > >                                      &runtime_data->active_tunnels);
> > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > > index 7e3abf0a4..ae994b513 100644
> > > --- a/controller/pinctrl.c
> > > +++ b/controller/pinctrl.c
> > > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
> > >  static void notify_pinctrl_main(void);
> > >  static void notify_pinctrl_handler(void);
> > >
> > > +static bool bfd_monitor_should_inject(void);
> > > +static void bfd_monitor_wait(long long int timeout);
> > > +static void bfd_monitor_init(void);
> > > +static void bfd_monitor_destroy(void);
> > > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > > +                                 OVS_REQUIRES(pinctrl_mutex);
> > > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > > +                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > > +                            const struct sbrec_chassis *chassis,
> > > +                            const struct sset *active_tunnels)
> > > +                            OVS_REQUIRES(pinctrl_mutex);
> > > +
> > >  COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
> > >  COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
> > >  COVERAGE_DEFINE(pinctrl_drop_controller_event);
> > > @@ -487,6 +499,7 @@ pinctrl_init(void)
> > >      ip_mcast_snoop_init();
> > >      init_put_vport_bindings();
> > >      init_svc_monitors();
> > > +    bfd_monitor_init();
> > >      pinctrl.br_int_name = NULL;
> > >      pinctrl_handler_seq = seq_create();
> > >      pinctrl_main_seq = seq_create();
> > > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_)
> > >      swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
> > >
> > >      while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
> > > +        long long int bfd_time = LLONG_MAX;
> > > +
> > >          ovs_mutex_lock(&pinctrl_mutex);
> > >          pinctrl_rconn_setup(swconn, pctrl->br_int_name);
> > >          ip_mcast_snoop_run();
> > > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_)
> > >                  send_ipv6_ras(swconn, &send_ipv6_ra_time);
> > >                  send_ipv6_prefixd(swconn, &send_prefixd_time);
> > >                  send_mac_binding_buffered_pkts(swconn);
> > > +                bfd_monitor_send_msg(swconn, &bfd_time);
> > >                  ovs_mutex_unlock(&pinctrl_mutex);
> > >
> > >                  ip_mcast_querier_run(swconn, &send_mcast_query_time);
> > > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_)
> > >          ip_mcast_querier_wait(send_mcast_query_time);
> > >          svc_monitors_wait(svc_monitors_next_run_time);
> > >          ipv6_prefixd_wait(send_prefixd_time);
> > > +        bfd_monitor_wait(bfd_time);
> > >
> > >          new_seq = seq_read(pinctrl_handler_seq);
> > >          seq_wait(pinctrl_handler_seq, new_seq);
> > > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >              const struct sbrec_dns_table *dns_table,
> > >              const struct sbrec_controller_event_table *ce_table,
> > >              const struct sbrec_service_monitor_table *svc_mon_table,
> > > +            const struct sbrec_bfd_table *bfd_table,
> > >              const struct ovsrec_bridge *br_int,
> > >              const struct sbrec_chassis *chassis,
> > >              const struct hmap *local_datapaths,
> > > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >                           local_datapaths);
> > >      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
> > >                        chassis);
> > > +    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
> > > +                    active_tunnels);
> > >      ovs_mutex_unlock(&pinctrl_mutex);
> > >  }
> > >
> > > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void)
> > >      destroy_dns_cache();
> > >      ip_mcast_snoop_destroy();
> > >      destroy_svc_monitors();
> > > +    bfd_monitor_destroy();
> > >      seq_destroy(pinctrl_main_seq);
> > >      seq_destroy(pinctrl_handler_seq);
> > >  }
> > > @@ -5525,7 +5546,8 @@ may_inject_pkts(void)
> > >              !shash_is_empty(&send_garp_rarp_data) ||
> > >              ipv6_prefixd_should_inject() ||
> > >              !ovs_list_is_empty(&mcast_query_list) ||
> > > -            !ovs_list_is_empty(&buffered_mac_bindings));
> > > +            !ovs_list_is_empty(&buffered_mac_bindings) ||
> > > +            bfd_monitor_should_inject());
> > >  }
> > >
> > >  static void
> > > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >
> > >  }
> > >
> > > +static struct hmap bfd_monitor_map;
> > > +
> > > +struct bfd_entry {
> > > +    struct hmap_node node;
> > > +
> > > +    /* L2 source address */
> > > +    struct eth_addr src_mac;
> > > +    /* IPv4 source address */
> > > +    ovs_be32 ip_src;
> > > +    /* IPv4 destination address */
> > > +    ovs_be32 ip_dst;
> > > +    /* RFC 5881 section 4
> > > +     * The source port MUST be in the range 49152 through 65535.
> > > +     * The same UDP source port number MUST be used for all BFD
> > > +     * Control packets associated with a particular session.
> > > +     * The source port number SHOULD be unique among all BFD
> > > +     * sessions on the system
> > > +     */
> > > +    uint16_t udp_src;
> > > +    ovs_be32 disc;
> > > +
> > > +    int64_t port_key;
> > > +    int64_t metadata;
> > > +
> > > +    long long int last_update;
> > > +    long long int next_tx;
> > > +};
> > > +
> > > +static void
> > > +bfd_monitor_init(void)
> > > +{
> > > +    hmap_init(&bfd_monitor_map);
> > > +}
> > > +
> > > +static void
> > > +bfd_monitor_destroy(void)
> > > +{
> > > +    hmap_destroy(&bfd_monitor_map);
> >
> > I think you also need to remove the hmap nodes  and free the 'struct
> > bfd_entry' objects
> > before destroying the hmap.
> >
> >
> > > +}
> > > +
> > > +static struct bfd_entry *
> > > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
> > > +{
> > > +    struct bfd_entry *entry;
> > > +    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
> > > +                             &bfd_monitor_map) {
> > > +        if (entry->udp_src == port) {
> > > +            return entry;
> > > +        }
> > > +    }
> > > +    return NULL;
> > > +}
> > > +
> > > +static bool
> > > +bfd_monitor_should_inject(void)
> > > +{
> > > +    long long int cur_time = time_msec();
> > > +    struct bfd_entry *entry;
> > > +
> > > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > > +        if (entry->next_tx < cur_time) {
> > > +            return true;
> > > +        }
> > > +    }
> > > +    return false;
> > > +}
> > > +
> > > +static void
> > > +bfd_monitor_wait(long long int timeout)
> > > +{
> > > +    if (!hmap_is_empty(&bfd_monitor_map)) {
> > > +        poll_timer_wait_until(timeout);
> > > +    }
> > > +}
> > > +
> > > +static void
> > > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
> > > +{
> > > +    struct udp_header *udp;
> > > +    struct bfd_msg *msg;
> > > +
> > > +    dp_packet_reserve(packet, 2);
> 
> I don't understand why reserving 2 bytes at the head is required ?
> 
> Can you please put some comments on why it is required ?

ack, will do..it is to align the IP header to 4byte-boundary.

Regards,
Lorenzo

> 
> Thanks
> Numan
> 
> > > +    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
> > > +    eth->eth_dst = eth_addr_broadcast;
> > > +    eth->eth_src = entry->src_mac;
> > > +    eth->eth_type = htons(ETH_TYPE_IP);
> > > +
> > > +    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
> > > +    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
> > > +    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
> > > +    ip->ip_ttl = MAXTTL;
> > > +    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
> > > +    ip->ip_proto = IPPROTO_UDP;
> > > +    put_16aligned_be32(&ip->ip_src, entry->ip_src);
> > > +    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
> > > +    /* Checksum has already been zeroed by put_zeros call. */
> > > +    ip->ip_csum = csum(ip, sizeof *ip);
> > > +
> > > +    udp = dp_packet_put_zeros(packet, sizeof *udp);
> > > +    udp->udp_src = htons(entry->udp_src);
> > > +    udp->udp_dst = htons(BFD_DEST_PORT);
> > > +    udp->udp_len = htons(sizeof *udp + sizeof *msg);
> > > +
> > > +    msg = dp_packet_put_uninit(packet, sizeof *msg);
> > > +    msg->vers_diag = (BFD_VERSION << 5);
> > > +    msg->length = BFD_PACKET_LEN;
> > > +}
> > > +
> > > +static void
> > > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > > +    OVS_REQUIRES(pinctrl_mutex)
> > > +{
> > > +    long long int cur_time = time_msec();
> > > +    struct bfd_entry *entry;
> > > +
> > > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > > +        if (cur_time < entry->next_tx) {
> > > +            goto next;
> > > +        }
> > > +
> > > +        uint64_t packet_stub[256 / 8];
> > > +        struct dp_packet packet;
> > > +        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> > > +        bfd_monitor_put_bfd_msg(entry, &packet);
> > > +
> > > +        uint64_t ofpacts_stub[4096 / 8];
> > > +        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> > > +
> > > +        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> > > +        uint32_t dp_key = entry->metadata;
> > > +        uint32_t port_key = entry->port_key;
> > > +        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> > > +        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> > > +        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
> > > +        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > > +        resubmit->in_port = OFPP_CONTROLLER;
> > > +        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> > > +
> > > +        struct ofputil_packet_out po = {
> > > +            .packet = dp_packet_data(&packet),
> > > +            .packet_len = dp_packet_size(&packet),
> > > +            .buffer_id = UINT32_MAX,
> > > +            .ofpacts = ofpacts.data,
> > > +            .ofpacts_len = ofpacts.size,
> > > +        };
> > > +
> > > +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > > +        enum ofp_version version = rconn_get_version(swconn);
> > > +        enum ofputil_protocol proto =
> > > +            ofputil_protocol_from_ofp_version(version);
> > > +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > > +        dp_packet_uninit(&packet);
> > > +        ofpbuf_uninit(&ofpacts);
> > > +
> > > +        entry->next_tx = cur_time + 5000;
> > > +next:
> > > +        if (*bfd_time > entry->next_tx) {
> > > +            *bfd_time = entry->next_tx;
> >
> > I think there is  some problem here. I applied all the patches of this
> > series and deployed
> > fake multinode.
> >
> > When I create a BFD entry like below, ovn-controller pinctrl thread on
> > ovn-gw-1 fake chassis, just wakes up
> > 100% of the time.
> >
> > ---
> > ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50
> > ---
> >
> > In the ovn-controller logs I see
> >
> > ----
> > 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> > to 0-ms timeout at controller/pinctrl.c:6495
> > 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> > to 0-ms timeout at controller/pinctrl.c:6495
> > 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> > to 0-ms timeout at controller/pinctrl.c:6495
> > 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> > to 0-ms timeout at controller/pinctrl.c:6495
> > ...
> > ..
> > ----
> >
> > Thanks
> > Numan
> >
> >
> > > +        }
> > > +    }
> > > +}
> > > +
> > > +#define BFD_MONITOR_STALE_TIMEOUT  180000LL
> > > +static void
> > > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > > +                struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > > +                const struct sbrec_chassis *chassis,
> > > +                const struct sset *active_tunnels)
> > > +    OVS_REQUIRES(pinctrl_mutex)
> > > +{
> > > +    long long int cur_time = time_msec();
> > > +    bool changed = false;
> > > +
> > > +    const struct sbrec_bfd *bt;
> > > +    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
> > > +        const struct sbrec_port_binding *pb
> > > +            = lport_lookup_by_name(sbrec_port_binding_by_name,
> > > +                                   bt->logical_port);
> > > +        if (!pb) {
> > > +            continue;
> > > +        }
> > > +
> > > +        struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port(
> > > +                bt->dst_ip, bt->src_port);
> > > +        if (!entry) {
> > > +            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
> > > +            struct eth_addr ea = eth_addr_zero;
> > > +            int i;
> > > +
> > > +            if (!ip_parse(bt->dst_ip, &ip_dst)) {
> > > +                continue;
> > > +            }
> > > +
> > > +            const char *peer_s = smap_get(&pb->options, "peer");
> > > +            if (!peer_s) {
> > > +                continue;
> > > +            }
> > > +
> > > +            const struct sbrec_port_binding *peer
> > > +                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
> > > +            if (!peer) {
> > > +                continue;
> > > +            }
> > > +
> > > +            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> > > +            bool resident = lport_is_chassis_resident(
> > > +                    sbrec_port_binding_by_name, chassis, active_tunnels,
> > > +                    redirect_name);
> > > +            free(redirect_name);
> > > +            if (!resident && strcmp(pb->type, "l3gateway") &&
> > > +                pb->chassis != chassis) {
> > > +                continue;
> > > +            }
> > > +
> > > +            for (i = 0; i < pb->n_mac; i++) {
> > > +                struct lport_addresses laddrs;
> > > +
> > > +                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
> > > +                    continue;
> > > +                }
> > > +
> > > +                ea = laddrs.ea;
> > > +                if (laddrs.n_ipv4_addrs > 0) {
> > > +                    ip_src = laddrs.ipv4_addrs[0].addr;
> > > +                    destroy_lport_addresses(&laddrs);
> > > +                    break;
> > > +                }
> > > +                destroy_lport_addresses(&laddrs);
> > > +            }
> > > +
> > > +            if (eth_addr_is_zero(ea)) {
> > > +                continue;
> > > +            }
> > > +
> > > +            entry = xzalloc(sizeof *entry);
> > > +            entry->src_mac = ea;
> > > +            entry->ip_src = ip_src;
> > > +            entry->ip_dst = ip_dst;
> > > +            entry->udp_src = bt->src_port;
> > > +            entry->disc = htonl(bt->disc);
> > > +            entry->next_tx = cur_time;
> > > +            entry->metadata = pb->datapath->tunnel_key;
> > > +            entry->port_key = pb->tunnel_key;
> > > +
> > > +            uint32_t hash = hash_string(bt->dst_ip, 0);
> > > +            hmap_insert(&bfd_monitor_map, &entry->node, hash);
> > > +            changed = true;
> > > +        }
> > > +        entry->last_update = cur_time;
> > > +    }
> > > +
> > > +    struct bfd_entry *entry, *next_entry;
> > > +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> > > +        if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) {
> > > +            hmap_remove(&bfd_monitor_map, &entry->node);
> > > +            free(entry);
> > > +        }
> > > +    }
> > > +
> > > +    if (changed) {
> > > +        notify_pinctrl_handler();
> > > +    }
> > > +}
> > > +
> > >  static uint16_t
> > >  get_random_src_port(void)
> > >  {
> > > diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> > > index 4b101ec92..8555d983d 100644
> > > --- a/controller/pinctrl.h
> > > +++ b/controller/pinctrl.h
> > > @@ -31,6 +31,7 @@ struct sbrec_chassis;
> > >  struct sbrec_dns_table;
> > >  struct sbrec_controller_event_table;
> > >  struct sbrec_service_monitor_table;
> > > +struct sbrec_bfd_table;
> > >
> > >  void pinctrl_init(void);
> > >  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >                   const struct sbrec_dns_table *,
> > >                   const struct sbrec_controller_event_table *,
> > >                   const struct sbrec_service_monitor_table *,
> > > +                 const struct sbrec_bfd_table *,
> > >                   const struct ovsrec_bridge *, const struct sbrec_chassis *,
> > >                   const struct hmap *local_datapaths,
> > >                   const struct sset *active_tunnels);
> > > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> > > index c84a0e7a9..d00982449 100644
> > > --- a/lib/ovn-l7.h
> > > +++ b/lib/ovn-l7.h
> > > @@ -26,6 +26,25 @@
> > >  #include "hash.h"
> > >  #include "ovn/logical-fields.h"
> > >
> > > +#define BFD_PACKET_LEN  24
> > > +#define BFD_DEST_PORT   3784
> > > +#define BFD_VERSION     1
> > > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
> > > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
> > > +
> > > +struct bfd_msg {
> > > +    uint8_t vers_diag;
> > > +    uint8_t flags;
> > > +    uint8_t mult;
> > > +    uint8_t length;
> > > +    ovs_be32 my_disc;
> > > +    ovs_be32 your_disc;
> > > +    ovs_be32 min_tx;
> > > +    ovs_be32 min_rx;
> > > +    ovs_be32 min_rx_echo;
> > > +};
> > > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
> > > +
> > >  /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
> > >  struct gen_opts_map {
> > >      struct hmap_node hmap_node;
> > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > > index b377dffa1..e8c7629e7 100644
> > > --- a/northd/ovn-northd.c
> > > +++ b/northd/ovn-northd.c
> > > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
> > >      }
> > >  }
> > >
> > > +struct bfd_entry {
> > > +    struct hmap_node hmap_node;
> > > +
> > > +    const struct sbrec_bfd *sb_bt;
> > > +    bool found;
> > > +};
> > > +
> > > +static struct bfd_entry *
> > > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
> > > +                const char *dst_ip)
> > > +{
> > > +    struct bfd_entry *bfd_e;
> > > +    uint32_t hash;
> > > +
> > > +    hash = hash_string(dst_ip, 0);
> > > +    hash = hash_string(logical_port, hash);
> > > +    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
> > > +        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
> > > +            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
> > > +            return bfd_e;
> > > +        }
> > > +    }
> > > +    return NULL;
> > > +}
> > > +
> > > +static void
> > > +build_bfd_table(struct northd_context *ctx)
> > > +{
> > > +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> > > +    struct bfd_entry *bfd_e, *next_bfd_e;
> > > +    const struct sbrec_bfd *sb_bt;
> > > +    uint32_t hash;
> > > +
> > > +    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
> > > +        bfd_e = xmalloc(sizeof *bfd_e);
> > > +        bfd_e->sb_bt = sb_bt;
> > > +        bfd_e->found = false;
> > > +        hash = hash_string(sb_bt->dst_ip, 0);
> > > +        hash = hash_string(sb_bt->logical_port, hash);
> > > +        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
> > > +    }
> > > +
> > > +    const struct nbrec_bfd *nb_bt;
> > > +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> > > +        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
> > > +        if (!bfd_e) {
> > > +            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
> > > +            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> > > +            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> > > +            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
> > > +            /* RFC 5881 section 4
> > > +             * The source port MUST be in the range 49152 through 65535.
> > > +             * The same UDP source port number MUST be used for all BFD
> > > +             * Control packets associated with a particular session.
> > > +             * The source port number SHOULD be unique among all BFD
> > > +             * sessions on the system
> > > +             */
> > > +            uint16_t udp_src = random_range(16383) + 49152;
> > > +            sbrec_bfd_set_src_port(sb_bt, udp_src);
> > > +            sbrec_bfd_set_status(sb_bt, "down");
> > > +            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> > > +            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> > > +            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> > > +        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
> > > +            if (!strcmp(nb_bt->status, "admin_down") ||
> > > +                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
> > > +                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
> > > +            } else {
> > > +                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
> > > +            }
> > > +        }
> > > +        if (bfd_e) {
> > > +            bfd_e->found = true;
> > > +        }
> > > +    }
> > > +
> > > +    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
> > > +        if (!bfd_e->found && bfd_e->sb_bt) {
> > > +            sbrec_bfd_delete(bfd_e->sb_bt);
> > > +        }
> > > +        hmap_remove(&sb_only, &bfd_e->hmap_node);
> > > +        free(bfd_e);
> > > +    }
> > > +    hmap_destroy(&sb_only);
> > > +}
> > > +
> > >  static void
> > >  sync_address_set(struct northd_context *ctx, const char *name,
> > >                   const char **addrs, size_t n_addrs,
> > > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx,
> > >      build_ip_mcast(ctx, datapaths);
> > >      build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
> > >      build_meter_groups(ctx, &meter_groups);
> > > +    build_bfd_table(ctx);
> > >      build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
> > >                   &igmp_groups, &meter_groups, &lbs);
> > >      ovn_update_ipv6_prefix(ports);
> > > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[])
> > >      add_column_noalert(ovnsb_idl_loop.idl,
> > >                         &sbrec_load_balancer_col_external_ids);
> > >
> > > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
> > > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
> > > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
> > > +
> > >      struct ovsdb_idl_index *sbrec_chassis_by_name
> > >          = chassis_index_create(ovnsb_idl_loop.idl);
> > >
> > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > > index b77a2308c..a880482a4 100644
> > > --- a/ovn-nb.ovsschema
> > > +++ b/ovn-nb.ovsschema
> > > @@ -1,7 +1,7 @@
> > >  {
> > >      "name": "OVN_Northbound",
> > > -    "version": "5.30.0",
> > > -    "cksum": "3273824429 27172",
> > > +    "version": "5.31.0",
> > > +    "cksum": "3663844734 28178",
> > >      "tables": {
> > >          "NB_Global": {
> > >              "columns": {
> > > @@ -526,5 +526,25 @@
> > >                      "type": {"key": "string", "value": "string",
> > >                               "min": 0, "max": "unlimited"}}},
> > >              "indexes": [["name"]],
> > > +            "isRoot": true},
> > > +        "BFD": {
> > > +            "columns": {
> > > +                "logical_port": {"type": "string"},
> > > +                "dst_ip": {"type": "string"},
> > > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > > +                "status": {
> > > +                    "type": {"key": {"type": "string",
> > > +                             "enum": ["set", ["down", "init", "up",
> > > +                                              "admin_down"]]},
> > > +                             "min": 0, "max": 1}},
> > > +                "external_ids": {
> > > +                    "type": {"key": "string", "value": "string",
> > > +                             "min": 0, "max": "unlimited"}},
> > > +                "options": {
> > > +                    "type": {"key": "string", "value": "string",
> > > +                             "min": 0, "max": "unlimited"}}},
> > > +            "indexes": [["logical_port", "dst_ip"]],
> > >              "isRoot": true}}
> > >      }
> > > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > > index 0cf043790..4a28d3f0d 100644
> > > --- a/ovn-nb.xml
> > > +++ b/ovn-nb.xml
> > > @@ -3728,4 +3728,70 @@
> > >        </column>
> > >      </group>
> > >    </table>
> > > +
> > > +  <table name="BFD">
> > > +    <p>
> > > +      Contains BFD parameter for ovn-controller bfd configuration
> > > +    </p>
> > > +
> > > +    <group title="Configuration">
> > > +      <column name="logical_port">
> > > +        OVN logical port when BFD engine is running.
> > > +      </column>
> > > +
> > > +      <column name="dst_ip">
> > > +        BFD peer IP address.
> > > +      </column>
> > > +
> > > +      <column name="min_tx">
> > > +        This is the minimum interval, in milliseconds, that the local
> > > +        system would like to use when transmitting BFD Control packets,
> > > +        less any jitter applied. The value zero is reserved.
> > > +      </column>
> > > +
> > > +      <column name="min_rx">
> > > +        This is the minimum interval, in milliseconds, between received
> > > +        BFD Control packets that this system is capable of supporting,
> > > +        less any jitter applied by the sender. If this value is zero,
> > > +        the transmitting system does not want the remote system to send
> > > +        any periodic BFD Control packets.
> > > +      </column>
> > > +
> > > +      <column name="detect_mult">
> > > +        Detection time multiplier.  The negotiated transmit interval,
> > > +        multiplied by this value, provides the Detection Time for the
> > > +        receiving system in Asynchronous mode.
> > > +      </column>
> > > +
> > > +      <column name="options">
> > > +        Reserved for future use.
> > > +      </column>
> > > +
> > > +      <column name="external_ids">
> > > +        See <em>External IDs</em> at the beginning of this document.
> > > +      </column>
> > > +    </group>
> > > +
> > > +    <group title="Status Reporting">
> > > +      <column name="status">
> > > +        <p>
> > > +          BFD port logical states. Possible values are:
> > > +          <ul>
> > > +            <li>
> > > +              <code>admin_down</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>down</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>init</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>up</code>
> > > +            </li>
> > > +          </ul>
> > > +        </p>
> > > +      </column>
> > > +    </group>
> > > +  </table>
> > >  </database>
> > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > > index 5228839b8..97db6de39 100644
> > > --- a/ovn-sb.ovsschema
> > > +++ b/ovn-sb.ovsschema
> > > @@ -1,7 +1,7 @@
> > >  {
> > >      "name": "OVN_Southbound",
> > > -    "version": "20.12.0",
> > > -    "cksum": "3969471120 24441",
> > > +    "version": "20.13.0",
> > > +    "cksum": "3035725595 25676",
> > >      "tables": {
> > >          "SB_Global": {
> > >              "columns": {
> > > @@ -484,6 +484,29 @@
> > >                  "external_ids": {
> > >                      "type": {"key": "string", "value": "string",
> > >                               "min": 0, "max": "unlimited"}}},
> > > +            "isRoot": true},
> > > +        "BFD": {
> > > +            "columns": {
> > > +                "src_port": {"type": {"key": {"type": "integer",
> > > +                                          "minInteger": 49152,
> > > +                                          "maxInteger": 65535}}},
> > > +                "disc": {"type": {"key": {"type": "integer"}}},
> > > +                "logical_port": {"type": "string"},
> > > +                "dst_ip": {"type": "string"},
> > > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > > +                "status": {
> > > +                    "type": {"key": {"type": "string",
> > > +                             "enum": ["set", ["down", "init", "up",
> > > +                                              "admin_down"]]}}},
> > > +                "external_ids": {
> > > +                    "type": {"key": "string", "value": "string",
> > > +                             "min": 0, "max": "unlimited"}},
> > > +                "options": {
> > > +                    "type": {"key": "string", "value": "string",
> > > +                             "min": 0, "max": "unlimited"}}},
> > > +            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
> > >              "isRoot": true}
> > >      }
> > >  }
> > > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > > index c13994848..ccfe2e415 100644
> > > --- a/ovn-sb.xml
> > > +++ b/ovn-sb.xml
> > > @@ -4231,4 +4231,82 @@ tcp.flags = RST;
> > >        </column>
> > >      </group>
> > >    </table>
> > > +
> > > +  <table name="BFD">
> > > +    <p>
> > > +      Contains BFD parameter for ovn-controller bfd configuration
> > > +    </p>
> > > +
> > > +    <group title="Configuration">
> > > +      <column name="src_port">
> > > +        udp source port used in bfd control packets.
> > > +        The source port MUST be in the range 49152 through 65535
> > > +        (RFC5881 section 4).
> > > +      </column>
> > > +
> > > +      <column name="disc">
> > > +        A unique, nonzero discriminator value generated by the transmitting
> > > +        system, used to demultiplex multiple BFD sessions between the same pair
> > > +        of systems.
> > > +      </column>
> > > +
> > > +      <column name="logical_port">
> > > +        OVN logical port when BFD engine is running.
> > > +      </column>
> > > +
> > > +      <column name="dst_ip">
> > > +        BFD peer IP address.
> > > +      </column>
> > > +
> > > +      <column name="min_tx">
> > > +        This is the minimum interval, in milliseconds, that the local
> > > +        system would like to use when transmitting BFD Control packets,
> > > +        less any jitter applied. The value zero is reserved.
> > > +      </column>
> > > +
> > > +      <column name="min_rx">
> > > +        This is the minimum interval, in milliseconds, between received
> > > +        BFD Control packets that this system is capable of supporting,
> > > +        less any jitter applied by the sender. If this value is zero,
> > > +        the transmitting system does not want the remote system to send
> > > +        any periodic BFD Control packets.
> > > +      </column>
> > > +
> > > +      <column name="detect_mult">
> > > +        Detection time multiplier.  The negotiated transmit interval,
> > > +        multiplied by this value, provides the Detection Time for the
> > > +        receiving system in Asynchronous mode.
> > > +      </column>
> > > +
> > > +      <column name="options">
> > > +        Reserved for future use.
> > > +      </column>
> > > +
> > > +      <column name="external_ids">
> > > +        See <em>External IDs</em> at the beginning of this document.
> > > +      </column>
> > > +    </group>
> > > +
> > > +    <group title="Status Reporting">
> > > +      <column name="status">
> > > +        <p>
> > > +          BFD port logical states. Possible values are:
> > > +          <ul>
> > > +            <li>
> > > +              <code>admin_down</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>down</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>init</code>
> > > +            </li>
> > > +            <li>
> > > +              <code>up</code>
> > > +            </li>
> > > +          </ul>
> > > +        </p>
> > > +      </column>
> > > +    </group>
> > > +  </table>
> > >  </database>
> > > --
> > > 2.29.2
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
>
Lorenzo Bianconi Dec. 19, 2020, 9:33 a.m. UTC | #4
On Dec 18, Numan Siddique wrote:
> On Wed, Dec 16, 2020 at 8:14 PM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
> >
> > Introduce the capability to transmit BFD packets in ovn-controller.
> > Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
> > (e.g. min_tx, min_rx, ..) for ovn-controller.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> 
> Hi Lorenzo,
> 
> Please see below for a few comments.

Hi Numan,

thx for the review.

> 
> Thanks
> Numan
> 
> > ---
> >  controller/ovn-controller.c |   1 +
> >  controller/pinctrl.c        | 288 +++++++++++++++++++++++++++++++++++-
> >  controller/pinctrl.h        |   2 +
> >  lib/ovn-l7.h                |  19 +++
> >  northd/ovn-northd.c         |  91 ++++++++++++
> >  ovn-nb.ovsschema            |  24 ++-
> >  ovn-nb.xml                  |  66 +++++++++
> >  ovn-sb.ovsschema            |  27 +++-
> >  ovn-sb.xml                  |  78 ++++++++++
> >  9 files changed, 591 insertions(+), 5 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index b5f0c1315..84b3f85ad 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -2864,6 +2864,7 @@ main(int argc, char *argv[])
> >                                          ovnsb_idl_loop.idl),
> >                                      sbrec_service_monitor_table_get(
> >                                          ovnsb_idl_loop.idl),
> > +                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
> >                                      br_int, chassis,
> >                                      &runtime_data->local_datapaths,
> >                                      &runtime_data->active_tunnels);
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 7e3abf0a4..ae994b513 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
> >  static void notify_pinctrl_main(void);
> >  static void notify_pinctrl_handler(void);
> >
> > +static bool bfd_monitor_should_inject(void);
> > +static void bfd_monitor_wait(long long int timeout);
> > +static void bfd_monitor_init(void);
> > +static void bfd_monitor_destroy(void);
> > +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > +                                 OVS_REQUIRES(pinctrl_mutex);
> > +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > +                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +                            const struct sbrec_chassis *chassis,
> > +                            const struct sset *active_tunnels)
> > +                            OVS_REQUIRES(pinctrl_mutex);
> > +
> >  COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
> >  COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
> >  COVERAGE_DEFINE(pinctrl_drop_controller_event);
> > @@ -487,6 +499,7 @@ pinctrl_init(void)
> >      ip_mcast_snoop_init();
> >      init_put_vport_bindings();
> >      init_svc_monitors();
> > +    bfd_monitor_init();
> >      pinctrl.br_int_name = NULL;
> >      pinctrl_handler_seq = seq_create();
> >      pinctrl_main_seq = seq_create();
> > @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_)
> >      swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
> >
> >      while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
> > +        long long int bfd_time = LLONG_MAX;
> > +
> >          ovs_mutex_lock(&pinctrl_mutex);
> >          pinctrl_rconn_setup(swconn, pctrl->br_int_name);
> >          ip_mcast_snoop_run();
> > @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_)
> >                  send_ipv6_ras(swconn, &send_ipv6_ra_time);
> >                  send_ipv6_prefixd(swconn, &send_prefixd_time);
> >                  send_mac_binding_buffered_pkts(swconn);
> > +                bfd_monitor_send_msg(swconn, &bfd_time);
> >                  ovs_mutex_unlock(&pinctrl_mutex);
> >
> >                  ip_mcast_querier_run(swconn, &send_mcast_query_time);
> > @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_)
> >          ip_mcast_querier_wait(send_mcast_query_time);
> >          svc_monitors_wait(svc_monitors_next_run_time);
> >          ipv6_prefixd_wait(send_prefixd_time);
> > +        bfd_monitor_wait(bfd_time);
> >
> >          new_seq = seq_read(pinctrl_handler_seq);
> >          seq_wait(pinctrl_handler_seq, new_seq);
> > @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >              const struct sbrec_dns_table *dns_table,
> >              const struct sbrec_controller_event_table *ce_table,
> >              const struct sbrec_service_monitor_table *svc_mon_table,
> > +            const struct sbrec_bfd_table *bfd_table,
> >              const struct ovsrec_bridge *br_int,
> >              const struct sbrec_chassis *chassis,
> >              const struct hmap *local_datapaths,
> > @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >                           local_datapaths);
> >      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
> >                        chassis);
> > +    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
> > +                    active_tunnels);
> >      ovs_mutex_unlock(&pinctrl_mutex);
> >  }
> >
> > @@ -3722,6 +3742,7 @@ pinctrl_destroy(void)
> >      destroy_dns_cache();
> >      ip_mcast_snoop_destroy();
> >      destroy_svc_monitors();
> > +    bfd_monitor_destroy();
> >      seq_destroy(pinctrl_main_seq);
> >      seq_destroy(pinctrl_handler_seq);
> >  }
> > @@ -5525,7 +5546,8 @@ may_inject_pkts(void)
> >              !shash_is_empty(&send_garp_rarp_data) ||
> >              ipv6_prefixd_should_inject() ||
> >              !ovs_list_is_empty(&mcast_query_list) ||
> > -            !ovs_list_is_empty(&buffered_mac_bindings));
> > +            !ovs_list_is_empty(&buffered_mac_bindings) ||
> > +            bfd_monitor_should_inject());
> >  }
> >
> >  static void
> > @@ -6312,6 +6334,270 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >
> >  }
> >
> > +static struct hmap bfd_monitor_map;
> > +
> > +struct bfd_entry {
> > +    struct hmap_node node;
> > +
> > +    /* L2 source address */
> > +    struct eth_addr src_mac;
> > +    /* IPv4 source address */
> > +    ovs_be32 ip_src;
> > +    /* IPv4 destination address */
> > +    ovs_be32 ip_dst;
> > +    /* RFC 5881 section 4
> > +     * The source port MUST be in the range 49152 through 65535.
> > +     * The same UDP source port number MUST be used for all BFD
> > +     * Control packets associated with a particular session.
> > +     * The source port number SHOULD be unique among all BFD
> > +     * sessions on the system
> > +     */
> > +    uint16_t udp_src;
> > +    ovs_be32 disc;
> > +
> > +    int64_t port_key;
> > +    int64_t metadata;
> > +
> > +    long long int last_update;
> > +    long long int next_tx;
> > +};
> > +
> > +static void
> > +bfd_monitor_init(void)
> > +{
> > +    hmap_init(&bfd_monitor_map);
> > +}
> > +
> > +static void
> > +bfd_monitor_destroy(void)
> > +{
> > +    hmap_destroy(&bfd_monitor_map);
> 
> I think you also need to remove the hmap nodes  and free the 'struct
> bfd_entry' objects
> before destroying the hmap.

ack, I will fix it in v4

> 
> 
> > +}
> > +
> > +static struct bfd_entry *
> > +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
> > +{
> > +    struct bfd_entry *entry;
> > +    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
> > +                             &bfd_monitor_map) {
> > +        if (entry->udp_src == port) {
> > +            return entry;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static bool
> > +bfd_monitor_should_inject(void)
> > +{
> > +    long long int cur_time = time_msec();
> > +    struct bfd_entry *entry;
> > +
> > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > +        if (entry->next_tx < cur_time) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +static void
> > +bfd_monitor_wait(long long int timeout)
> > +{
> > +    if (!hmap_is_empty(&bfd_monitor_map)) {
> > +        poll_timer_wait_until(timeout);
> > +    }
> > +}
> > +
> > +static void
> > +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
> > +{
> > +    struct udp_header *udp;
> > +    struct bfd_msg *msg;
> > +
> > +    dp_packet_reserve(packet, 2);
> > +    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
> > +    eth->eth_dst = eth_addr_broadcast;
> > +    eth->eth_src = entry->src_mac;
> > +    eth->eth_type = htons(ETH_TYPE_IP);
> > +
> > +    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
> > +    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
> > +    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
> > +    ip->ip_ttl = MAXTTL;
> > +    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
> > +    ip->ip_proto = IPPROTO_UDP;
> > +    put_16aligned_be32(&ip->ip_src, entry->ip_src);
> > +    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
> > +    /* Checksum has already been zeroed by put_zeros call. */
> > +    ip->ip_csum = csum(ip, sizeof *ip);
> > +
> > +    udp = dp_packet_put_zeros(packet, sizeof *udp);
> > +    udp->udp_src = htons(entry->udp_src);
> > +    udp->udp_dst = htons(BFD_DEST_PORT);
> > +    udp->udp_len = htons(sizeof *udp + sizeof *msg);
> > +
> > +    msg = dp_packet_put_uninit(packet, sizeof *msg);
> > +    msg->vers_diag = (BFD_VERSION << 5);
> > +    msg->length = BFD_PACKET_LEN;
> > +}
> > +
> > +static void
> > +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    long long int cur_time = time_msec();
> > +    struct bfd_entry *entry;
> > +
> > +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> > +        if (cur_time < entry->next_tx) {
> > +            goto next;
> > +        }
> > +
> > +        uint64_t packet_stub[256 / 8];
> > +        struct dp_packet packet;
> > +        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> > +        bfd_monitor_put_bfd_msg(entry, &packet);
> > +
> > +        uint64_t ofpacts_stub[4096 / 8];
> > +        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> > +
> > +        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> > +        uint32_t dp_key = entry->metadata;
> > +        uint32_t port_key = entry->port_key;
> > +        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> > +        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> > +        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
> > +        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > +        resubmit->in_port = OFPP_CONTROLLER;
> > +        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> > +
> > +        struct ofputil_packet_out po = {
> > +            .packet = dp_packet_data(&packet),
> > +            .packet_len = dp_packet_size(&packet),
> > +            .buffer_id = UINT32_MAX,
> > +            .ofpacts = ofpacts.data,
> > +            .ofpacts_len = ofpacts.size,
> > +        };
> > +
> > +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > +        enum ofp_version version = rconn_get_version(swconn);
> > +        enum ofputil_protocol proto =
> > +            ofputil_protocol_from_ofp_version(version);
> > +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > +        dp_packet_uninit(&packet);
> > +        ofpbuf_uninit(&ofpacts);
> > +
> > +        entry->next_tx = cur_time + 5000;
> > +next:
> > +        if (*bfd_time > entry->next_tx) {
> > +            *bfd_time = entry->next_tx;
> 
> I think there is  some problem here. I applied all the patches of this
> series and deployed
> fake multinode.
> 
> When I create a BFD entry like below, ovn-controller pinctrl thread on
> ovn-gw-1 fake chassis, just wakes up
> 100% of the time.

ack, I spotted the issue. Using the following command:

$ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50

I will not provide a value fpr min_tx and min_rx. This will immediately trigger
a thread wakeup. I will fix it in v4

Regards,
Lorenzo

> 
> ---
> ovn-nbctl create bfd logical_port=lr0-public dst_ip=172.16.0.50
> ---
> 
> In the ovn-controller logs I see
> 
> ----
> 2020-12-18T13:29:03.181Z|77940|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77941|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77942|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> 2020-12-18T13:29:03.181Z|77943|poll_loop(ovn_pinctrl0)|DBG|wakeup due
> to 0-ms timeout at controller/pinctrl.c:6495
> ...
> ..
> ----
> 
> Thanks
> Numan
> 
> 
> > +        }
> > +    }
> > +}
> > +
> > +#define BFD_MONITOR_STALE_TIMEOUT  180000LL
> > +static void
> > +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> > +                struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +                const struct sbrec_chassis *chassis,
> > +                const struct sset *active_tunnels)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    long long int cur_time = time_msec();
> > +    bool changed = false;
> > +
> > +    const struct sbrec_bfd *bt;
> > +    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
> > +        const struct sbrec_port_binding *pb
> > +            = lport_lookup_by_name(sbrec_port_binding_by_name,
> > +                                   bt->logical_port);
> > +        if (!pb) {
> > +            continue;
> > +        }
> > +
> > +        struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port(
> > +                bt->dst_ip, bt->src_port);
> > +        if (!entry) {
> > +            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
> > +            struct eth_addr ea = eth_addr_zero;
> > +            int i;
> > +
> > +            if (!ip_parse(bt->dst_ip, &ip_dst)) {
> > +                continue;
> > +            }
> > +
> > +            const char *peer_s = smap_get(&pb->options, "peer");
> > +            if (!peer_s) {
> > +                continue;
> > +            }
> > +
> > +            const struct sbrec_port_binding *peer
> > +                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
> > +            if (!peer) {
> > +                continue;
> > +            }
> > +
> > +            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> > +            bool resident = lport_is_chassis_resident(
> > +                    sbrec_port_binding_by_name, chassis, active_tunnels,
> > +                    redirect_name);
> > +            free(redirect_name);
> > +            if (!resident && strcmp(pb->type, "l3gateway") &&
> > +                pb->chassis != chassis) {
> > +                continue;
> > +            }
> > +
> > +            for (i = 0; i < pb->n_mac; i++) {
> > +                struct lport_addresses laddrs;
> > +
> > +                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
> > +                    continue;
> > +                }
> > +
> > +                ea = laddrs.ea;
> > +                if (laddrs.n_ipv4_addrs > 0) {
> > +                    ip_src = laddrs.ipv4_addrs[0].addr;
> > +                    destroy_lport_addresses(&laddrs);
> > +                    break;
> > +                }
> > +                destroy_lport_addresses(&laddrs);
> > +            }
> > +
> > +            if (eth_addr_is_zero(ea)) {
> > +                continue;
> > +            }
> > +
> > +            entry = xzalloc(sizeof *entry);
> > +            entry->src_mac = ea;
> > +            entry->ip_src = ip_src;
> > +            entry->ip_dst = ip_dst;
> > +            entry->udp_src = bt->src_port;
> > +            entry->disc = htonl(bt->disc);
> > +            entry->next_tx = cur_time;
> > +            entry->metadata = pb->datapath->tunnel_key;
> > +            entry->port_key = pb->tunnel_key;
> > +
> > +            uint32_t hash = hash_string(bt->dst_ip, 0);
> > +            hmap_insert(&bfd_monitor_map, &entry->node, hash);
> > +            changed = true;
> > +        }
> > +        entry->last_update = cur_time;
> > +    }
> > +
> > +    struct bfd_entry *entry, *next_entry;
> > +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> > +        if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) {
> > +            hmap_remove(&bfd_monitor_map, &entry->node);
> > +            free(entry);
> > +        }
> > +    }
> > +
> > +    if (changed) {
> > +        notify_pinctrl_handler();
> > +    }
> > +}
> > +
> >  static uint16_t
> >  get_random_src_port(void)
> >  {
> > diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> > index 4b101ec92..8555d983d 100644
> > --- a/controller/pinctrl.h
> > +++ b/controller/pinctrl.h
> > @@ -31,6 +31,7 @@ struct sbrec_chassis;
> >  struct sbrec_dns_table;
> >  struct sbrec_controller_event_table;
> >  struct sbrec_service_monitor_table;
> > +struct sbrec_bfd_table;
> >
> >  void pinctrl_init(void);
> >  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >                   const struct sbrec_dns_table *,
> >                   const struct sbrec_controller_event_table *,
> >                   const struct sbrec_service_monitor_table *,
> > +                 const struct sbrec_bfd_table *,
> >                   const struct ovsrec_bridge *, const struct sbrec_chassis *,
> >                   const struct hmap *local_datapaths,
> >                   const struct sset *active_tunnels);
> > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> > index c84a0e7a9..d00982449 100644
> > --- a/lib/ovn-l7.h
> > +++ b/lib/ovn-l7.h
> > @@ -26,6 +26,25 @@
> >  #include "hash.h"
> >  #include "ovn/logical-fields.h"
> >
> > +#define BFD_PACKET_LEN  24
> > +#define BFD_DEST_PORT   3784
> > +#define BFD_VERSION     1
> > +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
> > +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
> > +
> > +struct bfd_msg {
> > +    uint8_t vers_diag;
> > +    uint8_t flags;
> > +    uint8_t mult;
> > +    uint8_t length;
> > +    ovs_be32 my_disc;
> > +    ovs_be32 your_disc;
> > +    ovs_be32 min_tx;
> > +    ovs_be32 min_rx;
> > +    ovs_be32 min_rx_echo;
> > +};
> > +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
> > +
> >  /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
> >  struct gen_opts_map {
> >      struct hmap_node hmap_node;
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index b377dffa1..e8c7629e7 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -11707,6 +11707,92 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
> >      }
> >  }
> >
> > +struct bfd_entry {
> > +    struct hmap_node hmap_node;
> > +
> > +    const struct sbrec_bfd *sb_bt;
> > +    bool found;
> > +};
> > +
> > +static struct bfd_entry *
> > +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
> > +                const char *dst_ip)
> > +{
> > +    struct bfd_entry *bfd_e;
> > +    uint32_t hash;
> > +
> > +    hash = hash_string(dst_ip, 0);
> > +    hash = hash_string(logical_port, hash);
> > +    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
> > +        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
> > +            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
> > +            return bfd_e;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static void
> > +build_bfd_table(struct northd_context *ctx)
> > +{
> > +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> > +    struct bfd_entry *bfd_e, *next_bfd_e;
> > +    const struct sbrec_bfd *sb_bt;
> > +    uint32_t hash;
> > +
> > +    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
> > +        bfd_e = xmalloc(sizeof *bfd_e);
> > +        bfd_e->sb_bt = sb_bt;
> > +        bfd_e->found = false;
> > +        hash = hash_string(sb_bt->dst_ip, 0);
> > +        hash = hash_string(sb_bt->logical_port, hash);
> > +        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
> > +    }
> > +
> > +    const struct nbrec_bfd *nb_bt;
> > +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> > +        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
> > +        if (!bfd_e) {
> > +            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
> > +            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> > +            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> > +            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
> > +            /* RFC 5881 section 4
> > +             * The source port MUST be in the range 49152 through 65535.
> > +             * The same UDP source port number MUST be used for all BFD
> > +             * Control packets associated with a particular session.
> > +             * The source port number SHOULD be unique among all BFD
> > +             * sessions on the system
> > +             */
> > +            uint16_t udp_src = random_range(16383) + 49152;
> > +            sbrec_bfd_set_src_port(sb_bt, udp_src);
> > +            sbrec_bfd_set_status(sb_bt, "down");
> > +            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> > +            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> > +            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> > +        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
> > +            if (!strcmp(nb_bt->status, "admin_down") ||
> > +                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
> > +                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
> > +            } else {
> > +                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
> > +            }
> > +        }
> > +        if (bfd_e) {
> > +            bfd_e->found = true;
> > +        }
> > +    }
> > +
> > +    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
> > +        if (!bfd_e->found && bfd_e->sb_bt) {
> > +            sbrec_bfd_delete(bfd_e->sb_bt);
> > +        }
> > +        hmap_remove(&sb_only, &bfd_e->hmap_node);
> > +        free(bfd_e);
> > +    }
> > +    hmap_destroy(&sb_only);
> > +}
> > +
> >  static void
> >  sync_address_set(struct northd_context *ctx, const char *name,
> >                   const char **addrs, size_t n_addrs,
> > @@ -12504,6 +12590,7 @@ ovnnb_db_run(struct northd_context *ctx,
> >      build_ip_mcast(ctx, datapaths);
> >      build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
> >      build_meter_groups(ctx, &meter_groups);
> > +    build_bfd_table(ctx);
> >      build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
> >                   &igmp_groups, &meter_groups, &lbs);
> >      ovn_update_ipv6_prefix(ports);
> > @@ -13463,6 +13550,10 @@ main(int argc, char *argv[])
> >      add_column_noalert(ovnsb_idl_loop.idl,
> >                         &sbrec_load_balancer_col_external_ids);
> >
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
> > +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
> > +
> >      struct ovsdb_idl_index *sbrec_chassis_by_name
> >          = chassis_index_create(ovnsb_idl_loop.idl);
> >
> > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > index b77a2308c..a880482a4 100644
> > --- a/ovn-nb.ovsschema
> > +++ b/ovn-nb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Northbound",
> > -    "version": "5.30.0",
> > -    "cksum": "3273824429 27172",
> > +    "version": "5.31.0",
> > +    "cksum": "3663844734 28178",
> >      "tables": {
> >          "NB_Global": {
> >              "columns": {
> > @@ -526,5 +526,25 @@
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}}},
> >              "indexes": [["name"]],
> > +            "isRoot": true},
> > +        "BFD": {
> > +            "columns": {
> > +                "logical_port": {"type": "string"},
> > +                "dst_ip": {"type": "string"},
> > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > +                "status": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set", ["down", "init", "up",
> > +                                              "admin_down"]]},
> > +                             "min": 0, "max": 1}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["logical_port", "dst_ip"]],
> >              "isRoot": true}}
> >      }
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index 0cf043790..4a28d3f0d 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -3728,4 +3728,70 @@
> >        </column>
> >      </group>
> >    </table>
> > +
> > +  <table name="BFD">
> > +    <p>
> > +      Contains BFD parameter for ovn-controller bfd configuration
> > +    </p>
> > +
> > +    <group title="Configuration">
> > +      <column name="logical_port">
> > +        OVN logical port when BFD engine is running.
> > +      </column>
> > +
> > +      <column name="dst_ip">
> > +        BFD peer IP address.
> > +      </column>
> > +
> > +      <column name="min_tx">
> > +        This is the minimum interval, in milliseconds, that the local
> > +        system would like to use when transmitting BFD Control packets,
> > +        less any jitter applied. The value zero is reserved.
> > +      </column>
> > +
> > +      <column name="min_rx">
> > +        This is the minimum interval, in milliseconds, between received
> > +        BFD Control packets that this system is capable of supporting,
> > +        less any jitter applied by the sender. If this value is zero,
> > +        the transmitting system does not want the remote system to send
> > +        any periodic BFD Control packets.
> > +      </column>
> > +
> > +      <column name="detect_mult">
> > +        Detection time multiplier.  The negotiated transmit interval,
> > +        multiplied by this value, provides the Detection Time for the
> > +        receiving system in Asynchronous mode.
> > +      </column>
> > +
> > +      <column name="options">
> > +        Reserved for future use.
> > +      </column>
> > +
> > +      <column name="external_ids">
> > +        See <em>External IDs</em> at the beginning of this document.
> > +      </column>
> > +    </group>
> > +
> > +    <group title="Status Reporting">
> > +      <column name="status">
> > +        <p>
> > +          BFD port logical states. Possible values are:
> > +          <ul>
> > +            <li>
> > +              <code>admin_down</code>
> > +            </li>
> > +            <li>
> > +              <code>down</code>
> > +            </li>
> > +            <li>
> > +              <code>init</code>
> > +            </li>
> > +            <li>
> > +              <code>up</code>
> > +            </li>
> > +          </ul>
> > +        </p>
> > +      </column>
> > +    </group>
> > +  </table>
> >  </database>
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index 5228839b8..97db6de39 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.12.0",
> > -    "cksum": "3969471120 24441",
> > +    "version": "20.13.0",
> > +    "cksum": "3035725595 25676",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -484,6 +484,29 @@
> >                  "external_ids": {
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}}},
> > +            "isRoot": true},
> > +        "BFD": {
> > +            "columns": {
> > +                "src_port": {"type": {"key": {"type": "integer",
> > +                                          "minInteger": 49152,
> > +                                          "maxInteger": 65535}}},
> > +                "disc": {"type": {"key": {"type": "integer"}}},
> > +                "logical_port": {"type": "string"},
> > +                "dst_ip": {"type": "string"},
> > +                "min_tx": {"type": {"key": {"type": "integer"}}},
> > +                "min_rx": {"type": {"key": {"type": "integer"}}},
> > +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> > +                "status": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set", ["down", "init", "up",
> > +                                              "admin_down"]]}}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
> >              "isRoot": true}
> >      }
> >  }
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index c13994848..ccfe2e415 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -4231,4 +4231,82 @@ tcp.flags = RST;
> >        </column>
> >      </group>
> >    </table>
> > +
> > +  <table name="BFD">
> > +    <p>
> > +      Contains BFD parameter for ovn-controller bfd configuration
> > +    </p>
> > +
> > +    <group title="Configuration">
> > +      <column name="src_port">
> > +        udp source port used in bfd control packets.
> > +        The source port MUST be in the range 49152 through 65535
> > +        (RFC5881 section 4).
> > +      </column>
> > +
> > +      <column name="disc">
> > +        A unique, nonzero discriminator value generated by the transmitting
> > +        system, used to demultiplex multiple BFD sessions between the same pair
> > +        of systems.
> > +      </column>
> > +
> > +      <column name="logical_port">
> > +        OVN logical port when BFD engine is running.
> > +      </column>
> > +
> > +      <column name="dst_ip">
> > +        BFD peer IP address.
> > +      </column>
> > +
> > +      <column name="min_tx">
> > +        This is the minimum interval, in milliseconds, that the local
> > +        system would like to use when transmitting BFD Control packets,
> > +        less any jitter applied. The value zero is reserved.
> > +      </column>
> > +
> > +      <column name="min_rx">
> > +        This is the minimum interval, in milliseconds, between received
> > +        BFD Control packets that this system is capable of supporting,
> > +        less any jitter applied by the sender. If this value is zero,
> > +        the transmitting system does not want the remote system to send
> > +        any periodic BFD Control packets.
> > +      </column>
> > +
> > +      <column name="detect_mult">
> > +        Detection time multiplier.  The negotiated transmit interval,
> > +        multiplied by this value, provides the Detection Time for the
> > +        receiving system in Asynchronous mode.
> > +      </column>
> > +
> > +      <column name="options">
> > +        Reserved for future use.
> > +      </column>
> > +
> > +      <column name="external_ids">
> > +        See <em>External IDs</em> at the beginning of this document.
> > +      </column>
> > +    </group>
> > +
> > +    <group title="Status Reporting">
> > +      <column name="status">
> > +        <p>
> > +          BFD port logical states. Possible values are:
> > +          <ul>
> > +            <li>
> > +              <code>admin_down</code>
> > +            </li>
> > +            <li>
> > +              <code>down</code>
> > +            </li>
> > +            <li>
> > +              <code>init</code>
> > +            </li>
> > +            <li>
> > +              <code>up</code>
> > +            </li>
> > +          </ul>
> > +        </p>
> > +      </column>
> > +    </group>
> > +  </table>
> >  </database>
> > --
> > 2.29.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index b5f0c1315..84b3f85ad 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -2864,6 +2864,7 @@  main(int argc, char *argv[])
                                         ovnsb_idl_loop.idl),
                                     sbrec_service_monitor_table_get(
                                         ovnsb_idl_loop.idl),
+                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
                                     br_int, chassis,
                                     &runtime_data->local_datapaths,
                                     &runtime_data->active_tunnels);
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7e3abf0a4..ae994b513 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -323,6 +323,18 @@  put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
 static void notify_pinctrl_main(void);
 static void notify_pinctrl_handler(void);
 
+static bool bfd_monitor_should_inject(void);
+static void bfd_monitor_wait(long long int timeout);
+static void bfd_monitor_init(void);
+static void bfd_monitor_destroy(void);
+static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
+                                 OVS_REQUIRES(pinctrl_mutex);
+static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
+                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                            const struct sbrec_chassis *chassis,
+                            const struct sset *active_tunnels)
+                            OVS_REQUIRES(pinctrl_mutex);
+
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
 COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
 COVERAGE_DEFINE(pinctrl_drop_controller_event);
@@ -487,6 +499,7 @@  pinctrl_init(void)
     ip_mcast_snoop_init();
     init_put_vport_bindings();
     init_svc_monitors();
+    bfd_monitor_init();
     pinctrl.br_int_name = NULL;
     pinctrl_handler_seq = seq_create();
     pinctrl_main_seq = seq_create();
@@ -3053,6 +3066,8 @@  pinctrl_handler(void *arg_)
     swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
 
     while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
+        long long int bfd_time = LLONG_MAX;
+
         ovs_mutex_lock(&pinctrl_mutex);
         pinctrl_rconn_setup(swconn, pctrl->br_int_name);
         ip_mcast_snoop_run();
@@ -3085,6 +3100,7 @@  pinctrl_handler(void *arg_)
                 send_ipv6_ras(swconn, &send_ipv6_ra_time);
                 send_ipv6_prefixd(swconn, &send_prefixd_time);
                 send_mac_binding_buffered_pkts(swconn);
+                bfd_monitor_send_msg(swconn, &bfd_time);
                 ovs_mutex_unlock(&pinctrl_mutex);
 
                 ip_mcast_querier_run(swconn, &send_mcast_query_time);
@@ -3102,6 +3118,7 @@  pinctrl_handler(void *arg_)
         ip_mcast_querier_wait(send_mcast_query_time);
         svc_monitors_wait(svc_monitors_next_run_time);
         ipv6_prefixd_wait(send_prefixd_time);
+        bfd_monitor_wait(bfd_time);
 
         new_seq = seq_read(pinctrl_handler_seq);
         seq_wait(pinctrl_handler_seq, new_seq);
@@ -3149,6 +3166,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             const struct sbrec_dns_table *dns_table,
             const struct sbrec_controller_event_table *ce_table,
             const struct sbrec_service_monitor_table *svc_mon_table,
+            const struct sbrec_bfd_table *bfd_table,
             const struct ovsrec_bridge *br_int,
             const struct sbrec_chassis *chassis,
             const struct hmap *local_datapaths,
@@ -3179,6 +3197,8 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                          local_datapaths);
     sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
                       chassis);
+    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
+                    active_tunnels);
     ovs_mutex_unlock(&pinctrl_mutex);
 }
 
@@ -3722,6 +3742,7 @@  pinctrl_destroy(void)
     destroy_dns_cache();
     ip_mcast_snoop_destroy();
     destroy_svc_monitors();
+    bfd_monitor_destroy();
     seq_destroy(pinctrl_main_seq);
     seq_destroy(pinctrl_handler_seq);
 }
@@ -5525,7 +5546,8 @@  may_inject_pkts(void)
             !shash_is_empty(&send_garp_rarp_data) ||
             ipv6_prefixd_should_inject() ||
             !ovs_list_is_empty(&mcast_query_list) ||
-            !ovs_list_is_empty(&buffered_mac_bindings));
+            !ovs_list_is_empty(&buffered_mac_bindings) ||
+            bfd_monitor_should_inject());
 }
 
 static void
@@ -6312,6 +6334,270 @@  sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
 }
 
+static struct hmap bfd_monitor_map;
+
+struct bfd_entry {
+    struct hmap_node node;
+
+    /* L2 source address */
+    struct eth_addr src_mac;
+    /* IPv4 source address */
+    ovs_be32 ip_src;
+    /* IPv4 destination address */
+    ovs_be32 ip_dst;
+    /* RFC 5881 section 4
+     * The source port MUST be in the range 49152 through 65535.
+     * The same UDP source port number MUST be used for all BFD
+     * Control packets associated with a particular session.
+     * The source port number SHOULD be unique among all BFD
+     * sessions on the system
+     */
+    uint16_t udp_src;
+    ovs_be32 disc;
+
+    int64_t port_key;
+    int64_t metadata;
+
+    long long int last_update;
+    long long int next_tx;
+};
+
+static void
+bfd_monitor_init(void)
+{
+    hmap_init(&bfd_monitor_map);
+}
+
+static void
+bfd_monitor_destroy(void)
+{
+    hmap_destroy(&bfd_monitor_map);
+}
+
+static struct bfd_entry *
+pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
+{
+    struct bfd_entry *entry;
+    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
+                             &bfd_monitor_map) {
+        if (entry->udp_src == port) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+static bool
+bfd_monitor_should_inject(void)
+{
+    long long int cur_time = time_msec();
+    struct bfd_entry *entry;
+
+    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
+        if (entry->next_tx < cur_time) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void
+bfd_monitor_wait(long long int timeout)
+{
+    if (!hmap_is_empty(&bfd_monitor_map)) {
+        poll_timer_wait_until(timeout);
+    }
+}
+
+static void
+bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
+{
+    struct udp_header *udp;
+    struct bfd_msg *msg;
+
+    dp_packet_reserve(packet, 2);
+    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
+    eth->eth_dst = eth_addr_broadcast;
+    eth->eth_src = entry->src_mac;
+    eth->eth_type = htons(ETH_TYPE_IP);
+
+    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
+    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
+    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
+    ip->ip_ttl = MAXTTL;
+    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
+    ip->ip_proto = IPPROTO_UDP;
+    put_16aligned_be32(&ip->ip_src, entry->ip_src);
+    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
+    /* Checksum has already been zeroed by put_zeros call. */
+    ip->ip_csum = csum(ip, sizeof *ip);
+
+    udp = dp_packet_put_zeros(packet, sizeof *udp);
+    udp->udp_src = htons(entry->udp_src);
+    udp->udp_dst = htons(BFD_DEST_PORT);
+    udp->udp_len = htons(sizeof *udp + sizeof *msg);
+
+    msg = dp_packet_put_uninit(packet, sizeof *msg);
+    msg->vers_diag = (BFD_VERSION << 5);
+    msg->length = BFD_PACKET_LEN;
+}
+
+static void
+bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    long long int cur_time = time_msec();
+    struct bfd_entry *entry;
+
+    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
+        if (cur_time < entry->next_tx) {
+            goto next;
+        }
+
+        uint64_t packet_stub[256 / 8];
+        struct dp_packet packet;
+        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
+        bfd_monitor_put_bfd_msg(entry, &packet);
+
+        uint64_t ofpacts_stub[4096 / 8];
+        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+
+        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
+        uint32_t dp_key = entry->metadata;
+        uint32_t port_key = entry->port_key;
+        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
+        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
+        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
+        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+        resubmit->in_port = OFPP_CONTROLLER;
+        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
+
+        struct ofputil_packet_out po = {
+            .packet = dp_packet_data(&packet),
+            .packet_len = dp_packet_size(&packet),
+            .buffer_id = UINT32_MAX,
+            .ofpacts = ofpacts.data,
+            .ofpacts_len = ofpacts.size,
+        };
+
+        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+        enum ofp_version version = rconn_get_version(swconn);
+        enum ofputil_protocol proto =
+            ofputil_protocol_from_ofp_version(version);
+        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+        dp_packet_uninit(&packet);
+        ofpbuf_uninit(&ofpacts);
+
+        entry->next_tx = cur_time + 5000;
+next:
+        if (*bfd_time > entry->next_tx) {
+            *bfd_time = entry->next_tx;
+        }
+    }
+}
+
+#define BFD_MONITOR_STALE_TIMEOUT  180000LL
+static void
+bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
+                struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                const struct sbrec_chassis *chassis,
+                const struct sset *active_tunnels)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    long long int cur_time = time_msec();
+    bool changed = false;
+
+    const struct sbrec_bfd *bt;
+    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
+        const struct sbrec_port_binding *pb
+            = lport_lookup_by_name(sbrec_port_binding_by_name,
+                                   bt->logical_port);
+        if (!pb) {
+            continue;
+        }
+
+        struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_port(
+                bt->dst_ip, bt->src_port);
+        if (!entry) {
+            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
+            struct eth_addr ea = eth_addr_zero;
+            int i;
+
+            if (!ip_parse(bt->dst_ip, &ip_dst)) {
+                continue;
+            }
+
+            const char *peer_s = smap_get(&pb->options, "peer");
+            if (!peer_s) {
+                continue;
+            }
+
+            const struct sbrec_port_binding *peer
+                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
+            if (!peer) {
+                continue;
+            }
+
+            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
+            bool resident = lport_is_chassis_resident(
+                    sbrec_port_binding_by_name, chassis, active_tunnels,
+                    redirect_name);
+            free(redirect_name);
+            if (!resident && strcmp(pb->type, "l3gateway") &&
+                pb->chassis != chassis) {
+                continue;
+            }
+
+            for (i = 0; i < pb->n_mac; i++) {
+                struct lport_addresses laddrs;
+
+                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
+                    continue;
+                }
+
+                ea = laddrs.ea;
+                if (laddrs.n_ipv4_addrs > 0) {
+                    ip_src = laddrs.ipv4_addrs[0].addr;
+                    destroy_lport_addresses(&laddrs);
+                    break;
+                }
+                destroy_lport_addresses(&laddrs);
+            }
+
+            if (eth_addr_is_zero(ea)) {
+                continue;
+            }
+
+            entry = xzalloc(sizeof *entry);
+            entry->src_mac = ea;
+            entry->ip_src = ip_src;
+            entry->ip_dst = ip_dst;
+            entry->udp_src = bt->src_port;
+            entry->disc = htonl(bt->disc);
+            entry->next_tx = cur_time;
+            entry->metadata = pb->datapath->tunnel_key;
+            entry->port_key = pb->tunnel_key;
+
+            uint32_t hash = hash_string(bt->dst_ip, 0);
+            hmap_insert(&bfd_monitor_map, &entry->node, hash);
+            changed = true;
+        }
+        entry->last_update = cur_time;
+    }
+
+    struct bfd_entry *entry, *next_entry;
+    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
+        if (entry->last_update + BFD_MONITOR_STALE_TIMEOUT < cur_time) {
+            hmap_remove(&bfd_monitor_map, &entry->node);
+            free(entry);
+        }
+    }
+
+    if (changed) {
+        notify_pinctrl_handler();
+    }
+}
+
 static uint16_t
 get_random_src_port(void)
 {
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 4b101ec92..8555d983d 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -31,6 +31,7 @@  struct sbrec_chassis;
 struct sbrec_dns_table;
 struct sbrec_controller_event_table;
 struct sbrec_service_monitor_table;
+struct sbrec_bfd_table;
 
 void pinctrl_init(void);
 void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
@@ -44,6 +45,7 @@  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                  const struct sbrec_dns_table *,
                  const struct sbrec_controller_event_table *,
                  const struct sbrec_service_monitor_table *,
+                 const struct sbrec_bfd_table *,
                  const struct ovsrec_bridge *, const struct sbrec_chassis *,
                  const struct hmap *local_datapaths,
                  const struct sset *active_tunnels);
diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
index c84a0e7a9..d00982449 100644
--- a/lib/ovn-l7.h
+++ b/lib/ovn-l7.h
@@ -26,6 +26,25 @@ 
 #include "hash.h"
 #include "ovn/logical-fields.h"
 
+#define BFD_PACKET_LEN  24
+#define BFD_DEST_PORT   3784
+#define BFD_VERSION     1
+#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
+#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
+
+struct bfd_msg {
+    uint8_t vers_diag;
+    uint8_t flags;
+    uint8_t mult;
+    uint8_t length;
+    ovs_be32 my_disc;
+    ovs_be32 your_disc;
+    ovs_be32 min_tx;
+    ovs_be32 min_rx;
+    ovs_be32 min_rx_echo;
+};
+BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
+
 /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
 struct gen_opts_map {
     struct hmap_node hmap_node;
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index b377dffa1..e8c7629e7 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -11707,6 +11707,92 @@  build_lflows(struct northd_context *ctx, struct hmap *datapaths,
     }
 }
 
+struct bfd_entry {
+    struct hmap_node hmap_node;
+
+    const struct sbrec_bfd *sb_bt;
+    bool found;
+};
+
+static struct bfd_entry *
+bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
+                const char *dst_ip)
+{
+    struct bfd_entry *bfd_e;
+    uint32_t hash;
+
+    hash = hash_string(dst_ip, 0);
+    hash = hash_string(logical_port, hash);
+    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
+        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
+            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
+            return bfd_e;
+        }
+    }
+    return NULL;
+}
+
+static void
+build_bfd_table(struct northd_context *ctx)
+{
+    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
+    struct bfd_entry *bfd_e, *next_bfd_e;
+    const struct sbrec_bfd *sb_bt;
+    uint32_t hash;
+
+    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
+        bfd_e = xmalloc(sizeof *bfd_e);
+        bfd_e->sb_bt = sb_bt;
+        bfd_e->found = false;
+        hash = hash_string(sb_bt->dst_ip, 0);
+        hash = hash_string(sb_bt->logical_port, hash);
+        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
+    }
+
+    const struct nbrec_bfd *nb_bt;
+    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
+        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
+        if (!bfd_e) {
+            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
+            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
+            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
+            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
+            /* RFC 5881 section 4
+             * The source port MUST be in the range 49152 through 65535.
+             * The same UDP source port number MUST be used for all BFD
+             * Control packets associated with a particular session.
+             * The source port number SHOULD be unique among all BFD
+             * sessions on the system
+             */
+            uint16_t udp_src = random_range(16383) + 49152;
+            sbrec_bfd_set_src_port(sb_bt, udp_src);
+            sbrec_bfd_set_status(sb_bt, "down");
+            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
+            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
+            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
+        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
+            if (!strcmp(nb_bt->status, "admin_down") ||
+                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
+                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
+            } else {
+                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
+            }
+        }
+        if (bfd_e) {
+            bfd_e->found = true;
+        }
+    }
+
+    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
+        if (!bfd_e->found && bfd_e->sb_bt) {
+            sbrec_bfd_delete(bfd_e->sb_bt);
+        }
+        hmap_remove(&sb_only, &bfd_e->hmap_node);
+        free(bfd_e);
+    }
+    hmap_destroy(&sb_only);
+}
+
 static void
 sync_address_set(struct northd_context *ctx, const char *name,
                  const char **addrs, size_t n_addrs,
@@ -12504,6 +12590,7 @@  ovnnb_db_run(struct northd_context *ctx,
     build_ip_mcast(ctx, datapaths);
     build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
     build_meter_groups(ctx, &meter_groups);
+    build_bfd_table(ctx);
     build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
                  &igmp_groups, &meter_groups, &lbs);
     ovn_update_ipv6_prefix(ports);
@@ -13463,6 +13550,10 @@  main(int argc, char *argv[])
     add_column_noalert(ovnsb_idl_loop.idl,
                        &sbrec_load_balancer_col_external_ids);
 
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
+
     struct ovsdb_idl_index *sbrec_chassis_by_name
         = chassis_index_create(ovnsb_idl_loop.idl);
 
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index b77a2308c..a880482a4 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Northbound",
-    "version": "5.30.0",
-    "cksum": "3273824429 27172",
+    "version": "5.31.0",
+    "cksum": "3663844734 28178",
     "tables": {
         "NB_Global": {
             "columns": {
@@ -526,5 +526,25 @@ 
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}}},
             "indexes": [["name"]],
+            "isRoot": true},
+        "BFD": {
+            "columns": {
+                "logical_port": {"type": "string"},
+                "dst_ip": {"type": "string"},
+                "min_tx": {"type": {"key": {"type": "integer"}}},
+                "min_rx": {"type": {"key": {"type": "integer"}}},
+                "detect_mult": {"type": {"key": {"type": "integer"}}},
+                "status": {
+                    "type": {"key": {"type": "string",
+                             "enum": ["set", ["down", "init", "up",
+                                              "admin_down"]]},
+                             "min": 0, "max": 1}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["logical_port", "dst_ip"]],
             "isRoot": true}}
     }
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 0cf043790..4a28d3f0d 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -3728,4 +3728,70 @@ 
       </column>
     </group>
   </table>
+
+  <table name="BFD">
+    <p>
+      Contains BFD parameter for ovn-controller bfd configuration
+    </p>
+
+    <group title="Configuration">
+      <column name="logical_port">
+        OVN logical port when BFD engine is running.
+      </column>
+
+      <column name="dst_ip">
+        BFD peer IP address.
+      </column>
+
+      <column name="min_tx">
+        This is the minimum interval, in milliseconds, that the local
+        system would like to use when transmitting BFD Control packets,
+        less any jitter applied. The value zero is reserved.
+      </column>
+
+      <column name="min_rx">
+        This is the minimum interval, in milliseconds, between received
+        BFD Control packets that this system is capable of supporting,
+        less any jitter applied by the sender. If this value is zero,
+        the transmitting system does not want the remote system to send
+        any periodic BFD Control packets.
+      </column>
+
+      <column name="detect_mult">
+        Detection time multiplier.  The negotiated transmit interval,
+        multiplied by this value, provides the Detection Time for the
+        receiving system in Asynchronous mode.
+      </column>
+
+      <column name="options">
+        Reserved for future use.
+      </column>
+
+      <column name="external_ids">
+        See <em>External IDs</em> at the beginning of this document.
+      </column>
+    </group>
+
+    <group title="Status Reporting">
+      <column name="status">
+        <p>
+          BFD port logical states. Possible values are:
+          <ul>
+            <li>
+              <code>admin_down</code>
+            </li>
+            <li>
+              <code>down</code>
+            </li>
+            <li>
+              <code>init</code>
+            </li>
+            <li>
+              <code>up</code>
+            </li>
+          </ul>
+        </p>
+      </column>
+    </group>
+  </table>
 </database>
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 5228839b8..97db6de39 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
-    "version": "20.12.0",
-    "cksum": "3969471120 24441",
+    "version": "20.13.0",
+    "cksum": "3035725595 25676",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -484,6 +484,29 @@ 
                 "external_ids": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}}},
+            "isRoot": true},
+        "BFD": {
+            "columns": {
+                "src_port": {"type": {"key": {"type": "integer",
+                                          "minInteger": 49152,
+                                          "maxInteger": 65535}}},
+                "disc": {"type": {"key": {"type": "integer"}}},
+                "logical_port": {"type": "string"},
+                "dst_ip": {"type": "string"},
+                "min_tx": {"type": {"key": {"type": "integer"}}},
+                "min_rx": {"type": {"key": {"type": "integer"}}},
+                "detect_mult": {"type": {"key": {"type": "integer"}}},
+                "status": {
+                    "type": {"key": {"type": "string",
+                             "enum": ["set", ["down", "init", "up",
+                                              "admin_down"]]}}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
             "isRoot": true}
     }
 }
diff --git a/ovn-sb.xml b/ovn-sb.xml
index c13994848..ccfe2e415 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -4231,4 +4231,82 @@  tcp.flags = RST;
       </column>
     </group>
   </table>
+
+  <table name="BFD">
+    <p>
+      Contains BFD parameter for ovn-controller bfd configuration
+    </p>
+
+    <group title="Configuration">
+      <column name="src_port">
+        udp source port used in bfd control packets.
+        The source port MUST be in the range 49152 through 65535
+        (RFC5881 section 4).
+      </column>
+
+      <column name="disc">
+        A unique, nonzero discriminator value generated by the transmitting
+        system, used to demultiplex multiple BFD sessions between the same pair
+        of systems.
+      </column>
+
+      <column name="logical_port">
+        OVN logical port when BFD engine is running.
+      </column>
+
+      <column name="dst_ip">
+        BFD peer IP address.
+      </column>
+
+      <column name="min_tx">
+        This is the minimum interval, in milliseconds, that the local
+        system would like to use when transmitting BFD Control packets,
+        less any jitter applied. The value zero is reserved.
+      </column>
+
+      <column name="min_rx">
+        This is the minimum interval, in milliseconds, between received
+        BFD Control packets that this system is capable of supporting,
+        less any jitter applied by the sender. If this value is zero,
+        the transmitting system does not want the remote system to send
+        any periodic BFD Control packets.
+      </column>
+
+      <column name="detect_mult">
+        Detection time multiplier.  The negotiated transmit interval,
+        multiplied by this value, provides the Detection Time for the
+        receiving system in Asynchronous mode.
+      </column>
+
+      <column name="options">
+        Reserved for future use.
+      </column>
+
+      <column name="external_ids">
+        See <em>External IDs</em> at the beginning of this document.
+      </column>
+    </group>
+
+    <group title="Status Reporting">
+      <column name="status">
+        <p>
+          BFD port logical states. Possible values are:
+          <ul>
+            <li>
+              <code>admin_down</code>
+            </li>
+            <li>
+              <code>down</code>
+            </li>
+            <li>
+              <code>init</code>
+            </li>
+            <li>
+              <code>up</code>
+            </li>
+          </ul>
+        </p>
+      </column>
+    </group>
+  </table>
 </database>