diff mbox series

[ovs-dev,v2,3/3] northd: Add BFD support for ECMP route policy.

Message ID 3ce65e66bfbcb5fa155b0b5f97333b417b81230a.1706881571.git.lorenzo.bianconi@redhat.com
State Accepted
Headers show
Series Introduce BFD support for ECMP route policy | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi Feb. 2, 2024, 1:49 p.m. UTC
Similar to OVN static routes, introduce the capability to link a BFD
session for OVN reroute policies.

Reported-at: https://issues.redhat.com/browse/FDP-234
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 NEWS                      |  1 +
 northd/northd.c           | 86 ++++++++++++++++++++++++++++++----
 ovn-nb.ovsschema          |  9 +++-
 ovn-nb.xml                |  7 +++
 tests/ovn-nbctl.at        |  6 +++
 tests/ovn-northd.at       | 20 +++++++-
 tests/system-ovn.at       | 51 ++++++++++++++++++--
 utilities/ovn-nbctl.8.xml |  8 ++++
 utilities/ovn-nbctl.c     | 97 ++++++++++++++++++++++++++++++++++++++-
 9 files changed, 267 insertions(+), 18 deletions(-)

Comments

Mark Michelson Feb. 2, 2024, 6:27 p.m. UTC | #1
Thanks Lorenzo,

Acked-by: Mark Michelson <mmichels@redhat.com>

On 2/2/24 08:49, Lorenzo Bianconi wrote:
> Similar to OVN static routes, introduce the capability to link a BFD
> session for OVN reroute policies.
> 
> Reported-at: https://issues.redhat.com/browse/FDP-234
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>   NEWS                      |  1 +
>   northd/northd.c           | 86 ++++++++++++++++++++++++++++++----
>   ovn-nb.ovsschema          |  9 +++-
>   ovn-nb.xml                |  7 +++
>   tests/ovn-nbctl.at        |  6 +++
>   tests/ovn-northd.at       | 20 +++++++-
>   tests/system-ovn.at       | 51 ++++++++++++++++++--
>   utilities/ovn-nbctl.8.xml |  8 ++++
>   utilities/ovn-nbctl.c     | 97 ++++++++++++++++++++++++++++++++++++++-
>   9 files changed, 267 insertions(+), 18 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 6553bd078..d4227ac99 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -18,6 +18,7 @@ Post v23.09.0
>     - Support selecting encapsulation IP based on the source/destination VIF's
>       settting. See ovn-controller(8) 'external_ids:ovn-encap-ip' for more
>       details.
> +  - Introduce next-hop BFD availability check for OVN reroute policies.
>   
>   OVN v23.09.0 - 15 Sep 2023
>   --------------------------
> diff --git a/northd/northd.c b/northd/northd.c
> index d2091d4bc..ae50e4ff9 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10991,10 +10991,63 @@ get_outport_for_routing_policy_nexthop(struct ovn_datapath *od,
>       return NULL;
>   }
>   
> +static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> +
> +static bool check_bfd_state(
> +        const struct nbrec_logical_router_policy *rule,
> +        const struct hmap *bfd_connections,
> +        struct ovn_port *out_port,
> +        const char *nexthop)
> +{
> +    struct in6_addr nexthop_v6;
> +    bool is_nexthop_v6 = ipv6_parse(nexthop, &nexthop_v6);
> +    bool ret = true;
> +
> +    for (size_t i = 0; i < rule->n_bfd_sessions; i++) {
> +        /* Check if there is a BFD session associated to the reroute
> +         * policy. */
> +        const struct nbrec_bfd *nb_bt = rule->bfd_sessions[i];
> +        struct in6_addr dst_ipv6;
> +        bool is_dst_v6 = ipv6_parse(nb_bt->dst_ip, &dst_ipv6);
> +
> +        if (is_nexthop_v6 ^ is_dst_v6) {
> +            continue;
> +        }
> +
> +        if ((is_nexthop_v6 && !ipv6_addr_equals(&nexthop_v6, &dst_ipv6)) ||
> +            strcmp(nb_bt->dst_ip, nexthop)) {
> +            continue;
> +        }
> +
> +        if (strcmp(nb_bt->logical_port, out_port->key)) {
> +            continue;
> +        }
> +
> +        struct bfd_entry *bfd_e = bfd_port_lookup(bfd_connections,
> +                                                  nb_bt->logical_port,
> +                                                  nb_bt->dst_ip);
> +        ovs_mutex_lock(&bfd_lock);
> +        if (bfd_e) {
> +            bfd_e->ref = true;
> +        }
> +
> +        if (!strcmp(nb_bt->status, "admin_down")) {
> +            nbrec_bfd_set_status(nb_bt, "down");
> +        }
> +
> +        ret = strcmp(nb_bt->status, "down");
> +        ovs_mutex_unlock(&bfd_lock);
> +        break;
> +    }
> +
> +    return ret;
> +}
> +
>   static void
>   build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
>                             const struct hmap *lr_ports,
>                             const struct nbrec_logical_router_policy *rule,
> +                          const struct hmap *bfd_connections,
>                             const struct ovsdb_idl_row *stage_hint)
>   {
>       struct ds match = DS_EMPTY_INITIALIZER;
> @@ -11019,6 +11072,11 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
>                            rule->priority, nexthop);
>               return;
>           }
> +
> +        if (!check_bfd_state(rule, bfd_connections, out_port, nexthop)) {
> +            return;
> +        }
> +
>           uint32_t pkt_mark = smap_get_uint(&rule->options, "pkt_mark", 0);
>           if (pkt_mark) {
>               ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
> @@ -11060,6 +11118,7 @@ static void
>   build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                                   const struct hmap *lr_ports,
>                                   const struct nbrec_logical_router_policy *rule,
> +                                const struct hmap *bfd_connections,
>                                   uint16_t ecmp_group_id)
>   {
>       ovs_assert(rule->n_nexthops > 1);
> @@ -11088,6 +11147,9 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>       struct ds match = DS_EMPTY_INITIALIZER;
>       struct ds actions = DS_EMPTY_INITIALIZER;
>   
> +    size_t *valid_nexthops = xcalloc(rule->n_nexthops, sizeof *valid_nexthops);
> +    size_t n_valid_nexthops = 0;
> +
>       for (size_t i = 0; i < rule->n_nexthops; i++) {
>           struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
>                od, lr_ports, rule->priority, rule->nexthops[i]);
> @@ -11105,6 +11167,13 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>               goto cleanup;
>           }
>   
> +        if (!check_bfd_state(rule, bfd_connections, out_port,
> +                             rule->nexthops[i])) {
> +            continue;
> +        }
> +
> +        valid_nexthops[n_valid_nexthops++] = i + 1;
> +
>           ds_clear(&actions);
>           uint32_t pkt_mark = smap_get_uint(&rule->options, "pkt_mark", 0);
>           if (pkt_mark) {
> @@ -11140,12 +11209,12 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                     "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
>                     REG_ECMP_MEMBER_ID);
>   
> -    for (size_t i = 0; i < rule->n_nexthops; i++) {
> +    for (size_t i = 0; i < n_valid_nexthops; i++) {
>           if (i > 0) {
>               ds_put_cstr(&actions, ", ");
>           }
>   
> -        ds_put_format(&actions, "%"PRIuSIZE, i + 1);
> +        ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]);
>       }
>       ds_put_cstr(&actions, ");");
>       ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
> @@ -11153,6 +11222,7 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                               ds_cstr(&actions), &rule->header_);
>   
>   cleanup:
> +    free(valid_nexthops);
>       ds_destroy(&match);
>       ds_destroy(&actions);
>   }
> @@ -11236,8 +11306,6 @@ route_hash(struct parsed_route *route)
>                         (uint32_t)route->plen);
>   }
>   
> -static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> -
>   static bool
>   find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
>       const struct nbrec_logical_router_static_route *route, bool is_ipv4,
> @@ -13856,7 +13924,8 @@ build_mcast_lookup_flows_for_lrouter(
>   static void
>   build_ingress_policy_flows_for_lrouter(
>           struct ovn_datapath *od, struct hmap *lflows,
> -        const struct hmap *lr_ports)
> +        const struct hmap *lr_ports,
> +        const struct hmap *bfd_connections)
>   {
>       ovs_assert(od->nbr);
>       /* This is a catch-all rule. It has the lowest priority (0)
> @@ -13877,11 +13946,11 @@ build_ingress_policy_flows_for_lrouter(
>   
>           if (is_ecmp_reroute) {
>               build_ecmp_routing_policy_flows(lflows, od, lr_ports, rule,
> -                                            ecmp_group_id);
> +                                            bfd_connections, ecmp_group_id);
>               ecmp_group_id++;
>           } else {
>               build_routing_policy_flow(lflows, od, lr_ports, rule,
> -                                      &rule->header_);
> +                                      bfd_connections, &rule->header_);
>           }
>       }
>   }
> @@ -16316,7 +16385,8 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
>                                            lsi->bfd_connections);
>       build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
>                                            &lsi->actions);
> -    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports);
> +    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
> +                                           lsi->bfd_connections);
>       build_arp_resolve_flows_for_lrouter(od, lsi->lflows);
>       build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
>                                             &lsi->match, &lsi->actions,
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b2e0993e0..cec02a172 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>   {
>       "name": "OVN_Northbound",
> -    "version": "7.2.0",
> -    "cksum": "1069338687 34162",
> +    "version": "7.2.1",
> +    "cksum": "4156161406 34467",
>       "tables": {
>           "NB_Global": {
>               "columns": {
> @@ -477,6 +477,11 @@
>                   "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
>                   "nexthops": {"type": {
>                       "key": "string", "min": 0, "max": "unlimited"}},
> +                "bfd_sessions": {"type": {"key": {"type": "uuid",
> +                                                  "refTable": "BFD",
> +                                                  "refType": "weak"},
> +                                  "min": 0,
> +                                  "max": "unlimited"}},
>                   "options": {
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}},
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 765ffcf2e..d35c34517 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -3682,6 +3682,13 @@ or
>         </p>
>       </column>
>   
> +    <column name="bfd_sessions">
> +      <p>
> +        Reference to <ref table="BFD"/> row if the route policy has associated
> +        some BFD sessions.
> +      </p>
> +    </column>
> +
>       <column name="options" key="pkt_mark">
>         <p>
>           Marks the packet with the value specified when the router policy
> diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> index 7f37d7716..b15b5cfd3 100644
> --- a/tests/ovn-nbctl.at
> +++ b/tests/ovn-nbctl.at
> @@ -2189,6 +2189,12 @@ AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow])
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop])
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop])
> +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1], [1], [],
> +  [ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
> +])
> +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop], [1], [],
> +  [ovn-nbctl: BFD is valid only with reroute action.
> +])
>   
>   dnl Incomplete option set.
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [],
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 67e81ddba..5407bd381 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3624,7 +3624,7 @@ AT_KEYWORDS([northd-bfd])
>   ovn_start
>   
>   check ovn-nbctl --wait=sb lr-add r0
> -for i in $(seq 1 7); do
> +for i in $(seq 1 9); do
>       check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24
>       check ovn-nbctl --wait=sb ls-add sw$i
>       check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0
> @@ -3688,6 +3688,24 @@ bfd2_uuid=$(fetch_column bfd _uuid logical_port=r0-sw2)
>   check ovn-sbctl set bfd $bfd2_uuid status=up
>   wait_column up nb:bfd status logical_port=r0-sw2
>   
> +# Create reroute policies associated with BFD sessions
> +check ovn-nbctl lr-route-del r0
> +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
> +wait_column down bfd status logical_port=r0-sw8
> +
> +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw8)
> +AT_CHECK([ovn-nbctl list logical_router_policy | grep -q $bfd_route_policy_uuid])
> +
> +check ovn-nbctl lr-policy-del r0
> +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 2.3.4.0/24" reroute 192.168.9.2,192.168.9.3,192.168.9.4
> +
> +wait_column down bfd status dst_ip=192.168.9.2
> +wait_column down bfd status dst_ip=192.168.9.3
> +wait_column down bfd status dst_ip=192.168.9.4
> +
> +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw9)
> +AT_CHECK([ovn-nbctl list logical_router_policy | sed s/,//g | grep -q "$bfd_route_policy_uuid"])
> +
>   AT_CLEANUP
>   ])
>   
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 9ecb29cdb..8e3acd57e 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -6837,6 +6837,15 @@ OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst ==
>   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
>   wait_column "admin_down" nb:bfd status logical_port=rp-public
>   OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
> +wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl lr-policy-del R1
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +
>   NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &])
>   sleep 5
>   kill $(pidof tcpdump)
> @@ -6844,25 +6853,57 @@ AT_CHECK([grep -qi bfd bfd.pcap],[1])
>   
>   # restart the connection
>   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
>   wait_column "up" nb:bfd status logical_port=rp-public
>   
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> +
> +# stop bfd endpoint
> +NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
> +stopping
> +])
> +wait_column "down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep 172.16.1.50)" = ""])
> +
>   # switch to gw router configuration
>   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> -wait_column "admin_down" nb:bfd status logical_port=rp-public
> -OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +check ovn-nbctl lr-policy-del R1
>   check ovn-nbctl clear logical_router_port rp-public gateway_chassis
>   check ovn-nbctl set logical_router R1 options:chassis=hv1
>   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> +
> +# restart bfdd
> +NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0])
> +NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl
> +Allowing connections from 172.16.1.1
> +])
> +
>   wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +ovn-nbctl destroy bfd $uuid
> +check_row_count bfd 0
> +
> +# create reroute route policy
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 210.0.0.0/8" reroute 172.16.1.50
> +wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 210.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl lr-policy-del R1
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public")
>   
>   # stop bfd endpoint
>   NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
>   stopping
>   ])
>   
> -wait_column "down" nb:bfd status logical_port=rp-public
> -OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> -
>   # remove bfd entry
>   ovn-nbctl destroy bfd $uuid
>   check_row_count bfd 0
> diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
> index 6f74bd557..5aa01f2f8 100644
> --- a/utilities/ovn-nbctl.8.xml
> +++ b/utilities/ovn-nbctl.8.xml
> @@ -1119,6 +1119,14 @@
>             duplicated routing policy results in error.
>           </p>
>   
> +        <p>
> +          <code>--bfd</code> option is used to link a BFD session to the
> +          OVN reroute policy. OVN will look for an already running BFD
> +          session using next-hop as lookup key in the BFD table.
> +          If the lookup fails, a new entry in the BFD table will be created
> +          using the <var>nexthop</var> as <var>dst_ip</var>.
> +        </p>
> +
>             <p>
>             The following example shows a policy to lr1, which will drop packets
>             from<code>192.168.100.0/24</code>.
> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index 0586eccdb..5db1fc136 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -4014,14 +4014,26 @@ normalize_addr_str(const char *orig_addr)
>       return ret;
>   }
>   
> +static bool
> +ip_in_lrp_networks(const struct nbrec_logical_router_port *lrp,
> +                   const char *ip_s);
> +
>   static void
>   nbctl_pre_lr_policy_add(struct ctl_context *ctx)
>   {
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_policies);
>   
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_name);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_networks);
> +
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_priority);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_match);
> +
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_dst_ip);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_logical_port);
>   }
>   
>   static void
> @@ -4158,6 +4170,81 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
>   
>       nbrec_logical_router_update_policies_addvalue(lr, policy);
>   
> +    struct shash_node *bfd = shash_find(&ctx->options, "--bfd");
> +    const struct nbrec_bfd **bfd_sessions = NULL;
> +
> +    if (bfd) {
> +        if (!reroute) {
> +            ctl_error(ctx, "BFD is valid only with reroute action.");
> +            goto free_nexthops;
> +        }
> +
> +        bfd_sessions = xcalloc(n_nexthops, sizeof *bfd_sessions);
> +        size_t j, n_bfd_sessions = 0;
> +
> +        for (i = 0; i < n_nexthops; i++) {
> +            for (j = 0; j < lr->n_ports; j++) {
> +                if (ip_in_lrp_networks(lr->ports[j], nexthops[i])) {
> +                    break;
> +                }
> +            }
> +
> +            if (j == lr->n_ports) {
> +                ctl_error(ctx, "out lrp not found for %s nexthop",
> +                          nexthops[i]);
> +                goto free_nexthops;
> +            }
> +
> +            struct in6_addr nexthop_v6;
> +            bool is_nexthop_v6 = ipv6_parse(nexthops[i], &nexthop_v6);
> +            const struct nbrec_bfd *iter, *nb_bt = NULL;
> +
> +            NBREC_BFD_FOR_EACH (iter, ctx->idl) {
> +                struct in6_addr dst_ipv6;
> +                bool is_dst_v6 = ipv6_parse(iter->dst_ip, &dst_ipv6);
> +
> +                if (is_nexthop_v6 ^ is_dst_v6) {
> +                    continue;
> +                }
> +
> +                /* match endpoint ip. */
> +                if ((is_nexthop_v6 &&
> +                     !ipv6_addr_equals(&nexthop_v6, &dst_ipv6)) ||
> +                    strcmp(iter->dst_ip, nexthops[i])) {
> +                    continue;
> +                }
> +
> +                /* match outport. */
> +                if (strcmp(iter->logical_port, lr->ports[j]->name)) {
> +                    continue;
> +                }
> +
> +                nb_bt = iter;
> +                break;
> +            }
> +
> +            /* Create the BFD session if it does not already exist. */
> +            if (!nb_bt) {
> +                nb_bt = nbrec_bfd_insert(ctx->txn);
> +                nbrec_bfd_set_dst_ip(nb_bt, nexthops[i]);
> +                nbrec_bfd_set_logical_port(nb_bt, lr->ports[j]->name);
> +            }
> +
> +            for (j = 0; j < n_bfd_sessions; j++) {
> +                if (bfd_sessions[j] == nb_bt) {
> +                    break;
> +                }
> +            }
> +            if (j == n_bfd_sessions) {
> +                bfd_sessions[n_bfd_sessions++] = nb_bt;
> +            }
> +        }
> +        nbrec_logical_router_policy_set_bfd_sessions(
> +                policy, (struct nbrec_bfd **) bfd_sessions, n_bfd_sessions);
> +    }
> +
> +free_nexthops:
> +    free(bfd_sessions);
>       for (i = 0; i < n_nexthops; i++) {
>           free(nexthops[i]);
>       }
> @@ -4282,8 +4369,11 @@ print_routing_policy(const struct nbrec_logical_router_policy *policy,
>                         policy->match, policy->action);
>       }
>   
> -    if (!smap_is_empty(&policy->options)) {
> +    if (!smap_is_empty(&policy->options) || policy->n_bfd_sessions) {
>           ds_put_format(s, "%15s", "");
> +        if (policy->n_bfd_sessions) {
> +            ds_put_cstr(s, "bfd,");
> +        }
>           struct smap_node *node;
>           SMAP_FOR_EACH (node, &policy->options) {
>               ds_put_format(s, "%s=%s,", node->key, node->value);
> @@ -4305,6 +4395,8 @@ nbctl_pre_lr_policy_list(struct ctl_context *ctx)
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_nexthops);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_action);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_options);
> +    ovsdb_idl_add_column(ctx->idl,
> +                         &nbrec_logical_router_policy_col_bfd_sessions);
>   }
>   
>   static void
> @@ -7900,7 +7992,8 @@ static const struct ctl_command_syntax nbctl_commands[] = {
>       /* Policy commands */
>       { "lr-policy-add", 4, INT_MAX,
>        "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
> -     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist", RW },
> +     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist,--bfd?",
> +     RW },
>       { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]",
>         nbctl_pre_lr_policy_del, nbctl_lr_policy_del, NULL, "--if-exists", RW },
>       { "lr-policy-list", 1, 1, "ROUTER", nbctl_pre_lr_policy_list,
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 6553bd078..d4227ac99 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,7 @@  Post v23.09.0
   - Support selecting encapsulation IP based on the source/destination VIF's
     settting. See ovn-controller(8) 'external_ids:ovn-encap-ip' for more
     details.
+  - Introduce next-hop BFD availability check for OVN reroute policies.
 
 OVN v23.09.0 - 15 Sep 2023
 --------------------------
diff --git a/northd/northd.c b/northd/northd.c
index d2091d4bc..ae50e4ff9 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10991,10 +10991,63 @@  get_outport_for_routing_policy_nexthop(struct ovn_datapath *od,
     return NULL;
 }
 
+static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
+
+static bool check_bfd_state(
+        const struct nbrec_logical_router_policy *rule,
+        const struct hmap *bfd_connections,
+        struct ovn_port *out_port,
+        const char *nexthop)
+{
+    struct in6_addr nexthop_v6;
+    bool is_nexthop_v6 = ipv6_parse(nexthop, &nexthop_v6);
+    bool ret = true;
+
+    for (size_t i = 0; i < rule->n_bfd_sessions; i++) {
+        /* Check if there is a BFD session associated to the reroute
+         * policy. */
+        const struct nbrec_bfd *nb_bt = rule->bfd_sessions[i];
+        struct in6_addr dst_ipv6;
+        bool is_dst_v6 = ipv6_parse(nb_bt->dst_ip, &dst_ipv6);
+
+        if (is_nexthop_v6 ^ is_dst_v6) {
+            continue;
+        }
+
+        if ((is_nexthop_v6 && !ipv6_addr_equals(&nexthop_v6, &dst_ipv6)) ||
+            strcmp(nb_bt->dst_ip, nexthop)) {
+            continue;
+        }
+
+        if (strcmp(nb_bt->logical_port, out_port->key)) {
+            continue;
+        }
+
+        struct bfd_entry *bfd_e = bfd_port_lookup(bfd_connections,
+                                                  nb_bt->logical_port,
+                                                  nb_bt->dst_ip);
+        ovs_mutex_lock(&bfd_lock);
+        if (bfd_e) {
+            bfd_e->ref = true;
+        }
+
+        if (!strcmp(nb_bt->status, "admin_down")) {
+            nbrec_bfd_set_status(nb_bt, "down");
+        }
+
+        ret = strcmp(nb_bt->status, "down");
+        ovs_mutex_unlock(&bfd_lock);
+        break;
+    }
+
+    return ret;
+}
+
 static void
 build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
                           const struct hmap *lr_ports,
                           const struct nbrec_logical_router_policy *rule,
+                          const struct hmap *bfd_connections,
                           const struct ovsdb_idl_row *stage_hint)
 {
     struct ds match = DS_EMPTY_INITIALIZER;
@@ -11019,6 +11072,11 @@  build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
                          rule->priority, nexthop);
             return;
         }
+
+        if (!check_bfd_state(rule, bfd_connections, out_port, nexthop)) {
+            return;
+        }
+
         uint32_t pkt_mark = smap_get_uint(&rule->options, "pkt_mark", 0);
         if (pkt_mark) {
             ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
@@ -11060,6 +11118,7 @@  static void
 build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                                 const struct hmap *lr_ports,
                                 const struct nbrec_logical_router_policy *rule,
+                                const struct hmap *bfd_connections,
                                 uint16_t ecmp_group_id)
 {
     ovs_assert(rule->n_nexthops > 1);
@@ -11088,6 +11147,9 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
     struct ds match = DS_EMPTY_INITIALIZER;
     struct ds actions = DS_EMPTY_INITIALIZER;
 
+    size_t *valid_nexthops = xcalloc(rule->n_nexthops, sizeof *valid_nexthops);
+    size_t n_valid_nexthops = 0;
+
     for (size_t i = 0; i < rule->n_nexthops; i++) {
         struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
              od, lr_ports, rule->priority, rule->nexthops[i]);
@@ -11105,6 +11167,13 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
             goto cleanup;
         }
 
+        if (!check_bfd_state(rule, bfd_connections, out_port,
+                             rule->nexthops[i])) {
+            continue;
+        }
+
+        valid_nexthops[n_valid_nexthops++] = i + 1;
+
         ds_clear(&actions);
         uint32_t pkt_mark = smap_get_uint(&rule->options, "pkt_mark", 0);
         if (pkt_mark) {
@@ -11140,12 +11209,12 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                   "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
                   REG_ECMP_MEMBER_ID);
 
-    for (size_t i = 0; i < rule->n_nexthops; i++) {
+    for (size_t i = 0; i < n_valid_nexthops; i++) {
         if (i > 0) {
             ds_put_cstr(&actions, ", ");
         }
 
-        ds_put_format(&actions, "%"PRIuSIZE, i + 1);
+        ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]);
     }
     ds_put_cstr(&actions, ");");
     ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
@@ -11153,6 +11222,7 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                             ds_cstr(&actions), &rule->header_);
 
 cleanup:
+    free(valid_nexthops);
     ds_destroy(&match);
     ds_destroy(&actions);
 }
@@ -11236,8 +11306,6 @@  route_hash(struct parsed_route *route)
                       (uint32_t)route->plen);
 }
 
-static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
-
 static bool
 find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
     const struct nbrec_logical_router_static_route *route, bool is_ipv4,
@@ -13856,7 +13924,8 @@  build_mcast_lookup_flows_for_lrouter(
 static void
 build_ingress_policy_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows,
-        const struct hmap *lr_ports)
+        const struct hmap *lr_ports,
+        const struct hmap *bfd_connections)
 {
     ovs_assert(od->nbr);
     /* This is a catch-all rule. It has the lowest priority (0)
@@ -13877,11 +13946,11 @@  build_ingress_policy_flows_for_lrouter(
 
         if (is_ecmp_reroute) {
             build_ecmp_routing_policy_flows(lflows, od, lr_ports, rule,
-                                            ecmp_group_id);
+                                            bfd_connections, ecmp_group_id);
             ecmp_group_id++;
         } else {
             build_routing_policy_flow(lflows, od, lr_ports, rule,
-                                      &rule->header_);
+                                      bfd_connections, &rule->header_);
         }
     }
 }
@@ -16316,7 +16385,8 @@  build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
                                          lsi->bfd_connections);
     build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
                                          &lsi->actions);
-    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports);
+    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
+                                           lsi->bfd_connections);
     build_arp_resolve_flows_for_lrouter(od, lsi->lflows);
     build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
                                           &lsi->match, &lsi->actions,
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index b2e0993e0..cec02a172 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Northbound",
-    "version": "7.2.0",
-    "cksum": "1069338687 34162",
+    "version": "7.2.1",
+    "cksum": "4156161406 34467",
     "tables": {
         "NB_Global": {
             "columns": {
@@ -477,6 +477,11 @@ 
                 "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
                 "nexthops": {"type": {
                     "key": "string", "min": 0, "max": "unlimited"}},
+                "bfd_sessions": {"type": {"key": {"type": "uuid",
+                                                  "refTable": "BFD",
+                                                  "refType": "weak"},
+                                  "min": 0,
+                                  "max": "unlimited"}},
                 "options": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}},
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 765ffcf2e..d35c34517 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -3682,6 +3682,13 @@  or
       </p>
     </column>
 
+    <column name="bfd_sessions">
+      <p>
+        Reference to <ref table="BFD"/> row if the route policy has associated
+        some BFD sessions.
+      </p>
+    </column>
+
     <column name="options" key="pkt_mark">
       <p>
         Marks the packet with the value specified when the router policy
diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
index 7f37d7716..b15b5cfd3 100644
--- a/tests/ovn-nbctl.at
+++ b/tests/ovn-nbctl.at
@@ -2189,6 +2189,12 @@  AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow])
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop])
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop])
+AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1], [1], [],
+  [ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
+])
+AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop], [1], [],
+  [ovn-nbctl: BFD is valid only with reroute action.
+])
 
 dnl Incomplete option set.
 AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [],
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 67e81ddba..5407bd381 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -3624,7 +3624,7 @@  AT_KEYWORDS([northd-bfd])
 ovn_start
 
 check ovn-nbctl --wait=sb lr-add r0
-for i in $(seq 1 7); do
+for i in $(seq 1 9); do
     check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24
     check ovn-nbctl --wait=sb ls-add sw$i
     check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0
@@ -3688,6 +3688,24 @@  bfd2_uuid=$(fetch_column bfd _uuid logical_port=r0-sw2)
 check ovn-sbctl set bfd $bfd2_uuid status=up
 wait_column up nb:bfd status logical_port=r0-sw2
 
+# Create reroute policies associated with BFD sessions
+check ovn-nbctl lr-route-del r0
+check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
+wait_column down bfd status logical_port=r0-sw8
+
+bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw8)
+AT_CHECK([ovn-nbctl list logical_router_policy | grep -q $bfd_route_policy_uuid])
+
+check ovn-nbctl lr-policy-del r0
+check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 2.3.4.0/24" reroute 192.168.9.2,192.168.9.3,192.168.9.4
+
+wait_column down bfd status dst_ip=192.168.9.2
+wait_column down bfd status dst_ip=192.168.9.3
+wait_column down bfd status dst_ip=192.168.9.4
+
+bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw9)
+AT_CHECK([ovn-nbctl list logical_router_policy | sed s/,//g | grep -q "$bfd_route_policy_uuid"])
+
 AT_CLEANUP
 ])
 
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 9ecb29cdb..8e3acd57e 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -6837,6 +6837,15 @@  OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst ==
 check ovn-nbctl clear logical_router_static_route $route_uuid bfd
 wait_column "admin_down" nb:bfd status logical_port=rp-public
 OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
+wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl lr-policy-del R1
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+
 NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &])
 sleep 5
 kill $(pidof tcpdump)
@@ -6844,25 +6853,57 @@  AT_CHECK([grep -qi bfd bfd.pcap],[1])
 
 # restart the connection
 check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
 wait_column "up" nb:bfd status logical_port=rp-public
 
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
+
+# stop bfd endpoint
+NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
+stopping
+])
+wait_column "down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
+OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep 172.16.1.50)" = ""])
+
 # switch to gw router configuration
 check ovn-nbctl clear logical_router_static_route $route_uuid bfd
-wait_column "admin_down" nb:bfd status logical_port=rp-public
-OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+check ovn-nbctl lr-policy-del R1
 check ovn-nbctl clear logical_router_port rp-public gateway_chassis
 check ovn-nbctl set logical_router R1 options:chassis=hv1
 check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
+
+# restart bfdd
+NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0])
+NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl
+Allowing connections from 172.16.1.1
+])
+
 wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl clear logical_router_static_route $route_uuid bfd
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+ovn-nbctl destroy bfd $uuid
+check_row_count bfd 0
+
+# create reroute route policy
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 210.0.0.0/8" reroute 172.16.1.50
+wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 210.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl lr-policy-del R1
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public")
 
 # stop bfd endpoint
 NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
 stopping
 ])
 
-wait_column "down" nb:bfd status logical_port=rp-public
-OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
-
 # remove bfd entry
 ovn-nbctl destroy bfd $uuid
 check_row_count bfd 0
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index 6f74bd557..5aa01f2f8 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -1119,6 +1119,14 @@ 
           duplicated routing policy results in error.
         </p>
 
+        <p>
+          <code>--bfd</code> option is used to link a BFD session to the
+          OVN reroute policy. OVN will look for an already running BFD
+          session using next-hop as lookup key in the BFD table.
+          If the lookup fails, a new entry in the BFD table will be created
+          using the <var>nexthop</var> as <var>dst_ip</var>.
+        </p>
+
           <p>
           The following example shows a policy to lr1, which will drop packets
           from<code>192.168.100.0/24</code>.
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index 0586eccdb..5db1fc136 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -4014,14 +4014,26 @@  normalize_addr_str(const char *orig_addr)
     return ret;
 }
 
+static bool
+ip_in_lrp_networks(const struct nbrec_logical_router_port *lrp,
+                   const char *ip_s);
+
 static void
 nbctl_pre_lr_policy_add(struct ctl_context *ctx)
 {
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_policies);
 
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_name);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_networks);
+
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_priority);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_match);
+
+    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_dst_ip);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_logical_port);
 }
 
 static void
@@ -4158,6 +4170,81 @@  nbctl_lr_policy_add(struct ctl_context *ctx)
 
     nbrec_logical_router_update_policies_addvalue(lr, policy);
 
+    struct shash_node *bfd = shash_find(&ctx->options, "--bfd");
+    const struct nbrec_bfd **bfd_sessions = NULL;
+
+    if (bfd) {
+        if (!reroute) {
+            ctl_error(ctx, "BFD is valid only with reroute action.");
+            goto free_nexthops;
+        }
+
+        bfd_sessions = xcalloc(n_nexthops, sizeof *bfd_sessions);
+        size_t j, n_bfd_sessions = 0;
+
+        for (i = 0; i < n_nexthops; i++) {
+            for (j = 0; j < lr->n_ports; j++) {
+                if (ip_in_lrp_networks(lr->ports[j], nexthops[i])) {
+                    break;
+                }
+            }
+
+            if (j == lr->n_ports) {
+                ctl_error(ctx, "out lrp not found for %s nexthop",
+                          nexthops[i]);
+                goto free_nexthops;
+            }
+
+            struct in6_addr nexthop_v6;
+            bool is_nexthop_v6 = ipv6_parse(nexthops[i], &nexthop_v6);
+            const struct nbrec_bfd *iter, *nb_bt = NULL;
+
+            NBREC_BFD_FOR_EACH (iter, ctx->idl) {
+                struct in6_addr dst_ipv6;
+                bool is_dst_v6 = ipv6_parse(iter->dst_ip, &dst_ipv6);
+
+                if (is_nexthop_v6 ^ is_dst_v6) {
+                    continue;
+                }
+
+                /* match endpoint ip. */
+                if ((is_nexthop_v6 &&
+                     !ipv6_addr_equals(&nexthop_v6, &dst_ipv6)) ||
+                    strcmp(iter->dst_ip, nexthops[i])) {
+                    continue;
+                }
+
+                /* match outport. */
+                if (strcmp(iter->logical_port, lr->ports[j]->name)) {
+                    continue;
+                }
+
+                nb_bt = iter;
+                break;
+            }
+
+            /* Create the BFD session if it does not already exist. */
+            if (!nb_bt) {
+                nb_bt = nbrec_bfd_insert(ctx->txn);
+                nbrec_bfd_set_dst_ip(nb_bt, nexthops[i]);
+                nbrec_bfd_set_logical_port(nb_bt, lr->ports[j]->name);
+            }
+
+            for (j = 0; j < n_bfd_sessions; j++) {
+                if (bfd_sessions[j] == nb_bt) {
+                    break;
+                }
+            }
+            if (j == n_bfd_sessions) {
+                bfd_sessions[n_bfd_sessions++] = nb_bt;
+            }
+        }
+        nbrec_logical_router_policy_set_bfd_sessions(
+                policy, (struct nbrec_bfd **) bfd_sessions, n_bfd_sessions);
+    }
+
+free_nexthops:
+    free(bfd_sessions);
     for (i = 0; i < n_nexthops; i++) {
         free(nexthops[i]);
     }
@@ -4282,8 +4369,11 @@  print_routing_policy(const struct nbrec_logical_router_policy *policy,
                       policy->match, policy->action);
     }
 
-    if (!smap_is_empty(&policy->options)) {
+    if (!smap_is_empty(&policy->options) || policy->n_bfd_sessions) {
         ds_put_format(s, "%15s", "");
+        if (policy->n_bfd_sessions) {
+            ds_put_cstr(s, "bfd,");
+        }
         struct smap_node *node;
         SMAP_FOR_EACH (node, &policy->options) {
             ds_put_format(s, "%s=%s,", node->key, node->value);
@@ -4305,6 +4395,8 @@  nbctl_pre_lr_policy_list(struct ctl_context *ctx)
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_nexthops);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_action);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_options);
+    ovsdb_idl_add_column(ctx->idl,
+                         &nbrec_logical_router_policy_col_bfd_sessions);
 }
 
 static void
@@ -7900,7 +7992,8 @@  static const struct ctl_command_syntax nbctl_commands[] = {
     /* Policy commands */
     { "lr-policy-add", 4, INT_MAX,
      "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
-     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist", RW },
+     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist,--bfd?",
+     RW },
     { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]",
       nbctl_pre_lr_policy_del, nbctl_lr_policy_del, NULL, "--if-exists", RW },
     { "lr-policy-list", 1, 1, "ROUTER", nbctl_pre_lr_policy_list,