diff mbox series

[ovs-dev,v6,5/5] Implement RARP activation strategy for ports

Message ID 20220505133823.1857072-6-ihrachys@redhat.com
State Changes Requested
Headers show
Series Support multiple requested-chassis | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ihar Hrachyshka May 5, 2022, 1:38 p.m. UTC
When options:activation-strategy is set to "rarp" for LSP, when used in
combination with multiple chassis names listed in
options:requested-chassis, additional chassis will install special flows
that would block all ingress and egress traffic for the port until a
special activation event happens.

For "rarp" strategy, an observation of a RARP packet sent from the port
on the additional chassis is such an event. When it occurs, a special
flow passes control to a controller() action handler that removes the
installed blocking flows and also marks the port as
options:additional-chassis-activated in southbound db. Once vswitchd
processes the flow mod request, the port is ready to communicate from
a new location.

This feature is useful in live migration scenarios where it's not
advisable to unlock the destination port location prematurely to avoid
duplicate packets originating from the port.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
---
 NEWS                  |   2 +
 controller/physical.c |  79 +++++++++++++++
 controller/pinctrl.c  | 226 +++++++++++++++++++++++++++++++++++++++++-
 controller/pinctrl.h  |   5 +
 include/ovn/actions.h |   9 ++
 lib/actions.c         |  40 +++++++-
 northd/northd.c       |  10 ++
 northd/ovn-northd.c   |   5 +-
 ovn-nb.xml            |  11 ++
 ovn-sb.xml            |  15 +++
 tests/ovn.at          | 166 +++++++++++++++++++++++++++++++
 utilities/ovn-trace.c |   3 +
 12 files changed, 567 insertions(+), 4 deletions(-)

Comments

Han Zhou May 13, 2022, 6:36 a.m. UTC | #1
On Thu, May 5, 2022 at 6:38 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>
> When options:activation-strategy is set to "rarp" for LSP, when used in
> combination with multiple chassis names listed in
> options:requested-chassis, additional chassis will install special flows
> that would block all ingress and egress traffic for the port until a
> special activation event happens.
>
> For "rarp" strategy, an observation of a RARP packet sent from the port
> on the additional chassis is such an event. When it occurs, a special
> flow passes control to a controller() action handler that removes the
> installed blocking flows and also marks the port as
> options:additional-chassis-activated in southbound db. Once vswitchd
> processes the flow mod request, the port is ready to communicate from
> a new location.
>
> This feature is useful in live migration scenarios where it's not
> advisable to unlock the destination port location prematurily to avoid
> duplicate packets originating from the port.
>
> Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> ---
>  NEWS                  |   2 +
>  controller/physical.c |  79 +++++++++++++++
>  controller/pinctrl.c  | 226 +++++++++++++++++++++++++++++++++++++++++-
>  controller/pinctrl.h  |   5 +
>  include/ovn/actions.h |   9 ++
>  lib/actions.c         |  40 +++++++-
>  northd/northd.c       |  10 ++
>  northd/ovn-northd.c   |   5 +-
>  ovn-nb.xml            |  11 ++
>  ovn-sb.xml            |  15 +++
>  tests/ovn.at          | 166 +++++++++++++++++++++++++++++++
>  utilities/ovn-trace.c |   3 +
>  12 files changed, 567 insertions(+), 4 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 3fedcfeed..b5dbda89c 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -8,6 +8,8 @@ Post v22.03.0
>    - Add global option (NB_Global.options:default_acl_drop) to enable
>      implicit drop behavior on logical switches with ACLs applied.
>    - Support list of chassis for
Logical_Switch_Port:options:requested-chassis.
> +  - Support Logical_Switch_Port:options:activation-strategy for live
migration
> +    scenarios.
>
>  OVN v22.03.0 - 11 Mar 2022
>  --------------------------
> diff --git a/controller/physical.c b/controller/physical.c
> index 6399d1fce..ce0ff24ba 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -40,7 +40,9 @@
>  #include "lib/mcast-group-index.h"
>  #include "lib/ovn-sb-idl.h"
>  #include "lib/ovn-util.h"
> +#include "ovn/actions.h"
>  #include "physical.h"
> +#include "pinctrl.h"
>  #include "openvswitch/shash.h"
>  #include "simap.h"
>  #include "smap.h"
> @@ -1030,6 +1032,59 @@ get_additional_tunnels(const struct
sbrec_port_binding *binding,
>      return additional_tunnels;
>  }
>
> +static void
> +setup_rarp_activation_strategy(const struct sbrec_port_binding *binding,
> +                               ofp_port_t ofport, struct zone_ids
*zone_ids,
> +                               struct ovn_desired_flow_table *flow_table,
> +                               struct ofpbuf *ofpacts_p)
> +{
> +    struct match match = MATCH_CATCHALL_INITIALIZER;
> +
> +    /* Unblock the port on ingress RARP. */
> +    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> +    match_set_in_port(&match, ofport);
> +    ofpbuf_clear(ofpacts_p);
> +
> +    load_logical_ingress_metadata(binding, zone_ids, ofpacts_p);
> +
> +    size_t ofs = ofpacts_p->size;
> +    struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts_p);
> +    oc->max_len = UINT16_MAX;
> +    oc->reason = OFPR_ACTION;
> +
> +    struct action_header ah = {
> +        .opcode = htonl(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP)
> +    };
> +    ofpbuf_put(ofpacts_p, &ah, sizeof ah);
> +
> +    ofpacts_p->header = oc;
> +    oc->userdata_len = ofpacts_p->size - (ofs + sizeof *oc);
> +    ofpact_finish_CONTROLLER(ofpacts_p, &oc);
> +
> +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1010,
> +                    binding->header_.uuid.parts[0],
> +                    &match, ofpacts_p, &binding->header_.uuid);
> +    ofpbuf_clear(ofpacts_p);
> +
> +    /* Block all non-RARP traffic for the port, both directions. */
> +    match_init_catchall(&match);
> +    match_set_in_port(&match, ofport);
> +
> +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1000,
> +                    binding->header_.uuid.parts[0],
> +                    &match, ofpacts_p, &binding->header_.uuid);
> +
> +    match_init_catchall(&match);
> +    uint32_t dp_key = binding->datapath->tunnel_key;
> +    uint32_t port_key = binding->tunnel_key;
> +    match_set_metadata(&match, htonll(dp_key));
> +    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);

What if another LSP on the same chassis (an additional chassis of the
migrating LSP) wants to send a packet to the migrating LSP on the main
chassis? Would it be blocked, too?
In addition, this module (physical.c) is better to add flows to physical
pipelines only. Shall we instead add a flow to LOG_TO_PHY?

> +
> +    ofctrl_add_flow(flow_table, OFTABLE_LOG_EGRESS_PIPELINE, 1000,
> +                    binding->header_.uuid.parts[0],
> +                    &match, ofpacts_p, &binding->header_.uuid);
> +}
> +
>  static void
>  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                        enum mf_field_id mff_ovn_geneve,
> @@ -1307,6 +1362,30 @@ consider_port_binding(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
>              }
>          }
>
> +        for (int i = 0; i < binding->n_additional_chassis; i++) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
1);
> +            if (binding->additional_chassis[i] == chassis) {
> +                const char *strategy = smap_get(&binding->options,
> +                                                "activation-strategy");
> +                if (strategy
> +                        && !db_is_port_activated(binding, chassis)
> +                        && !pinctrl_is_port_activated(dp_key, port_key))
{
> +                    if (!strcmp(strategy, "rarp")) {
> +                        setup_rarp_activation_strategy(binding, ofport,
> +                                                       &zone_ids,
flow_table,
> +                                                       ofpacts_p);
> +                    } else {
> +                        VLOG_WARN_RL(&rl,
> +                                     "Unknown activation strategy
defined for "
> +                                     "port %s: %s",
> +                                     binding->logical_port, strategy);
> +                        goto out;
> +                    }
> +                }
> +            break;
> +            }
> +        }
> +
>          /* Remember the size with just strip vlan added so far,
>           * as we're going to remove this with ofpbuf_pull() later. */
>          uint32_t ofpacts_orig_size = ofpacts_p->size;
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index df9284e50..2c31688a4 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -29,10 +29,12 @@
>  #include "lport.h"
>  #include "mac-learn.h"
>  #include "nx-match.h"
> +#include "ofctrl.h"
>  #include "latch.h"
>  #include "lib/packets.h"
>  #include "lib/sset.h"
>  #include "openvswitch/ofp-actions.h"
> +#include "openvswitch/ofp-flow.h"
>  #include "openvswitch/ofp-msgs.h"
>  #include "openvswitch/ofp-packet.h"
>  #include "openvswitch/ofp-print.h"
> @@ -152,8 +154,8 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
>   *  and pinctrl_run().
>   *  'pinctrl_handler_seq' is used by pinctrl_run() to
>   *  wake up pinctrl_handler thread from poll_block() if any changes
happened
> - *  in 'send_garp_rarp_data', 'ipv6_ras' and 'buffered_mac_bindings'
> - *  structures.
> + *  in 'send_garp_rarp_data', 'ipv6_ras', 'activated_ports' and
> + *  'buffered_mac_bindings' structures.
>   *
>   *  'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
>   *  the main thread from poll_block() when mac bindings/igmp groups need
to
> @@ -198,6 +200,20 @@ static void wait_put_mac_bindings(struct
ovsdb_idl_txn *ovnsb_idl_txn);
>  static void send_mac_binding_buffered_pkts(struct rconn *swconn)
>      OVS_REQUIRES(pinctrl_mutex);
>
> +static void pinctrl_rarp_activation_strategy_handler(struct rconn
*swconn,
> +                                                     const struct match
*md,
> +                                                     struct dp_packet
*pkt_in);
> +
> +static void init_activated_ports(void);
> +static void destroy_activated_ports(void);
> +static void wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn);
> +static void run_activated_ports(
> +    struct ovsdb_idl_txn *ovnsb_idl_txn,
> +    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> +    struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +    const struct sbrec_chassis *chassis)
> +    OVS_REQUIRES(pinctrl_mutex);
> +
>  static void init_send_garps_rarps(void);
>  static void destroy_send_garps_rarps(void);
>  static void send_garp_rarp_wait(long long int send_garp_rarp_time);
> @@ -522,6 +538,7 @@ pinctrl_init(void)
>      init_ipv6_ras();
>      init_ipv6_prefixd();
>      init_buffered_packets_map();
> +    init_activated_ports();
>      init_event_table();
>      ip_mcast_snoop_init();
>      init_put_vport_bindings();
> @@ -3244,6 +3261,13 @@ process_packet_in(struct rconn *swconn, const
struct ofp_header *msg)
>          ovs_mutex_unlock(&pinctrl_mutex);
>          break;
>
> +    case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP:
> +        ovs_mutex_lock(&pinctrl_mutex);
> +        pinctrl_rarp_activation_strategy_handler(swconn,
&pin.flow_metadata,
> +                                                 &packet);
> +        ovs_mutex_unlock(&pinctrl_mutex);
> +        break;
> +
>      default:
>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>                       ntohl(ah->opcode));
> @@ -3508,6 +3532,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
>                      chassis, active_tunnels);
>      run_put_fdbs(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac);
> +    run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
> +                        sbrec_port_binding_by_key, chassis);
>      ovs_mutex_unlock(&pinctrl_mutex);
>  }
>
> @@ -4011,6 +4037,7 @@ pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
>      int64_t new_seq = seq_read(pinctrl_main_seq);
>      seq_wait(pinctrl_main_seq, new_seq);
>      wait_put_fdbs(ovnsb_idl_txn);
> +    wait_activated_ports(ovnsb_idl_txn);
>  }
>
>  /* Called by ovn-controller. */
> @@ -4025,6 +4052,7 @@ pinctrl_destroy(void)
>      destroy_ipv6_ras();
>      destroy_ipv6_prefixd();
>      destroy_buffered_packets_map();
> +    destroy_activated_ports();
>      event_table_destroy();
>      destroy_put_mac_bindings();
>      destroy_put_vport_bindings();
> @@ -7702,6 +7730,200 @@ pinctrl_handle_svc_check(struct rconn *swconn,
const struct flow *ip_flow,
>      }
>  }
>
> +static struct ofpbuf *
> +encode_flow_mod(struct ofputil_flow_mod *fm)
> +{
> +    fm->buffer_id = UINT32_MAX;
> +    fm->out_port = OFPP_ANY;
> +    fm->out_group = OFPG_ANY;
> +    return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF15_OXM);
> +}
> +
> +struct port_pair {
> +    uint32_t dp_key;
> +    uint32_t port_key;
> +    struct ovs_list list;
> +};
> +
> +static struct ovs_list activated_ports;
> +
> +static void
> +init_activated_ports(void)
> +{
> +    ovs_list_init(&activated_ports);
> +}
> +
> +static void
> +destroy_activated_ports(void)
> +{
> +    struct port_pair *pp;
> +    LIST_FOR_EACH_POP (pp, list, &activated_ports) {
> +        free(pp);
> +    }
> +}
> +
> +static void
> +wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn)
> +{
> +    if (ovnsb_idl_txn && !ovs_list_is_empty(&activated_ports)) {
> +        poll_immediate_wake();
> +    }
> +}
> +
> +bool
> +db_is_port_activated(const struct sbrec_port_binding *pb,
> +                     const struct sbrec_chassis *chassis)
> +{
> +    const char *activated_chassis = smap_get(&pb->options,
> +
"additional-chassis-activated");
> +    if (activated_chassis) {
> +        char *save_ptr;
> +        char *tokstr = xstrdup(activated_chassis);
> +        for (const char *chassis_name = strtok_r(tokstr, ",", &save_ptr);
> +             chassis_name != NULL;
> +             chassis_name = strtok_r(NULL, ",", &save_ptr)) {
> +            if (!strcmp(chassis_name, chassis->name)) {
> +                free(tokstr);
> +                return true;
> +            }
> +        }
> +        free(tokstr);
> +    }
> +    return false;
> +}
> +
> +static void
> +run_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn,
> +                    struct ovsdb_idl_index
*sbrec_datapath_binding_by_key,
> +                    struct ovsdb_idl_index *sbrec_port_binding_by_key,
> +                    const struct sbrec_chassis *chassis)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    if (!ovnsb_idl_txn) {
> +        return;
> +    }
> +
> +    const struct port_pair *pp;
> +    LIST_FOR_EACH (pp, list, &activated_ports) {
> +        const struct sbrec_port_binding *pb = lport_lookup_by_key(
> +            sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> +            pp->dp_key, pp->port_key);
> +        if (pb) {
> +            if (!db_is_port_activated(pb, chassis)) {
> +                const char *activated_chassis = smap_get(
> +                    &pb->options, "additional-chassis-activated");
> +                char *activated_str;
> +                if (activated_chassis) {
> +                    activated_str = xasprintf(
> +                        "%s,%s", activated_chassis, chassis->name);
> +                    sbrec_port_binding_update_options_setkey(
> +                        pb, "additional-chassis-activated",
activated_str);
> +                    free(activated_str);
> +                } else {
> +                    sbrec_port_binding_update_options_setkey(
> +                        pb, "additional-chassis-activated",
chassis->name);
> +                }
> +            }
> +        }
> +    }
> +    destroy_activated_ports();

Do we really want to destroy it here? If it is destroyed, what's the
purpose of calling pinctrl_is_port_activated() in physical.c? Just a
reminder: when this function ends, the SB DB is not updated yet.

> +}
> +
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    const struct port_pair *pp;
> +    LIST_FOR_EACH (pp, list, &activated_ports) {
> +        if (pp->dp_key == dp_key && pp->port_key == port_key) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +static void
> +pinctrl_rarp_activation_strategy_handler(struct rconn *swconn,
> +                                         const struct match *md,
> +                                         struct dp_packet *pkt_in)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    /* Admitted; send RARP back to the pipeline. */
> +    uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
> +    uint32_t dp_key = ntohll(md->flow.metadata);
> +
> +    uint64_t ofpacts_stub[4096 / 8];
> +    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> +    enum ofp_version version = rconn_get_version(swconn);
> +    put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> +    put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> +    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> +    resubmit->in_port = OFPP_CONTROLLER;
> +    resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> +
> +    struct ofputil_packet_out po = {
> +        .packet = dp_packet_data(pkt_in),
> +        .packet_len = dp_packet_size(pkt_in),
> +        .buffer_id = UINT32_MAX,
> +        .ofpacts = ofpacts.data,
> +        .ofpacts_len = ofpacts.size,
> +    };
> +    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> +
> +    enum ofputil_protocol proto;
> +    proto = ofputil_protocol_from_ofp_version(version);
> +    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> +    ofpbuf_uninit(&ofpacts);
> +
> +    /* Delete ingress/egress drop flows to unblock the port. */
> +    struct match match;
> +    struct minimatch mmatch;
> +
> +    match_init_catchall(&match);
> +    match_set_metadata(&match, md->flow.metadata);
> +    match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0,
> +                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
> +    minimatch_init(&mmatch, &match);
> +
> +    struct ofputil_flow_mod fm = {
> +        .match = mmatch,
> +        .priority = 1000,
> +        .table_id = OFTABLE_LOG_INGRESS_PIPELINE,

This table_id doesn't match the one added in physical.c
(OFTABLE_PHY_TO_LOG). (probably something missed in the test cases as well?)

> +        .command = OFPFC_DELETE_STRICT,
> +    };
> +    queue_msg(swconn, encode_flow_mod(&fm));
> +    minimatch_destroy(&mmatch);
> +
> +    match_init_catchall(&match);
> +    match_set_metadata(&match, md->flow.metadata);
> +    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
> +                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
> +    minimatch_init(&mmatch, &match);
> +
> +    fm.table_id = OFTABLE_LOG_EGRESS_PIPELINE;
> +    queue_msg(swconn, encode_flow_mod(&fm));
> +    minimatch_destroy(&mmatch);
> +
> +    /* Delete inport controller flow (the one that got us here */
> +    match_init_catchall(&match);
> +    match_set_metadata(&match, md->flow.metadata);
> +    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> +    minimatch_init(&mmatch, &match);
> +
> +    fm.priority = 1010;
> +    fm.table_id = OFTABLE_PHY_TO_LOG;
> +    queue_msg(swconn, encode_flow_mod(&fm));
> +    minimatch_destroy(&mmatch);
> +
> +    /* Tag the port as activated in-memory. */
> +    struct port_pair *pp = xmalloc(sizeof *pp);
> +    pp->port_key = port_key;
> +    pp->dp_key = dp_key;
> +    ovs_list_push_front(&activated_ports, &pp->list);
> +
> +    /* Notify main thread on pending additional-chassis-activated
updates. */
> +    notify_pinctrl_main();
> +}
> +

As discussed in a previous OVN meeting, it is not good to delete OF flows
by the pinctrl thread.
Firstly, here it doesn't call ofctrl module, while physical module adds
flows by calling ofctrl, which not only modifies flows in OVS, but also
maintains "desired_flows" and "installed_flows". ofctrl module would never
know these flows are deleted already, and would send flow deletion for the
same flows again later. Maybe it is not harmful, but it would be better
to have ofctrl handle all the flow operations to OVS.
Secondly, it is easier for code maintenance if we put the add/delete for
these flows closer in the same file, probably with wrapper functions to
avoid mismatch between add & delete.
I think we can pass the port-binding information to the main thread, and
trigger the deletion by
pinctrl_run()->run_activated_ports()->physical_handle_flows_for_lport().

In the meeting we also discussed adding it to the I-P engine, but with my
understanding of this feature I think it is ok to leave the deletion out of
the engine. Here is my consideration. The pinctrl module would update the
port-binding in SB DB, which is an input that would eventually lead to
correct flow generation by the I-P engine - this can be considered the
*official* flow generation, and will be correct even if ovn-controller is
restarted/recomputed. Now here in the pinctrl module, we just want to do it
faster before SB DB is updated, for performance reasons, so just think
about it as a short-cut, and it is eventually consistent with what I-P
engine would do.

>  static struct hmap put_fdbs;
>
>  /* MAC learning (fdb) related functions.  Runs within the main
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 88f18e983..1704bc6ba 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -33,6 +33,7 @@ struct sbrec_dns_table;
>  struct sbrec_controller_event_table;
>  struct sbrec_service_monitor_table;
>  struct sbrec_bfd_table;
> +struct sbrec_port_binding;
>
>  void pinctrl_init(void);
>  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -56,4 +57,8 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>  void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn);
>  void pinctrl_destroy(void);
>  void pinctrl_set_br_int_name(char *br_int_name);
> +
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key);
> +bool db_is_port_activated(const struct sbrec_port_binding *pb,
> +                          const struct sbrec_chassis *chassis);
>  #endif /* controller/pinctrl.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index f55d77d47..0a1865741 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -116,6 +116,7 @@ struct ovn_extend_table;
>      OVNACT(PUT_FDB,           ovnact_put_fdb)         \
>      OVNACT(GET_FDB,           ovnact_get_fdb)         \
>      OVNACT(LOOKUP_FDB,        ovnact_lookup_fdb)      \
> +    OVNACT(ACTIVATION_STRATEGY_RARP, ovnact_activation_strategy_rarp) \

I am confused why we need to add a logical flow action for this. I think it
is not used, and doesn't seem to be used in the future. All we need is just
an action-code that is used in pinctrl handler.

Thanks,
Han

>
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -420,6 +421,11 @@ struct ovnact_handle_svc_check {
>      struct expr_field port;     /* Logical port name. */
>  };
>
> +/* OVNACT_ACTIVATION_STRATEGY_RARP. */
> +struct ovnact_activation_strategy_rarp {
> +    struct ovnact ovnact;
> +};
> +
>  /* OVNACT_FWD_GROUP. */
>  struct ovnact_fwd_group {
>      struct ovnact ovnact;
> @@ -681,6 +687,9 @@ enum action_opcode {
>      /* put_fdb(inport, eth.src).
>       */
>      ACTION_OPCODE_PUT_FDB,
> +
> +    /* activation_strategy_rarp() */
> +    ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
>  };
>
>  /* Header. */
> diff --git a/lib/actions.c b/lib/actions.c
> index a3cf747db..4766b3227 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -3822,6 +3822,41 @@ ovnact_handle_svc_check_free(struct
ovnact_handle_svc_check *sc OVS_UNUSED)
>  {
>  }
>
> +static void
> +parse_activation_strategy_rarp(struct action_context *ctx OVS_UNUSED)
> +{
> +     if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) {
> +        return;
> +    }
> +
> +    ovnact_put_ACTIVATION_STRATEGY_RARP(ctx->ovnacts);
> +    lexer_force_match(ctx->lexer, LEX_T_RPAREN);
> +}
> +
> +static void
> +format_ACTIVATION_STRATEGY_RARP(
> +        const struct ovnact_activation_strategy_rarp *activation
OVS_UNUSED,
> +        struct ds *s)
> +{
> +    ds_put_cstr(s, "activation_strategy_rarp();");
> +}
> +
> +static void
> +encode_ACTIVATION_STRATEGY_RARP(
> +        const struct ovnact_activation_strategy_rarp *activation
OVS_UNUSED,
> +        const struct ovnact_encode_params *ep,
> +        struct ofpbuf *ofpacts)
> +{
> +    encode_controller_op(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
> +                         ep->ctrl_meter_id, ofpacts);
> +}
> +
> +static void
> +ovnact_activation_strategy_rarp_free(
> +    struct ovnact_activation_strategy_rarp *activation OVS_UNUSED)
> +{
> +}
> +
>  static void
>  parse_fwd_group_action(struct action_context *ctx)
>  {
> @@ -4376,6 +4411,8 @@ parse_action(struct action_context *ctx)
>          parse_bind_vport(ctx);
>      } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
>          parse_handle_svc_check(ctx);
> +    } else if (lexer_match_id(ctx->lexer, "activation_strategy_rarp")) {
> +        parse_activation_strategy_rarp(ctx);
>      } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
>          parse_fwd_group_action(ctx);
>      } else if (lexer_match_id(ctx->lexer, "handle_dhcpv6_reply")) {
> @@ -4619,7 +4656,8 @@ ovnact_op_to_string(uint32_t ovnact_opc)
>          ACTION_OPCODE(BIND_VPORT)                   \
>          ACTION_OPCODE(DHCP6_SERVER)                 \
>          ACTION_OPCODE(HANDLE_SVC_CHECK)             \
> -        ACTION_OPCODE(BFD_MSG)
> +        ACTION_OPCODE(BFD_MSG)                      \
> +        ACTION_OPCODE(ACTIVATION_STRATEGY_RARP)
>  #define ACTION_OPCODE(ENUM) \
>      case ACTION_OPCODE_##ENUM: return xstrdup(#ENUM);
>      ACTION_OPCODES
> diff --git a/northd/northd.c b/northd/northd.c
> index 55919188e..4fb63edf8 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -3385,6 +3385,16 @@ ovn_port_update_sbrec(struct northd_input
*input_data,
>                  smap_add(&options, "vlan-passthru", "true");
>              }
>
> +            /* Retain activated chassis flags. */
> +            if (op->sb->requested_additional_chassis) {
> +                const char *activated_str = smap_get(
> +                    &op->sb->options, "additional-chassis-activated");
> +                if (activated_str) {
> +                    smap_add(&options, "additional-chassis-activated",
> +                             activated_str);
> +                }
> +            }
> +
>              sbrec_port_binding_set_options(op->sb, &options);
>              smap_destroy(&options);
>              if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 1cfc2024d..b9ae81acb 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -102,7 +102,10 @@ static const char *rbac_port_binding_auth[] =
>  static const char *rbac_port_binding_update[] =
>      {"chassis", "additional_chassis",
>       "encap", "additional_encap",
> -     "up", "virtual_parent"};
> +     "up", "virtual_parent",
> +     /* NOTE: we only need to update the additional-chassis-activated
key,
> +      * but RBAC_Role doesn't support mutate operation for subkeys. */
> +     "options"};
>
>  static const char *rbac_mac_binding_auth[] =
>      {""};
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index df17f0cbe..97600e2c2 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1045,6 +1045,17 @@
>            </p>
>          </column>
>
> +        <column name="options" key="activation-strategy">
> +          If used with multiple chassis set in
> +          <ref column="requested-chassis"/>, specifies an activation
strategy
> +          for all additional chassis. By default, no activation strategy
is
> +          used, meaning additional port locations are immediately
available for
> +          use. When set to "rarp", the port is blocked for ingress and
egress
> +          communication until a RARP packet is sent from a new location.
The
> +          "rarp" strategy is useful in live migration scenarios for
virtual
> +          machines.
> +        </column>
> +
>          <column name="options" key="iface-id-ver">
>            If set, this port will be bound by <code>ovn-controller</code>
>            only if this same key and value is configured in the
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 78db5b7c5..5a1fdbe72 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -3300,6 +3300,21 @@ tcp.flags = RST;
>          </p>
>        </column>
>
> +      <column name="options" key="activation-strategy">
> +        If used with multiple chassis set in <ref
column="requested-chassis"/>,
> +        specifies an activation strategy for all additional chassis. By
> +        default, no activation strategy is used, meaning additional port
> +        locations are immediately available for use. When set to "rarp",
the
> +        port is blocked for ingress and egress communication until a RARP
> +        packet is sent from a new location. The "rarp" strategy is useful
> +        in live migration scenarios for virtual machines.
> +      </column>
> +
> +      <column name="options" key="additional-chassis-activated">
> +        When <ref column="activation-strategy"/> is set, this option
indicates
> +        that the port was activated using the strategy specified.
> +      </column>
> +
>        <column name="options" key="iface-id-ver">
>          If set, this port will be bound by <code>ovn-controller</code>
>          only if this same key and value is configured in the
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 1154fdc05..92821d6d4 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -14699,6 +14699,172 @@ OVN_CLEANUP([hv1],[hv2],[hv3])
>  AT_CLEANUP
>  ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:activation-strategy for logical port])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +sim_add hv3
> +as hv3
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.13
> +
> +# Disable local ARP responder to pass ARP requests through tunnels
> +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config
vlan-passthru=true
> +
> +check ovn-nbctl lsp-add ls0 migrator
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> +                                         activation-strategy=rarp
> +
> +check ovn-nbctl lsp-add ls0 outside
> +check ovn-nbctl lsp-set-options outside requested-chassis=hv3
> +
> +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:01 10.0.0.1"
> +check ovn-nbctl lsp-set-addresses outside "00:00:00:00:00:02 10.0.0.2"
> +
> +for hv in hv1 hv2; do
> +    as $hv check ovs-vsctl -- add-port br-int migrator -- \
> +        set Interface migrator external-ids:iface-id=migrator \
> +                               options:tx_pcap=$hv/migrator-tx.pcap \
> +                               options:rxq_pcap=$hv/migrator-rx.pcap
> +done
> +as hv3 check ovs-vsctl -- add-port br-int outside -- \
> +    set Interface outside external-ids:iface-id=outside
> +
> +wait_row_count Chassis 1 name=hv1
> +wait_row_count Chassis 1 name=hv2
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> +wait_column "$hv1_uuid" Port_Binding requested_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding additional_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=migrator
> +
> +OVN_POPULATE_ARP
> +
> +send_arp() {
> +    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> +    local
request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> +    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> +    echo "${request}"
> +}
> +
> +send_rarp() {
> +    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> +    local
request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> +    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> +    echo "${request}"
> +}
> +
> +reset_pcap_file() {
> +    local hv=$1
> +    local iface=$2
> +    local pcap_file=$3
> +    as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=dummy-tx.pcap \
> +
options:rxq_pcap=dummy-rx.pcap
> +    check rm -f ${pcap_file}*.pcap
> +    as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=${pcap_file}-tx.pcap \
> +
options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +reset_env() {
> +    reset_pcap_file hv1 migrator hv1/migrator
> +    reset_pcap_file hv2 migrator hv2/migrator
> +    reset_pcap_file hv3 outside hv3/outside
> +
> +    for port in hv1/migrator hv2/migrator hv3/outside; do
> +        : > $port.expected
> +    done
> +}
> +
> +check_packets() {
> +    OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> +    OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> +    OVN_CHECK_PACKETS([hv3/outside-tx.pcap], [hv3/outside.expected])
> +}
> +
> +migrator_spa=$(ip_to_hex 10 0 0 1)
> +outside_spa=$(ip_to_hex 10 0 0 2)
> +
> +reset_env
> +
> +# Packet from hv3:Outside arrives to hv1:Migrator
> +# hv3:Outside cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator cannot reach to hv3:Outside because it is blocked by RARP
strategy
> +request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa
$outside_spa)
> +
> +check_packets
> +reset_env
> +
> +AT_CHECK([ovn-sbctl find port_binding logical_port=migrator | grep -q
additional-chassis-activated], [1])
> +
> +# Now activate hv2:Migrator location
> +request=$(send_rarp hv2 migrator 000000000001 ffffffffffff $migrator_spa
$migrator_spa)
> +
> +# RARP was reinjected into the pipeline
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +reset_env
> +
> +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding
logical_port=migrator)
> +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated | tr -d '""')])
> +
> +# Now packet arrives to both locations
> +request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +echo $request >> hv2/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator still arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator can now reach to hv3:Outside because RARP strategy
activated it
> +request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +
> +# complete port migration and check that -activated flag is reset
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv2
> +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated)])
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([options:requested-chassis for logical port])
>  ovn_start
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index b5116e969..ef0bd566f 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2885,6 +2885,9 @@ trace_actions(const struct ovnact *ovnacts, size_t
ovnacts_len,
>              /* Nothing to do for tracing. */
>              break;
>
> +        case OVNACT_ACTIVATION_STRATEGY_RARP:
> +            break;
> +
>          case OVNACT_GET_FDB:
>              execute_get_fdb(ovnact_get_GET_FDB(a), dp, uflow);
>              break;
> --
> 2.34.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ihar Hrachyshka May 26, 2022, 12:37 a.m. UTC | #2
I'm sending another version of the series with some of your comments
handled (plus rebase), while others are still pending clarifications.
See below.

On Fri, May 13, 2022 at 2:37 AM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Thu, May 5, 2022 at 6:38 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> >
> > When options:activation-strategy is set to "rarp" for LSP, when used in
> > combination with multiple chassis names listed in
> > options:requested-chassis, additional chassis will install special flows
> > that would block all ingress and egress traffic for the port until a
> > special activation event happens.
> >
> > For "rarp" strategy, an observation of a RARP packet sent from the port
> > on the additional chassis is such an event. When it occurs, a special
> > flow passes control to a controller() action handler that removes the
> > installed blocking flows and also marks the port as
> > options:additional-chassis-activated in southbound db. Once vswitchd
> > processes the flow mod request, the port is ready to communicate from
> > a new location.
> >
> > This feature is useful in live migration scenarios where it's not
> > advisable to unlock the destination port location prematurily to avoid
> > duplicate packets originating from the port.
> >
> > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > ---
> >  NEWS                  |   2 +
> >  controller/physical.c |  79 +++++++++++++++
> >  controller/pinctrl.c  | 226 +++++++++++++++++++++++++++++++++++++++++-
> >  controller/pinctrl.h  |   5 +
> >  include/ovn/actions.h |   9 ++
> >  lib/actions.c         |  40 +++++++-
> >  northd/northd.c       |  10 ++
> >  northd/ovn-northd.c   |   5 +-
> >  ovn-nb.xml            |  11 ++
> >  ovn-sb.xml            |  15 +++
> >  tests/ovn.at          | 166 +++++++++++++++++++++++++++++++
> >  utilities/ovn-trace.c |   3 +
> >  12 files changed, 567 insertions(+), 4 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 3fedcfeed..b5dbda89c 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -8,6 +8,8 @@ Post v22.03.0
> >    - Add global option (NB_Global.options:default_acl_drop) to enable
> >      implicit drop behavior on logical switches with ACLs applied.
> >    - Support list of chassis for Logical_Switch_Port:options:requested-chassis.
> > +  - Support Logical_Switch_Port:options:activation-strategy for live migration
> > +    scenarios.
> >
> >  OVN v22.03.0 - 11 Mar 2022
> >  --------------------------
> > diff --git a/controller/physical.c b/controller/physical.c
> > index 6399d1fce..ce0ff24ba 100644
> > --- a/controller/physical.c
> > +++ b/controller/physical.c
> > @@ -40,7 +40,9 @@
> >  #include "lib/mcast-group-index.h"
> >  #include "lib/ovn-sb-idl.h"
> >  #include "lib/ovn-util.h"
> > +#include "ovn/actions.h"
> >  #include "physical.h"
> > +#include "pinctrl.h"
> >  #include "openvswitch/shash.h"
> >  #include "simap.h"
> >  #include "smap.h"
> > @@ -1030,6 +1032,59 @@ get_additional_tunnels(const struct sbrec_port_binding *binding,
> >      return additional_tunnels;
> >  }
> >
> > +static void
> > +setup_rarp_activation_strategy(const struct sbrec_port_binding *binding,
> > +                               ofp_port_t ofport, struct zone_ids *zone_ids,
> > +                               struct ovn_desired_flow_table *flow_table,
> > +                               struct ofpbuf *ofpacts_p)
> > +{
> > +    struct match match = MATCH_CATCHALL_INITIALIZER;
> > +
> > +    /* Unblock the port on ingress RARP. */
> > +    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> > +    match_set_in_port(&match, ofport);
> > +    ofpbuf_clear(ofpacts_p);
> > +
> > +    load_logical_ingress_metadata(binding, zone_ids, ofpacts_p);
> > +
> > +    size_t ofs = ofpacts_p->size;
> > +    struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts_p);
> > +    oc->max_len = UINT16_MAX;
> > +    oc->reason = OFPR_ACTION;
> > +
> > +    struct action_header ah = {
> > +        .opcode = htonl(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP)
> > +    };
> > +    ofpbuf_put(ofpacts_p, &ah, sizeof ah);
> > +
> > +    ofpacts_p->header = oc;
> > +    oc->userdata_len = ofpacts_p->size - (ofs + sizeof *oc);
> > +    ofpact_finish_CONTROLLER(ofpacts_p, &oc);
> > +
> > +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1010,
> > +                    binding->header_.uuid.parts[0],
> > +                    &match, ofpacts_p, &binding->header_.uuid);
> > +    ofpbuf_clear(ofpacts_p);
> > +
> > +    /* Block all non-RARP traffic for the port, both directions. */
> > +    match_init_catchall(&match);
> > +    match_set_in_port(&match, ofport);
> > +
> > +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1000,
> > +                    binding->header_.uuid.parts[0],
> > +                    &match, ofpacts_p, &binding->header_.uuid);
> > +
> > +    match_init_catchall(&match);
> > +    uint32_t dp_key = binding->datapath->tunnel_key;
> > +    uint32_t port_key = binding->tunnel_key;
> > +    match_set_metadata(&match, htonll(dp_key));
> > +    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
>
> What if another LSP on the same chassis (an additional chassis of the migrating LSP) wants to send a packet to the migrating LSP on the main chassis? Would it be blocked, too?

Packets are cloned to all chassis, delivered to all of them, and each
of them decides on its own if it's ok to deliver it (depending on
whether activation flows are still present). I'll update the test case
to cover the scenario you describe.

> In addition, this module (physical.c) is better to add flows to physical pipelines only. Shall we instead add a flow to LOG_TO_PHY?
>
> > +
> > +    ofctrl_add_flow(flow_table, OFTABLE_LOG_EGRESS_PIPELINE, 1000,
> > +                    binding->header_.uuid.parts[0],
> > +                    &match, ofpacts_p, &binding->header_.uuid);
> > +}
> > +
> >  static void
> >  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >                        enum mf_field_id mff_ovn_geneve,
> > @@ -1307,6 +1362,30 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >              }
> >          }
> >
> > +        for (int i = 0; i < binding->n_additional_chassis; i++) {
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > +            if (binding->additional_chassis[i] == chassis) {
> > +                const char *strategy = smap_get(&binding->options,
> > +                                                "activation-strategy");
> > +                if (strategy
> > +                        && !db_is_port_activated(binding, chassis)
> > +                        && !pinctrl_is_port_activated(dp_key, port_key)) {
> > +                    if (!strcmp(strategy, "rarp")) {
> > +                        setup_rarp_activation_strategy(binding, ofport,
> > +                                                       &zone_ids, flow_table,
> > +                                                       ofpacts_p);
> > +                    } else {
> > +                        VLOG_WARN_RL(&rl,
> > +                                     "Unknown activation strategy defined for "
> > +                                     "port %s: %s",
> > +                                     binding->logical_port, strategy);
> > +                        goto out;
> > +                    }
> > +                }
> > +            break;
> > +            }
> > +        }
> > +
> >          /* Remember the size with just strip vlan added so far,
> >           * as we're going to remove this with ofpbuf_pull() later. */
> >          uint32_t ofpacts_orig_size = ofpacts_p->size;
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index df9284e50..2c31688a4 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -29,10 +29,12 @@
> >  #include "lport.h"
> >  #include "mac-learn.h"
> >  #include "nx-match.h"
> > +#include "ofctrl.h"
> >  #include "latch.h"
> >  #include "lib/packets.h"
> >  #include "lib/sset.h"
> >  #include "openvswitch/ofp-actions.h"
> > +#include "openvswitch/ofp-flow.h"
> >  #include "openvswitch/ofp-msgs.h"
> >  #include "openvswitch/ofp-packet.h"
> >  #include "openvswitch/ofp-print.h"
> > @@ -152,8 +154,8 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
> >   *  and pinctrl_run().
> >   *  'pinctrl_handler_seq' is used by pinctrl_run() to
> >   *  wake up pinctrl_handler thread from poll_block() if any changes happened
> > - *  in 'send_garp_rarp_data', 'ipv6_ras' and 'buffered_mac_bindings'
> > - *  structures.
> > + *  in 'send_garp_rarp_data', 'ipv6_ras', 'activated_ports' and
> > + *  'buffered_mac_bindings' structures.
> >   *
> >   *  'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
> >   *  the main thread from poll_block() when mac bindings/igmp groups need to
> > @@ -198,6 +200,20 @@ static void wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn);
> >  static void send_mac_binding_buffered_pkts(struct rconn *swconn)
> >      OVS_REQUIRES(pinctrl_mutex);
> >
> > +static void pinctrl_rarp_activation_strategy_handler(struct rconn *swconn,
> > +                                                     const struct match *md,
> > +                                                     struct dp_packet *pkt_in);
> > +
> > +static void init_activated_ports(void);
> > +static void destroy_activated_ports(void);
> > +static void wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn);
> > +static void run_activated_ports(
> > +    struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +    const struct sbrec_chassis *chassis)
> > +    OVS_REQUIRES(pinctrl_mutex);
> > +
> >  static void init_send_garps_rarps(void);
> >  static void destroy_send_garps_rarps(void);
> >  static void send_garp_rarp_wait(long long int send_garp_rarp_time);
> > @@ -522,6 +538,7 @@ pinctrl_init(void)
> >      init_ipv6_ras();
> >      init_ipv6_prefixd();
> >      init_buffered_packets_map();
> > +    init_activated_ports();
> >      init_event_table();
> >      ip_mcast_snoop_init();
> >      init_put_vport_bindings();
> > @@ -3244,6 +3261,13 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >          ovs_mutex_unlock(&pinctrl_mutex);
> >          break;
> >
> > +    case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP:
> > +        ovs_mutex_lock(&pinctrl_mutex);
> > +        pinctrl_rarp_activation_strategy_handler(swconn, &pin.flow_metadata,
> > +                                                 &packet);
> > +        ovs_mutex_unlock(&pinctrl_mutex);
> > +        break;
> > +
> >      default:
> >          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >                       ntohl(ah->opcode));
> > @@ -3508,6 +3532,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >      bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
> >                      chassis, active_tunnels);
> >      run_put_fdbs(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac);
> > +    run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
> > +                        sbrec_port_binding_by_key, chassis);
> >      ovs_mutex_unlock(&pinctrl_mutex);
> >  }
> >
> > @@ -4011,6 +4037,7 @@ pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
> >      int64_t new_seq = seq_read(pinctrl_main_seq);
> >      seq_wait(pinctrl_main_seq, new_seq);
> >      wait_put_fdbs(ovnsb_idl_txn);
> > +    wait_activated_ports(ovnsb_idl_txn);
> >  }
> >
> >  /* Called by ovn-controller. */
> > @@ -4025,6 +4052,7 @@ pinctrl_destroy(void)
> >      destroy_ipv6_ras();
> >      destroy_ipv6_prefixd();
> >      destroy_buffered_packets_map();
> > +    destroy_activated_ports();
> >      event_table_destroy();
> >      destroy_put_mac_bindings();
> >      destroy_put_vport_bindings();
> > @@ -7702,6 +7730,200 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >      }
> >  }
> >
> > +static struct ofpbuf *
> > +encode_flow_mod(struct ofputil_flow_mod *fm)
> > +{
> > +    fm->buffer_id = UINT32_MAX;
> > +    fm->out_port = OFPP_ANY;
> > +    fm->out_group = OFPG_ANY;
> > +    return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF15_OXM);
> > +}
> > +
> > +struct port_pair {
> > +    uint32_t dp_key;
> > +    uint32_t port_key;
> > +    struct ovs_list list;
> > +};
> > +
> > +static struct ovs_list activated_ports;
> > +
> > +static void
> > +init_activated_ports(void)
> > +{
> > +    ovs_list_init(&activated_ports);
> > +}
> > +
> > +static void
> > +destroy_activated_ports(void)
> > +{
> > +    struct port_pair *pp;
> > +    LIST_FOR_EACH_POP (pp, list, &activated_ports) {
> > +        free(pp);
> > +    }
> > +}
> > +
> > +static void
> > +wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn)
> > +{
> > +    if (ovnsb_idl_txn && !ovs_list_is_empty(&activated_ports)) {
> > +        poll_immediate_wake();
> > +    }
> > +}
> > +
> > +bool
> > +db_is_port_activated(const struct sbrec_port_binding *pb,
> > +                     const struct sbrec_chassis *chassis)
> > +{
> > +    const char *activated_chassis = smap_get(&pb->options,
> > +                                             "additional-chassis-activated");
> > +    if (activated_chassis) {
> > +        char *save_ptr;
> > +        char *tokstr = xstrdup(activated_chassis);
> > +        for (const char *chassis_name = strtok_r(tokstr, ",", &save_ptr);
> > +             chassis_name != NULL;
> > +             chassis_name = strtok_r(NULL, ",", &save_ptr)) {
> > +            if (!strcmp(chassis_name, chassis->name)) {
> > +                free(tokstr);
> > +                return true;
> > +            }
> > +        }
> > +        free(tokstr);
> > +    }
> > +    return false;
> > +}
> > +
> > +static void
> > +run_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +                    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> > +                    struct ovsdb_idl_index *sbrec_port_binding_by_key,
> > +                    const struct sbrec_chassis *chassis)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    if (!ovnsb_idl_txn) {
> > +        return;
> > +    }
> > +
> > +    const struct port_pair *pp;
> > +    LIST_FOR_EACH (pp, list, &activated_ports) {
> > +        const struct sbrec_port_binding *pb = lport_lookup_by_key(
> > +            sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> > +            pp->dp_key, pp->port_key);
> > +        if (pb) {
> > +            if (!db_is_port_activated(pb, chassis)) {
> > +                const char *activated_chassis = smap_get(
> > +                    &pb->options, "additional-chassis-activated");
> > +                char *activated_str;
> > +                if (activated_chassis) {
> > +                    activated_str = xasprintf(
> > +                        "%s,%s", activated_chassis, chassis->name);
> > +                    sbrec_port_binding_update_options_setkey(
> > +                        pb, "additional-chassis-activated", activated_str);
> > +                    free(activated_str);
> > +                } else {
> > +                    sbrec_port_binding_update_options_setkey(
> > +                        pb, "additional-chassis-activated", chassis->name);
> > +                }
> > +            }
> > +        }
> > +    }
> > +    destroy_activated_ports();
>
> Do we really want to destroy it here? If it is destroyed, what's the purpose of calling pinctrl_is_port_activated() in physical.c? Just to remind that when this function ends, the SB DB is not updated yet.
>

You mean we should destroy it after the db change is actually committed?
When does that happen? Is there an indication in the code flow somewhere
that a change to an object actually got into the db? I don't want to
leave the activated_ports list filled with data for ports that were
activated a long time ago. Would e.g. popping only those entries that
were already set in the db be any better? (-> in run_activated_ports,
get the -activated key, parse it and check if the current chassis is
already in the list; if so, pop it from the list of activated ports?)

> > +}
> > +
> > +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    const struct port_pair *pp;
> > +    LIST_FOR_EACH (pp, list, &activated_ports) {
> > +        if (pp->dp_key == dp_key && pp->port_key == port_key) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +static void
> > +pinctrl_rarp_activation_strategy_handler(struct rconn *swconn,
> > +                                         const struct match *md,
> > +                                         struct dp_packet *pkt_in)
> > +    OVS_REQUIRES(pinctrl_mutex)
> > +{
> > +    /* Admitted; send RARP back to the pipeline. */
> > +    uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
> > +    uint32_t dp_key = ntohll(md->flow.metadata);
> > +
> > +    uint64_t ofpacts_stub[4096 / 8];
> > +    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> > +    enum ofp_version version = rconn_get_version(swconn);
> > +    put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> > +    put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> > +    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > +    resubmit->in_port = OFPP_CONTROLLER;
> > +    resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> > +
> > +    struct ofputil_packet_out po = {
> > +        .packet = dp_packet_data(pkt_in),
> > +        .packet_len = dp_packet_size(pkt_in),
> > +        .buffer_id = UINT32_MAX,
> > +        .ofpacts = ofpacts.data,
> > +        .ofpacts_len = ofpacts.size,
> > +    };
> > +    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > +
> > +    enum ofputil_protocol proto;
> > +    proto = ofputil_protocol_from_ofp_version(version);
> > +    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> > +    ofpbuf_uninit(&ofpacts);
> > +
> > +    /* Delete ingress/egress drop flows to unblock the port. */
> > +    struct match match;
> > +    struct minimatch mmatch;
> > +
> > +    match_init_catchall(&match);
> > +    match_set_metadata(&match, md->flow.metadata);
> > +    match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0,
> > +                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
> > +    minimatch_init(&mmatch, &match);
> > +
> > +    struct ofputil_flow_mod fm = {
> > +        .match = mmatch,
> > +        .priority = 1000,
> > +        .table_id = OFTABLE_LOG_INGRESS_PIPELINE,
>
> This table_id doesn't match the one added in physical.c (OFTABLE_PHY_TO_LOG). (probably something missed in the test cases as well?)
>

Indeed. This worked because a db change eventually triggers flow
recalculation. We need a test scenario that stops db processing
temporarily to check that activation flows are cleared without waiting
for the db. But when I tried to implement one, it turned out it's not as
easy because, for a reason that is not clear to me, when I kill /
SIGSTOP the ovsdb-server that serves the additional chassis and then
send the activation RARP packet, the packet is not sent to the
controller for handling. It seems that vswitchd requires ovsdb-server to
process a packet via the controller() action? Is it a bug in vswitchd or
ovn-controller? It doesn't seem right that the behavior changes when
ovsdb-server is temporarily down. Thoughts?

Anyway, while I was working on the test scenario, I found some more
bugs in the code that cleans up the flows (e.g. we were not resetting
.match for each new flow_mod operation, which resulted in invalid
commands, as observed in the vswitchd log). Code-wise / log-wise the
updated version doesn't have issues with this part of the code anymore,
but if we could somehow test the controller() rarp handler with
ovsdb-server down, that would be nice...

> > +        .command = OFPFC_DELETE_STRICT,
> > +    };
> > +    queue_msg(swconn, encode_flow_mod(&fm));
> > +    minimatch_destroy(&mmatch);
> > +
> > +    match_init_catchall(&match);
> > +    match_set_metadata(&match, md->flow.metadata);
> > +    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
> > +                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
> > +    minimatch_init(&mmatch, &match);
> > +
> > +    fm.table_id = OFTABLE_LOG_EGRESS_PIPELINE;
> > +    queue_msg(swconn, encode_flow_mod(&fm));
> > +    minimatch_destroy(&mmatch);
> > +
> > +    /* Delete inport controller flow (the one that got us here */
> > +    match_init_catchall(&match);
> > +    match_set_metadata(&match, md->flow.metadata);
> > +    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> > +    minimatch_init(&mmatch, &match);
> > +
> > +    fm.priority = 1010;
> > +    fm.table_id = OFTABLE_PHY_TO_LOG;
> > +    queue_msg(swconn, encode_flow_mod(&fm));
> > +    minimatch_destroy(&mmatch);
> > +
> > +    /* Tag the port as activated in-memory. */
> > +    struct port_pair *pp = xmalloc(sizeof *pp);
> > +    pp->port_key = port_key;
> > +    pp->dp_key = dp_key;
> > +    ovs_list_push_front(&activated_ports, &pp->list);
> > +
> > +    /* Notify main thread on pending additional-chassis-activated updates. */
> > +    notify_pinctrl_main();
> > +}
> > +
>
> As discussed in a previous OVN meeting, it is not good to delete OF flows by the pinctrl thread.
> Firstly, here it doesn't call ofctrl module, while physical module adds flows by calling ofctrl, which not only modifies flows in OVS, but also maintains "desired_flows" and "installed_flows". ofctrl module would never know these flows are deleted already, and would send flow deletion for the same flows again later. Maybe it is not harmful, but it would better to have ofctrl handle all the flow operations to OVS.
> Secondly, it is easier for code maintenance if we put the add/delete for these flows closer in the same file, probably with wrapper functions to avoid mismatch between add & delete.
> I think we can pass the port-binding information to the main thread, and trigger the deletion by pinctrl_run()->run_activated_ports()->physical_handle_flows_for_lport().
>

I agree in general. Though I can't understand how run_activated_ports
would call physical_handle_flows_for_lport, since the latter
requires access to desired_flows and physical_ctx. Could you please
elaborate?

> In the meeting we also discussed adding it to the I-P engine, but with my understanding of this feature I think it is ok to leave the deletion out of the engine. Here is my consideration. The pinctrl module would update the port-binding in SB DB, which is an input that would eventually lead to correct flow generation by the I-P engine - this can be considered the *official* flow generation, and will be correct even if ovn-controller is restarted/recomputed. Now here in the pinctrl module, we just want to do it faster before SB DB is updated, for performance reasons, so just think about it as a short-cut, and it is eventually consistent with what I-P engine would do.
>
> >  static struct hmap put_fdbs;
> >
> >  /* MAC learning (fdb) related functions.  Runs within the main
> > diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> > index 88f18e983..1704bc6ba 100644
> > --- a/controller/pinctrl.h
> > +++ b/controller/pinctrl.h
> > @@ -33,6 +33,7 @@ struct sbrec_dns_table;
> >  struct sbrec_controller_event_table;
> >  struct sbrec_service_monitor_table;
> >  struct sbrec_bfd_table;
> > +struct sbrec_port_binding;
> >
> >  void pinctrl_init(void);
> >  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > @@ -56,4 +57,8 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >  void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn);
> >  void pinctrl_destroy(void);
> >  void pinctrl_set_br_int_name(char *br_int_name);
> > +
> > +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key);
> > +bool db_is_port_activated(const struct sbrec_port_binding *pb,
> > +                          const struct sbrec_chassis *chassis);
> >  #endif /* controller/pinctrl.h */
> > diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > index f55d77d47..0a1865741 100644
> > --- a/include/ovn/actions.h
> > +++ b/include/ovn/actions.h
> > @@ -116,6 +116,7 @@ struct ovn_extend_table;
> >      OVNACT(PUT_FDB,           ovnact_put_fdb)         \
> >      OVNACT(GET_FDB,           ovnact_get_fdb)         \
> >      OVNACT(LOOKUP_FDB,        ovnact_lookup_fdb)      \
> > +    OVNACT(ACTIVATION_STRATEGY_RARP, ovnact_activation_strategy_rarp) \
>
> I am confused why we need to add a logical flow action for this. I think it is not used, and doesn't seem to be used in the future. All we need is just an action-code that is used in pinctrl handler.
>
> Thanks,
> Han
>
> >
> >  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >  enum OVS_PACKED_ENUM ovnact_type {
> > @@ -420,6 +421,11 @@ struct ovnact_handle_svc_check {
> >      struct expr_field port;     /* Logical port name. */
> >  };
> >
> > +/* OVNACT_ACTIVATION_STRATEGY_RARP. */
> > +struct ovnact_activation_strategy_rarp {
> > +    struct ovnact ovnact;
> > +};
> > +
> >  /* OVNACT_FWD_GROUP. */
> >  struct ovnact_fwd_group {
> >      struct ovnact ovnact;
> > @@ -681,6 +687,9 @@ enum action_opcode {
> >      /* put_fdb(inport, eth.src).
> >       */
> >      ACTION_OPCODE_PUT_FDB,
> > +
> > +    /* activation_strategy_rarp() */
> > +    ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
> >  };
> >
> >  /* Header. */
> > diff --git a/lib/actions.c b/lib/actions.c
> > index a3cf747db..4766b3227 100644
> > --- a/lib/actions.c
> > +++ b/lib/actions.c
> > @@ -3822,6 +3822,41 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
> >  {
> >  }
> >
> > +static void
> > +parse_activation_strategy_rarp(struct action_context *ctx OVS_UNUSED)
> > +{
> > +     if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) {
> > +        return;
> > +    }
> > +
> > +    ovnact_put_ACTIVATION_STRATEGY_RARP(ctx->ovnacts);
> > +    lexer_force_match(ctx->lexer, LEX_T_RPAREN);
> > +}
> > +
> > +static void
> > +format_ACTIVATION_STRATEGY_RARP(
> > +        const struct ovnact_activation_strategy_rarp *activation OVS_UNUSED,
> > +        struct ds *s)
> > +{
> > +    ds_put_cstr(s, "activation_strategy_rarp();");
> > +}
> > +
> > +static void
> > +encode_ACTIVATION_STRATEGY_RARP(
> > +        const struct ovnact_activation_strategy_rarp *activation OVS_UNUSED,
> > +        const struct ovnact_encode_params *ep,
> > +        struct ofpbuf *ofpacts)
> > +{
> > +    encode_controller_op(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
> > +                         ep->ctrl_meter_id, ofpacts);
> > +}
> > +
> > +static void
> > +ovnact_activation_strategy_rarp_free(
> > +    struct ovnact_activation_strategy_rarp *activation OVS_UNUSED)
> > +{
> > +}
> > +
> >  static void
> >  parse_fwd_group_action(struct action_context *ctx)
> >  {
> > @@ -4376,6 +4411,8 @@ parse_action(struct action_context *ctx)
> >          parse_bind_vport(ctx);
> >      } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
> >          parse_handle_svc_check(ctx);
> > +    } else if (lexer_match_id(ctx->lexer, "activation_strategy_rarp")) {
> > +        parse_activation_strategy_rarp(ctx);
> >      } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
> >          parse_fwd_group_action(ctx);
> >      } else if (lexer_match_id(ctx->lexer, "handle_dhcpv6_reply")) {
> > @@ -4619,7 +4656,8 @@ ovnact_op_to_string(uint32_t ovnact_opc)
> >          ACTION_OPCODE(BIND_VPORT)                   \
> >          ACTION_OPCODE(DHCP6_SERVER)                 \
> >          ACTION_OPCODE(HANDLE_SVC_CHECK)             \
> > -        ACTION_OPCODE(BFD_MSG)
> > +        ACTION_OPCODE(BFD_MSG)                      \
> > +        ACTION_OPCODE(ACTIVATION_STRATEGY_RARP)
> >  #define ACTION_OPCODE(ENUM) \
> >      case ACTION_OPCODE_##ENUM: return xstrdup(#ENUM);
> >      ACTION_OPCODES
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 55919188e..4fb63edf8 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -3385,6 +3385,16 @@ ovn_port_update_sbrec(struct northd_input *input_data,
> >                  smap_add(&options, "vlan-passthru", "true");
> >              }
> >
> > +            /* Retain activated chassis flags. */
> > +            if (op->sb->requested_additional_chassis) {
> > +                const char *activated_str = smap_get(
> > +                    &op->sb->options, "additional-chassis-activated");
> > +                if (activated_str) {
> > +                    smap_add(&options, "additional-chassis-activated",
> > +                             activated_str);
> > +                }
> > +            }
> > +
> >              sbrec_port_binding_set_options(op->sb, &options);
> >              smap_destroy(&options);
> >              if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index 1cfc2024d..b9ae81acb 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -102,7 +102,10 @@ static const char *rbac_port_binding_auth[] =
> >  static const char *rbac_port_binding_update[] =
> >      {"chassis", "additional_chassis",
> >       "encap", "additional_encap",
> > -     "up", "virtual_parent"};
> > +     "up", "virtual_parent",
> > +     /* NOTE: we only need to update the additional-chassis-activated key,
> > +      * but RBAC_Role doesn't support mutate operation for subkeys. */
> > +     "options"};
> >
> >  static const char *rbac_mac_binding_auth[] =
> >      {""};
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index df17f0cbe..97600e2c2 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -1045,6 +1045,17 @@
> >            </p>
> >          </column>
> >
> > +        <column name="options" key="activation-strategy">
> > +          If used with multiple chassis set in
> > +          <ref column="requested-chassis"/>, specifies an activation strategy
> > +          for all additional chassis. By default, no activation strategy is
> > +          used, meaning additional port locations are immediately available for
> > +          use. When set to "rarp", the port is blocked for ingress and egress
> > +          communication until a RARP packet is sent from a new location. The
> > +          "rarp" strategy is useful in live migration scenarios for virtual
> > +          machines.
> > +        </column>
> > +
> >          <column name="options" key="iface-id-ver">
> >            If set, this port will be bound by <code>ovn-controller</code>
> >            only if this same key and value is configured in the
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 78db5b7c5..5a1fdbe72 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -3300,6 +3300,21 @@ tcp.flags = RST;
> >          </p>
> >        </column>
> >
> > +      <column name="options" key="activation-strategy">
> > +        If used with multiple chassis set in <ref column="requested-chassis"/>,
> > +        specifies an activation strategy for all additional chassis. By
> > +        default, no activation strategy is used, meaning additional port
> > +        locations are immediately available for use. When set to "rarp", the
> > +        port is blocked for ingress and egress communication until a RARP
> > +        packet is sent from a new location. The "rarp" strategy is useful
> > +        in live migration scenarios for virtual machines.
> > +      </column>
> > +
> > +      <column name="options" key="additional-chassis-activated">
> > +        When <ref column="activation-strategy"/> is set, this option indicates
> > +        that the port was activated using the strategy specified.
> > +      </column>
> > +
> >        <column name="options" key="iface-id-ver">
> >          If set, this port will be bound by <code>ovn-controller</code>
> >          only if this same key and value is configured in the
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 1154fdc05..92821d6d4 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -14699,6 +14699,172 @@ OVN_CLEANUP([hv1],[hv2],[hv3])
> >  AT_CLEANUP
> >  ])
> >
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([options:activation-strategy for logical port])
> > +ovn_start
> > +
> > +net_add n1
> > +
> > +sim_add hv1
> > +as hv1
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.11
> > +
> > +sim_add hv2
> > +as hv2
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.12
> > +
> > +sim_add hv3
> > +as hv3
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.13
> > +
> > +# Disable local ARP responder to pass ARP requests through tunnels
> > +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config vlan-passthru=true
> > +
> > +check ovn-nbctl lsp-add ls0 migrator
> > +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> > +                                         activation-strategy=rarp
> > +
> > +check ovn-nbctl lsp-add ls0 outside
> > +check ovn-nbctl lsp-set-options outside requested-chassis=hv3
> > +
> > +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:01 10.0.0.1"
> > +check ovn-nbctl lsp-set-addresses outside "00:00:00:00:00:02 10.0.0.2"
> > +
> > +for hv in hv1 hv2; do
> > +    as $hv check ovs-vsctl -- add-port br-int migrator -- \
> > +        set Interface migrator external-ids:iface-id=migrator \
> > +                               options:tx_pcap=$hv/migrator-tx.pcap \
> > +                               options:rxq_pcap=$hv/migrator-rx.pcap
> > +done
> > +as hv3 check ovs-vsctl -- add-port br-int outside -- \
> > +    set Interface outside external-ids:iface-id=outside
> > +
> > +wait_row_count Chassis 1 name=hv1
> > +wait_row_count Chassis 1 name=hv2
> > +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> > +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> > +
> > +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> > +wait_column "$hv1_uuid" Port_Binding requested_chassis logical_port=migrator
> > +wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=migrator
> > +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis logical_port=migrator
> > +
> > +OVN_POPULATE_ARP
> > +
> > +send_arp() {
> > +    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> > +    local request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> > +    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> > +    echo "${request}"
> > +}
> > +
> > +send_rarp() {
> > +    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> > +    local request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> > +    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> > +    echo "${request}"
> > +}
> > +
> > +reset_pcap_file() {
> > +    local hv=$1
> > +    local iface=$2
> > +    local pcap_file=$3
> > +    as $hv check ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
> > +                                                   options:rxq_pcap=dummy-rx.pcap
> > +    check rm -f ${pcap_file}*.pcap
> > +    as $hv check ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
> > +                                                   options:rxq_pcap=${pcap_file}-rx.pcap
> > +}
> > +
> > +reset_env() {
> > +    reset_pcap_file hv1 migrator hv1/migrator
> > +    reset_pcap_file hv2 migrator hv2/migrator
> > +    reset_pcap_file hv3 outside hv3/outside
> > +
> > +    for port in hv1/migrator hv2/migrator hv3/outside; do
> > +        : > $port.expected
> > +    done
> > +}
> > +
> > +check_packets() {
> > +    OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> > +    OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> > +    OVN_CHECK_PACKETS([hv3/outside-tx.pcap], [hv3/outside.expected])
> > +}
> > +
> > +migrator_spa=$(ip_to_hex 10 0 0 1)
> > +outside_spa=$(ip_to_hex 10 0 0 2)
> > +
> > +reset_env
> > +
> > +# Packet from hv3:Outside arrives to hv1:Migrator
> > +# hv3:Outside cannot reach hv2:Migrator because it is blocked by RARP strategy
> > +request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa $migrator_spa)
> > +echo $request >> hv1/migrator.expected
> > +
> > +check_packets
> > +reset_env
> > +
> > +# Packet from hv1:Migrator arrives to hv3:Outside
> > +request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
> > +echo $request >> hv3/outside.expected
> > +
> > +check_packets
> > +reset_env
> > +
> > +# hv2:Migrator cannot reach to hv3:Outside because it is blocked by RARP strategy
> > +request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
> > +
> > +check_packets
> > +reset_env
> > +
> > +AT_CHECK([ovn-sbctl find port_binding logical_port=migrator | grep -q additional-chassis-activated], [1])
> > +
> > +# Now activate hv2:Migrator location
> > +request=$(send_rarp hv2 migrator 000000000001 ffffffffffff $migrator_spa $migrator_spa)
> > +
> > +# RARP was reinjected into the pipeline
> > +echo $request >> hv3/outside.expected
> > +
> > +check_packets
> > +reset_env
> > +
> > +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding logical_port=migrator)
> > +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid options:additional-chassis-activated | tr -d '""')])
> > +
> > +# Now packet arrives to both locations
> > +request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa $migrator_spa)
> > +echo $request >> hv1/migrator.expected
> > +echo $request >> hv2/migrator.expected
> > +
> > +check_packets
> > +reset_env
> > +
> > +# Packet from hv1:Migrator still arrives to hv3:Outside
> > +request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
> > +echo $request >> hv3/outside.expected
> > +
> > +check_packets
> > +reset_env
> > +
> > +# hv2:Migrator can now reach to hv3:Outside because RARP strategy activated it
> > +request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
> > +echo $request >> hv3/outside.expected
> > +
> > +check_packets
> > +
> > +# complete port migration and check that -activated flag is reset
> > +check ovn-nbctl lsp-set-options migrator requested-chassis=hv2
> > +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid options:additional-chassis-activated)])
> > +
> > +OVN_CLEANUP([hv1],[hv2])
> > +
> > +AT_CLEANUP
> > +])
> > +
> >  OVN_FOR_EACH_NORTHD([
> >  AT_SETUP([options:requested-chassis for logical port])
> >  ovn_start
> > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> > index b5116e969..ef0bd566f 100644
> > --- a/utilities/ovn-trace.c
> > +++ b/utilities/ovn-trace.c
> > @@ -2885,6 +2885,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
> >              /* Nothing to do for tracing. */
> >              break;
> >
> > +        case OVNACT_ACTIVATION_STRATEGY_RARP:
> > +            break;
> > +
> >          case OVNACT_GET_FDB:
> >              execute_get_fdb(ovnact_get_GET_FDB(a), dp, uflow);
> >              break;
> > --
> > 2.34.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 3fedcfeed..b5dbda89c 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@  Post v22.03.0
   - Add global option (NB_Global.options:default_acl_drop) to enable
     implicit drop behavior on logical switches with ACLs applied.
   - Support list of chassis for Logical_Switch_Port:options:requested-chassis.
+  - Support Logical_Switch_Port:options:activation-strategy for live migration
+    scenarios.
 
 OVN v22.03.0 - 11 Mar 2022
 --------------------------
diff --git a/controller/physical.c b/controller/physical.c
index 6399d1fce..ce0ff24ba 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -40,7 +40,9 @@ 
 #include "lib/mcast-group-index.h"
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
+#include "ovn/actions.h"
 #include "physical.h"
+#include "pinctrl.h"
 #include "openvswitch/shash.h"
 #include "simap.h"
 #include "smap.h"
@@ -1030,6 +1032,59 @@  get_additional_tunnels(const struct sbrec_port_binding *binding,
     return additional_tunnels;
 }
 
+static void
+setup_rarp_activation_strategy(const struct sbrec_port_binding *binding,
+                               ofp_port_t ofport, struct zone_ids *zone_ids,
+                               struct ovn_desired_flow_table *flow_table,
+                               struct ofpbuf *ofpacts_p)
+{
+    struct match match = MATCH_CATCHALL_INITIALIZER;
+
+    /* Unblock the port on ingress RARP. */
+    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
+    match_set_in_port(&match, ofport);
+    ofpbuf_clear(ofpacts_p);
+
+    load_logical_ingress_metadata(binding, zone_ids, ofpacts_p);
+
+    size_t ofs = ofpacts_p->size;
+    struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts_p);
+    oc->max_len = UINT16_MAX;
+    oc->reason = OFPR_ACTION;
+
+    struct action_header ah = {
+        .opcode = htonl(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP)
+    };
+    ofpbuf_put(ofpacts_p, &ah, sizeof ah);
+
+    ofpacts_p->header = oc;
+    oc->userdata_len = ofpacts_p->size - (ofs + sizeof *oc);
+    ofpact_finish_CONTROLLER(ofpacts_p, &oc);
+
+    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1010,
+                    binding->header_.uuid.parts[0],
+                    &match, ofpacts_p, &binding->header_.uuid);
+    ofpbuf_clear(ofpacts_p);
+
+    /* Block all non-RARP traffic for the port, both directions. */
+    match_init_catchall(&match);
+    match_set_in_port(&match, ofport);
+
+    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1000,
+                    binding->header_.uuid.parts[0],
+                    &match, ofpacts_p, &binding->header_.uuid);
+
+    match_init_catchall(&match);
+    uint32_t dp_key = binding->datapath->tunnel_key;
+    uint32_t port_key = binding->tunnel_key;
+    match_set_metadata(&match, htonll(dp_key));
+    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
+
+    ofctrl_add_flow(flow_table, OFTABLE_LOG_EGRESS_PIPELINE, 1000,
+                    binding->header_.uuid.parts[0],
+                    &match, ofpacts_p, &binding->header_.uuid);
+}
+
 static void
 consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
                       enum mf_field_id mff_ovn_geneve,
@@ -1307,6 +1362,30 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
             }
         }
 
+        for (int i = 0; i < binding->n_additional_chassis; i++) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            if (binding->additional_chassis[i] == chassis) {
+                const char *strategy = smap_get(&binding->options,
+                                                "activation-strategy");
+                if (strategy
+                        && !db_is_port_activated(binding, chassis)
+                        && !pinctrl_is_port_activated(dp_key, port_key)) {
+                    if (!strcmp(strategy, "rarp")) {
+                        setup_rarp_activation_strategy(binding, ofport,
+                                                       &zone_ids, flow_table,
+                                                       ofpacts_p);
+                    } else {
+                        VLOG_WARN_RL(&rl,
+                                     "Unknown activation strategy defined for "
+                                     "port %s: %s",
+                                     binding->logical_port, strategy);
+                        goto out;
+                    }
+                }
+            break;
+            }
+        }
+
         /* Remember the size with just strip vlan added so far,
          * as we're going to remove this with ofpbuf_pull() later. */
         uint32_t ofpacts_orig_size = ofpacts_p->size;
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index df9284e50..2c31688a4 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -29,10 +29,12 @@ 
 #include "lport.h"
 #include "mac-learn.h"
 #include "nx-match.h"
+#include "ofctrl.h"
 #include "latch.h"
 #include "lib/packets.h"
 #include "lib/sset.h"
 #include "openvswitch/ofp-actions.h"
+#include "openvswitch/ofp-flow.h"
 #include "openvswitch/ofp-msgs.h"
 #include "openvswitch/ofp-packet.h"
 #include "openvswitch/ofp-print.h"
@@ -152,8 +154,8 @@  VLOG_DEFINE_THIS_MODULE(pinctrl);
  *  and pinctrl_run().
  *  'pinctrl_handler_seq' is used by pinctrl_run() to
  *  wake up pinctrl_handler thread from poll_block() if any changes happened
- *  in 'send_garp_rarp_data', 'ipv6_ras' and 'buffered_mac_bindings'
- *  structures.
+ *  in 'send_garp_rarp_data', 'ipv6_ras', 'activated_ports' and
+ *  'buffered_mac_bindings' structures.
  *
  *  'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
  *  the main thread from poll_block() when mac bindings/igmp groups need to
@@ -198,6 +200,20 @@  static void wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn);
 static void send_mac_binding_buffered_pkts(struct rconn *swconn)
     OVS_REQUIRES(pinctrl_mutex);
 
+static void pinctrl_rarp_activation_strategy_handler(struct rconn *swconn,
+                                                     const struct match *md,
+                                                     struct dp_packet *pkt_in);
+
+static void init_activated_ports(void);
+static void destroy_activated_ports(void);
+static void wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn);
+static void run_activated_ports(
+    struct ovsdb_idl_txn *ovnsb_idl_txn,
+    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+    struct ovsdb_idl_index *sbrec_port_binding_by_key,
+    const struct sbrec_chassis *chassis)
+    OVS_REQUIRES(pinctrl_mutex);
+
 static void init_send_garps_rarps(void);
 static void destroy_send_garps_rarps(void);
 static void send_garp_rarp_wait(long long int send_garp_rarp_time);
@@ -522,6 +538,7 @@  pinctrl_init(void)
     init_ipv6_ras();
     init_ipv6_prefixd();
     init_buffered_packets_map();
+    init_activated_ports();
     init_event_table();
     ip_mcast_snoop_init();
     init_put_vport_bindings();
@@ -3244,6 +3261,13 @@  process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
         ovs_mutex_unlock(&pinctrl_mutex);
         break;
 
+    case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP:
+        ovs_mutex_lock(&pinctrl_mutex);
+        pinctrl_rarp_activation_strategy_handler(swconn, &pin.flow_metadata,
+                                                 &packet);
+        ovs_mutex_unlock(&pinctrl_mutex);
+        break;
+
     default:
         VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
                      ntohl(ah->opcode));
@@ -3508,6 +3532,8 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
     bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
                     chassis, active_tunnels);
     run_put_fdbs(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac);
+    run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
+                        sbrec_port_binding_by_key, chassis);
     ovs_mutex_unlock(&pinctrl_mutex);
 }
 
@@ -4011,6 +4037,7 @@  pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
     int64_t new_seq = seq_read(pinctrl_main_seq);
     seq_wait(pinctrl_main_seq, new_seq);
     wait_put_fdbs(ovnsb_idl_txn);
+    wait_activated_ports(ovnsb_idl_txn);
 }
 
 /* Called by ovn-controller. */
@@ -4025,6 +4052,7 @@  pinctrl_destroy(void)
     destroy_ipv6_ras();
     destroy_ipv6_prefixd();
     destroy_buffered_packets_map();
+    destroy_activated_ports();
     event_table_destroy();
     destroy_put_mac_bindings();
     destroy_put_vport_bindings();
@@ -7702,6 +7730,200 @@  pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
     }
 }
 
+static struct ofpbuf *
+encode_flow_mod(struct ofputil_flow_mod *fm)
+{
+    fm->buffer_id = UINT32_MAX;
+    fm->out_port = OFPP_ANY;
+    fm->out_group = OFPG_ANY;
+    return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF15_OXM);
+}
+
+struct port_pair {
+    uint32_t dp_key;
+    uint32_t port_key;
+    struct ovs_list list;
+};
+
+static struct ovs_list activated_ports;
+
+static void
+init_activated_ports(void)
+{
+    ovs_list_init(&activated_ports);
+}
+
+static void
+destroy_activated_ports(void)
+{
+    struct port_pair *pp;
+    LIST_FOR_EACH_POP (pp, list, &activated_ports) {
+        free(pp);
+    }
+}
+
+static void
+wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn)
+{
+    if (ovnsb_idl_txn && !ovs_list_is_empty(&activated_ports)) {
+        poll_immediate_wake();
+    }
+}
+
+bool
+db_is_port_activated(const struct sbrec_port_binding *pb,
+                     const struct sbrec_chassis *chassis)
+{
+    const char *activated_chassis = smap_get(&pb->options,
+                                             "additional-chassis-activated");
+    if (activated_chassis) {
+        char *save_ptr;
+        char *tokstr = xstrdup(activated_chassis);
+        for (const char *chassis_name = strtok_r(tokstr, ",", &save_ptr);
+             chassis_name != NULL;
+             chassis_name = strtok_r(NULL, ",", &save_ptr)) {
+            if (!strcmp(chassis_name, chassis->name)) {
+                free(tokstr);
+                return true;
+            }
+        }
+        free(tokstr);
+    }
+    return false;
+}
+
+static void
+run_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn,
+                    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+                    struct ovsdb_idl_index *sbrec_port_binding_by_key,
+                    const struct sbrec_chassis *chassis)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    if (!ovnsb_idl_txn) {
+        return;
+    }
+
+    const struct port_pair *pp;
+    LIST_FOR_EACH (pp, list, &activated_ports) {
+        const struct sbrec_port_binding *pb = lport_lookup_by_key(
+            sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
+            pp->dp_key, pp->port_key);
+        if (pb) {
+            if (!db_is_port_activated(pb, chassis)) {
+                const char *activated_chassis = smap_get(
+                    &pb->options, "additional-chassis-activated");
+                char *activated_str;
+                if (activated_chassis) {
+                    activated_str = xasprintf(
+                        "%s,%s", activated_chassis, chassis->name);
+                    sbrec_port_binding_update_options_setkey(
+                        pb, "additional-chassis-activated", activated_str);
+                    free(activated_str);
+                } else {
+                    sbrec_port_binding_update_options_setkey(
+                        pb, "additional-chassis-activated", chassis->name);
+                }
+            }
+        }
+    }
+    destroy_activated_ports();
+}
+
+bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    const struct port_pair *pp;
+    LIST_FOR_EACH (pp, list, &activated_ports) {
+        if (pp->dp_key == dp_key && pp->port_key == port_key) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void
+pinctrl_rarp_activation_strategy_handler(struct rconn *swconn,
+                                         const struct match *md,
+                                         struct dp_packet *pkt_in)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    /* Admitted; send RARP back to the pipeline. */
+    uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
+    uint32_t dp_key = ntohll(md->flow.metadata);
+
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
+    put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(pkt_in),
+        .packet_len = dp_packet_size(pkt_in),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+
+    enum ofputil_protocol proto;
+    proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+    ofpbuf_uninit(&ofpacts);
+
+    /* Delete ingress/egress drop flows to unblock the port. */
+    struct match match;
+    struct minimatch mmatch;
+
+    match_init_catchall(&match);
+    match_set_metadata(&match, md->flow.metadata);
+    match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0,
+                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
+    minimatch_init(&mmatch, &match);
+
+    struct ofputil_flow_mod fm = {
+        .match = mmatch,
+        .priority = 1000,
+        .table_id = OFTABLE_LOG_INGRESS_PIPELINE,
+        .command = OFPFC_DELETE_STRICT,
+    };
+    queue_msg(swconn, encode_flow_mod(&fm));
+    minimatch_destroy(&mmatch);
+
+    match_init_catchall(&match);
+    match_set_metadata(&match, md->flow.metadata);
+    match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
+                  md->flow.regs[MFF_LOG_INPORT - MFF_REG0]);
+    minimatch_init(&mmatch, &match);
+
+    fm.table_id = OFTABLE_LOG_EGRESS_PIPELINE;
+    queue_msg(swconn, encode_flow_mod(&fm));
+    minimatch_destroy(&mmatch);
+
+    /* Delete the inport controller flow (the one that got us here). */
+    match_init_catchall(&match);
+    match_set_metadata(&match, md->flow.metadata);
+    match_set_dl_type(&match, htons(ETH_TYPE_RARP));
+    minimatch_init(&mmatch, &match);
+
+    fm.priority = 1010;
+    fm.table_id = OFTABLE_PHY_TO_LOG;
+    queue_msg(swconn, encode_flow_mod(&fm));
+    minimatch_destroy(&mmatch);
+
+    /* Tag the port as activated in-memory. */
+    struct port_pair *pp = xmalloc(sizeof *pp);
+    pp->port_key = port_key;
+    pp->dp_key = dp_key;
+    ovs_list_push_front(&activated_ports, &pp->list);
+
+    /* Notify main thread on pending additional-chassis-activated updates. */
+    notify_pinctrl_main();
+}
+
 static struct hmap put_fdbs;
 
 /* MAC learning (fdb) related functions.  Runs within the main
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 88f18e983..1704bc6ba 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -33,6 +33,7 @@  struct sbrec_dns_table;
 struct sbrec_controller_event_table;
 struct sbrec_service_monitor_table;
 struct sbrec_bfd_table;
+struct sbrec_port_binding;
 
 void pinctrl_init(void);
 void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
@@ -56,4 +57,8 @@  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn);
 void pinctrl_destroy(void);
 void pinctrl_set_br_int_name(char *br_int_name);
+
+bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key);
+bool db_is_port_activated(const struct sbrec_port_binding *pb,
+                          const struct sbrec_chassis *chassis);
 #endif /* controller/pinctrl.h */
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index f55d77d47..0a1865741 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -116,6 +116,7 @@  struct ovn_extend_table;
     OVNACT(PUT_FDB,           ovnact_put_fdb)         \
     OVNACT(GET_FDB,           ovnact_get_fdb)         \
     OVNACT(LOOKUP_FDB,        ovnact_lookup_fdb)      \
+    OVNACT(ACTIVATION_STRATEGY_RARP, ovnact_activation_strategy_rarp) \
 
 /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -420,6 +421,11 @@  struct ovnact_handle_svc_check {
     struct expr_field port;     /* Logical port name. */
 };
 
+/* OVNACT_ACTIVATION_STRATEGY_RARP. */
+struct ovnact_activation_strategy_rarp {
+    struct ovnact ovnact;
+};
+
 /* OVNACT_FWD_GROUP. */
 struct ovnact_fwd_group {
     struct ovnact ovnact;
@@ -681,6 +687,9 @@  enum action_opcode {
     /* put_fdb(inport, eth.src).
      */
     ACTION_OPCODE_PUT_FDB,
+
+    /* activation_strategy_rarp() */
+    ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
 };
 
 /* Header. */
diff --git a/lib/actions.c b/lib/actions.c
index a3cf747db..4766b3227 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -3822,6 +3822,41 @@  ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
 {
 }
 
+static void
+parse_activation_strategy_rarp(struct action_context *ctx OVS_UNUSED)
+{
+     if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) {
+        return;
+    }
+
+    ovnact_put_ACTIVATION_STRATEGY_RARP(ctx->ovnacts);
+    lexer_force_match(ctx->lexer, LEX_T_RPAREN);
+}
+
+static void
+format_ACTIVATION_STRATEGY_RARP(
+        const struct ovnact_activation_strategy_rarp *activation OVS_UNUSED,
+        struct ds *s)
+{
+    ds_put_cstr(s, "activation_strategy_rarp();");
+}
+
+static void
+encode_ACTIVATION_STRATEGY_RARP(
+        const struct ovnact_activation_strategy_rarp *activation OVS_UNUSED,
+        const struct ovnact_encode_params *ep,
+        struct ofpbuf *ofpacts)
+{
+    encode_controller_op(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
+                         ep->ctrl_meter_id, ofpacts);
+}
+
+static void
+ovnact_activation_strategy_rarp_free(
+    struct ovnact_activation_strategy_rarp *activation OVS_UNUSED)
+{
+}
+
 static void
 parse_fwd_group_action(struct action_context *ctx)
 {
@@ -4376,6 +4411,8 @@  parse_action(struct action_context *ctx)
         parse_bind_vport(ctx);
     } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
         parse_handle_svc_check(ctx);
+    } else if (lexer_match_id(ctx->lexer, "activation_strategy_rarp")) {
+        parse_activation_strategy_rarp(ctx);
     } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
         parse_fwd_group_action(ctx);
     } else if (lexer_match_id(ctx->lexer, "handle_dhcpv6_reply")) {
@@ -4619,7 +4656,8 @@  ovnact_op_to_string(uint32_t ovnact_opc)
         ACTION_OPCODE(BIND_VPORT)                   \
         ACTION_OPCODE(DHCP6_SERVER)                 \
         ACTION_OPCODE(HANDLE_SVC_CHECK)             \
-        ACTION_OPCODE(BFD_MSG)
+        ACTION_OPCODE(BFD_MSG)                      \
+        ACTION_OPCODE(ACTIVATION_STRATEGY_RARP)
 #define ACTION_OPCODE(ENUM) \
     case ACTION_OPCODE_##ENUM: return xstrdup(#ENUM);
     ACTION_OPCODES
diff --git a/northd/northd.c b/northd/northd.c
index 55919188e..4fb63edf8 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -3385,6 +3385,16 @@  ovn_port_update_sbrec(struct northd_input *input_data,
                 smap_add(&options, "vlan-passthru", "true");
             }
 
+            /* Retain activated chassis flags. */
+            if (op->sb->requested_additional_chassis) {
+                const char *activated_str = smap_get(
+                    &op->sb->options, "additional-chassis-activated");
+                if (activated_str) {
+                    smap_add(&options, "additional-chassis-activated",
+                             activated_str);
+                }
+            }
+
             sbrec_port_binding_set_options(op->sb, &options);
             smap_destroy(&options);
             if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1cfc2024d..b9ae81acb 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -102,7 +102,10 @@  static const char *rbac_port_binding_auth[] =
 static const char *rbac_port_binding_update[] =
     {"chassis", "additional_chassis",
      "encap", "additional_encap",
-     "up", "virtual_parent"};
+     "up", "virtual_parent",
+     /* NOTE: we only need to update the additional-chassis-activated key,
+      * but RBAC_Role doesn't support mutate operation for subkeys. */
+     "options"};
 
 static const char *rbac_mac_binding_auth[] =
     {""};
diff --git a/ovn-nb.xml b/ovn-nb.xml
index df17f0cbe..97600e2c2 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -1045,6 +1045,17 @@ 
           </p>
         </column>
 
+        <column name="options" key="activation-strategy">
+          If used with multiple chassis set in
+          <ref column="options" key="requested-chassis"/>, specifies an activation strategy
+          for all additional chassis. By default, no activation strategy is
+          used, meaning additional port locations are immediately available for
+          use. When set to "rarp", the port is blocked for ingress and egress
+          communication until a RARP packet is sent from a new location. The
+          "rarp" strategy is useful in live migration scenarios for virtual
+          machines.
+        </column>
+
         <column name="options" key="iface-id-ver">
           If set, this port will be bound by <code>ovn-controller</code>
           only if this same key and value is configured in the
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 78db5b7c5..5a1fdbe72 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -3300,6 +3300,21 @@  tcp.flags = RST;
         </p>
       </column>
 
+      <column name="options" key="activation-strategy">
+        If used with multiple chassis set in
+        <ref column="options" key="requested-chassis"/>,
+        specifies an activation strategy for all additional chassis. By
+        default, no activation strategy is used, meaning additional port
+        locations are immediately available for use. When set to "rarp", the
+        port is blocked for ingress and egress communication until a RARP
+        packet is sent from a new location. The "rarp" strategy is useful
+        in live migration scenarios for virtual machines.
+      </column>
+
+      <column name="options" key="additional-chassis-activated">
+        When <ref column="options" key="activation-strategy"/> is set, this
+        option indicates
+        that the port was activated using the strategy specified.
+      </column>
+
       <column name="options" key="iface-id-ver">
         If set, this port will be bound by <code>ovn-controller</code>
         only if this same key and value is configured in the
diff --git a/tests/ovn.at b/tests/ovn.at
index 1154fdc05..92821d6d4 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -14699,6 +14699,172 @@  OVN_CLEANUP([hv1],[hv2],[hv3])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([options:activation-strategy for logical port])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.11
+
+sim_add hv2
+as hv2
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.12
+
+sim_add hv3
+as hv3
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.13
+
+# Disable local ARP responder to pass ARP requests through tunnels
+check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config vlan-passthru=true
+
+check ovn-nbctl lsp-add ls0 migrator
+check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
+                                         activation-strategy=rarp
+
+check ovn-nbctl lsp-add ls0 outside
+check ovn-nbctl lsp-set-options outside requested-chassis=hv3
+
+check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:01 10.0.0.1"
+check ovn-nbctl lsp-set-addresses outside "00:00:00:00:00:02 10.0.0.2"
+
+for hv in hv1 hv2; do
+    as $hv check ovs-vsctl -- add-port br-int migrator -- \
+        set Interface migrator external-ids:iface-id=migrator \
+                               options:tx_pcap=$hv/migrator-tx.pcap \
+                               options:rxq_pcap=$hv/migrator-rx.pcap
+done
+as hv3 check ovs-vsctl -- add-port br-int outside -- \
+    set Interface outside external-ids:iface-id=outside
+
+wait_row_count Chassis 1 name=hv1
+wait_row_count Chassis 1 name=hv2
+hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
+hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
+
+wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
+wait_column "$hv1_uuid" Port_Binding requested_chassis logical_port=migrator
+wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=migrator
+wait_column "$hv2_uuid" Port_Binding requested_additional_chassis logical_port=migrator
+
+OVN_POPULATE_ARP
+
+send_arp() {
+    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
+    local request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
+    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
+    echo "${request}"
+}
+
+send_rarp() {
+    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
+    local request=${eth_dst}${eth_src}80350001080006040003${eth_src}${spa}${eth_dst}${tpa}
+    as ${hv} ovs-appctl netdev-dummy/receive $inport $request
+    echo "${request}"
+}
+
+reset_pcap_file() {
+    local hv=$1
+    local iface=$2
+    local pcap_file=$3
+    as $hv check ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
+                                                   options:rxq_pcap=dummy-rx.pcap
+    check rm -f ${pcap_file}*.pcap
+    as $hv check ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
+                                                   options:rxq_pcap=${pcap_file}-rx.pcap
+}
+
+reset_env() {
+    reset_pcap_file hv1 migrator hv1/migrator
+    reset_pcap_file hv2 migrator hv2/migrator
+    reset_pcap_file hv3 outside hv3/outside
+
+    for port in hv1/migrator hv2/migrator hv3/outside; do
+        : > $port.expected
+    done
+}
+
+check_packets() {
+    OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
+    OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
+    OVN_CHECK_PACKETS([hv3/outside-tx.pcap], [hv3/outside.expected])
+}
+
+migrator_spa=$(ip_to_hex 10 0 0 1)
+outside_spa=$(ip_to_hex 10 0 0 2)
+
+reset_env
+
+# Packet from hv3:Outside arrives to hv1:Migrator
+# hv3:Outside cannot reach hv2:Migrator because it is blocked by RARP strategy
+request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa $migrator_spa)
+echo $request >> hv1/migrator.expected
+
+check_packets
+reset_env
+
+# Packet from hv1:Migrator arrives to hv3:Outside
+request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
+echo $request >> hv3/outside.expected
+
+check_packets
+reset_env
+
+# hv2:Migrator cannot reach to hv3:Outside because it is blocked by RARP strategy
+request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
+
+check_packets
+reset_env
+
+AT_CHECK([ovn-sbctl find port_binding logical_port=migrator | grep -q additional-chassis-activated], [1])
+
+# Now activate hv2:Migrator location
+request=$(send_rarp hv2 migrator 000000000001 ffffffffffff $migrator_spa $migrator_spa)
+
+# RARP was reinjected into the pipeline
+echo $request >> hv3/outside.expected
+
+check_packets
+reset_env
+
+pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding logical_port=migrator)
+OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid options:additional-chassis-activated | tr -d '""')])
+
+# Now packet arrives to both locations
+request=$(send_arp hv3 outside 000000000002 000000000001 $outside_spa $migrator_spa)
+echo $request >> hv1/migrator.expected
+echo $request >> hv2/migrator.expected
+
+check_packets
+reset_env
+
+# Packet from hv1:Migrator still arrives to hv3:Outside
+request=$(send_arp hv1 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
+echo $request >> hv3/outside.expected
+
+check_packets
+reset_env
+
+# hv2:Migrator can now reach to hv3:Outside because RARP strategy activated it
+request=$(send_arp hv2 migrator 000000000001 000000000002 $migrator_spa $outside_spa)
+echo $request >> hv3/outside.expected
+
+check_packets
+
+# complete port migration and check that -activated flag is reset
+check ovn-nbctl lsp-set-options migrator requested-chassis=hv2
+OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid options:additional-chassis-activated)])
+
+OVN_CLEANUP([hv1],[hv2],[hv3])
+
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:requested-chassis for logical port])
 ovn_start
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index b5116e969..ef0bd566f 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -2885,6 +2885,9 @@  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
             /* Nothing to do for tracing. */
             break;
 
+        case OVNACT_ACTIVATION_STRATEGY_RARP:
+            break;
+
         case OVNACT_GET_FDB:
             execute_get_fdb(ovnact_get_GET_FDB(a), dp, uflow);
             break;