[ovs-dev,v2,ovn,2/3] Forwarding group to load balance l2 traffic with liveness detection
diff mbox series

Message ID 20200111004027.21707-3-manoj.sharma@nutanix.com
State New
Headers show
Series
  • Forwarding group to load balance l2 traffic with liveness detection
Related show

Commit Message

Manoj Sharma Jan. 11, 2020, 12:40 a.m. UTC
Add forwarding group support for a logical switch. It will add a new OVN
action "fwd_group" with a virtual IP and virtual MAC. Any number of logical
switch ports of this switch can be added to this forwarding group.
If traffic has to be load balanced across these logical switch ports then
traffic should be sent to forwarding group's virtual IP.

If the logical switch ports correspond to tunnel interfaces and BFD
can be enabled on these tunnel interfaces, then liveness can be enabled
for the forwarding group so that if any of the tunnel is down then
the corresponding logical switch port will not be selected during
load balancing.

Signed-off-by: Manoj Sharma <manoj.sharma@nutanix.com>
---
 controller/lflow.c    |  20 +++++++++
 controller/physical.c |  13 ++++++
 controller/physical.h |   4 ++
 include/ovn/actions.h |  19 +++++++-
 lib/actions.c         | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
 northd/ovn-northd.c   |  63 ++++++++++++++++++++++++++
 utilities/ovn-trace.c |   3 ++
 7 files changed, 243 insertions(+), 1 deletion(-)

Comments

Numan Siddique Jan. 16, 2020, 3:53 p.m. UTC | #1
On Sat, Jan 11, 2020 at 6:11 AM Manoj Sharma <manoj.sharma@nutanix.com>
wrote:

> Add forwarding group support for a logical switch. It will add a new OVN
> action "fwd_group" with a virtual IP and virtual MAC. Any number of logical
> switch ports of this switch can be added to this forwarding group.
> If traffic has to be load balanced across these logical switch ports then
> traffic should be sent to forwarding group's virtual IP.
>
> If the logical switch ports correspond to tunnel interfaces and BFD
> can be enabled on these tunnel interfaces, then liveness can be enabled
> for the forwarding group so that if any of the tunnel is down then
> the corresponding logical switch port will not be selected during
> load balancing.
>
> Signed-off-by: Manoj Sharma <manoj.sharma@nutanix.com>
>

Hi Manoj,

As I mentioned in the earlier email, please add the detailed description in
patch 0 to this patch.

How about defining the action - fwd_group as - fwd_group(liveliness=true,
childports=p1,p2,...) ?

Normally in the OVN action, we use ";" for inner OpenFlow actions, but
these are more like arguments
to the action - fwd_group.

In your setup, you would manually set the child port's chassis yourself ?
i.e ovn-sbctl lsp-bind <child port> <external router chassis>  right ?

You need to add the relevant documentation in ovn-northd.8.xml for the
logical flows added

You need to add documentation for the action - fwd_group in ovn-sb.xml.

Also please update the NEWS file about this feature.

Please see below for few comments.

---
>  controller/lflow.c    |  20 +++++++++
>  controller/physical.c |  13 ++++++
>  controller/physical.h |   4 ++
>  include/ovn/actions.h |  19 +++++++-
>  lib/actions.c         | 122
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  northd/ovn-northd.c   |  63 ++++++++++++++++++++++++++
>  utilities/ovn-trace.c |   3 ++
>  7 files changed, 243 insertions(+), 1 deletion(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index a689320..49dfa06 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -103,6 +103,25 @@ lookup_port_cb(const void *aux_, const char
> *port_name, unsigned int *portp)
>      return false;
>  }
>
> +/* Given the OVN port name, get its openflow port */
> +static bool
> +tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t
> *ofport)
> +{
> +    const struct lookup_port_aux *aux = aux_;
> +
> +    const struct sbrec_port_binding *pb
> +        = lport_lookup_by_name(aux->sbrec_port_binding_by_name,
> port_name);
> +    if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
> +        return false;
> +    }
> +
> +    if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
>  static bool
>  is_chassis_resident_cb(const void *c_aux_, const char *port_name)
>  {
> @@ -681,6 +700,7 @@ consider_logical_flow(
>      struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
>      struct ovnact_encode_params ep = {
>          .lookup_port = lookup_port_cb,
> +        .tunnel_ofport = tunnel_ofport_cb,
>          .aux = &aux,
>          .is_switch = is_switch(ldp),
>          .group_table = group_table,
> diff --git a/controller/physical.c b/controller/physical.c
> index 500d419..af1d10f 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
>
>      simap_destroy(&new_tunnel_to_ofport);
>  }
> +
> +bool
> +get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
> *ofport)
> +{
> +    struct chassis_tunnel *tun = NULL;
> +    tun = chassis_tunnel_find(chassis_name, encap_ip);
> +    if (!tun) {
> +        return false;
> +    }
> +
> +    *ofport = tun->ofport;
> +    return true;
> +}
> diff --git a/controller/physical.h b/controller/physical.h
> index c93f6b1..c0e17cd 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
>          const struct simap *ct_zones,
>          const struct hmap *local_datapaths,
>          struct ovn_desired_flow_table *);
> +bool get_tunnel_ofport(
> +        const char *chassis_name,
> +        char *encap_ip,
> +        ofp_port_t *ofport);
>  #endif /* controller/physical.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 047a8d7..2d39239 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -89,7 +89,8 @@ struct ovn_extend_table;
>      OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
>      OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
>      OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
> -    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
> +    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
> +    OVNACT(FWD_GROUP,         ovnact_fwd_group)
>
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -359,6 +360,15 @@ struct ovnact_handle_svc_check {
>      struct expr_field port;     /* Logical port name. */
>  };
>
> +/* OVNACT_FWD_GROUP. */
> +struct ovnact_fwd_group {
> +    struct ovnact ovnact;
> +    bool liveness;
> +    char **child_ports;       /* Logical ports */
> +    size_t n_child_ports;
> +    uint8_t ltable;           /* Logical table ID of next table. */
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> @@ -620,6 +630,13 @@ struct ovnact_encode_params {
>       * '*portp' and returns true; otherwise, returns false. */
>      bool (*lookup_port)(const void *aux, const char *port_name,
>                          unsigned int *portp);
> +
> +    /* Looks up tunnel port to a chassis by its port name.  If found,
> stores
> +     * its openflow port number in '*ofport' and returns true;
> +     * otherwise, returns false. */
> +    bool (*tunnel_ofport)(const void *aux, const char *port_name,
> +                          ofp_port_t *ofport);
> +
>      const void *aux;
>
>      /* 'true' if the flow is for a switch. */
> diff --git a/lib/actions.c b/lib/actions.c
> index 051e6c8..73dade9 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -2854,6 +2854,126 @@ ovnact_handle_svc_check_free(struct
> ovnact_handle_svc_check *sc OVS_UNUSED)
>  {
>  }
>
> +static void
> +parse_fwd_group_action(struct action_context *ctx)
> +{
> +    char *child_port, **child_port_list = NULL;
> +    size_t allocated_ports = 0;
> +    size_t n_child_ports = 0;
> +    bool liveness = false;
> +
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        if (lexer_match_id(ctx->lexer, "liveness")) {
> +            liveness = true;
> +            lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
> +        }
> +        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +            if (ctx->lexer->token.type != LEX_T_STRING) {
> +                lexer_syntax_error(ctx->lexer,
> +                                   "expecting logical switch port");
> +                return;
>

I think there is a memory leak here, if it returns in the middle of parsing.


> +            }
> +            /* Parse child's logical ports */
> +            child_port = xstrdup(ctx->lexer->token.s);
> +            lexer_get(ctx->lexer);
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +
> +            if (n_child_ports >= allocated_ports) {
> +                child_port_list = x2nrealloc(child_port_list,
> &allocated_ports,
> +                                             sizeof *child_port_list);
> +            }
> +            child_port_list[n_child_ports++] = child_port;
> +        }
> +    }
> +
> +    struct ovnact_fwd_group *fwd_group =
> ovnact_put_FWD_GROUP(ctx->ovnacts);
> +    fwd_group->ltable = ctx->pp->cur_ltable + 1;
> +    fwd_group->liveness = liveness;
> +    fwd_group->child_ports = child_port_list;
> +    fwd_group->n_child_ports = n_child_ports;
> +}
> +
> +static void
> +format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
> +{
> +    ds_put_cstr(s, "fwd_group(");
> +    if (fwd_group->liveness) {
> +        ds_put_cstr(s, "liveness;");
> +    }
> +    if (fwd_group->n_child_ports) {
> +        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +            if (i) {
> +                ds_put_cstr(s, ", ");
> +            }
> +
> +            ds_put_format(s, "%s", fwd_group->child_ports[i]);
> +        }
> +    }
> +    ds_put_cstr(s, ");");
> +}
> +
> +static void
> +encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
> +                 const struct ovnact_encode_params *ep,
> +                 struct ofpbuf *ofpacts)
> +{
> +    if (!fwd_group->n_child_ports) {
> +        /* Nothing to do without child ports */
> +        return;
> +    }
> +
> +    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> +
> +    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +        uint32_t  port_tunnel_key;
> +        ofp_port_t ofport;
> +
> +        const char *port_name = fwd_group->child_ports[i];
> +
> +        /* Find the tunnel key of the logical port */
> +        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
> +            return;
> +        }
> +        ds_put_format(&ds, ",bucket=");
> +
> +        if (fwd_group->liveness) {
> +            /* Find the openflow port number of the tunnel port */
> +            if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
> +                return;
> +            }
> +
> +            /* Watch port for failure, used with BFD */
> +            ds_put_format(&ds, "watch_port:%d,", ofport);
> +        }
> +
> +        ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
> +                      port_tunnel_key, reg_index);
> +        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
> +    }
> +
> +    uint32_t table_id = 0;
> +    struct ofpact_group *og;
> +    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
> +                                          ep->lflow_uuid);
> +    ds_destroy(&ds);
> +    if (table_id == EXT_TABLE_ID_INVALID) {
> +        return;
> +    }
> +
> +    /* Create an action to set the group */
> +    og = ofpact_put_GROUP(ofpacts);
> +    og->group_id = table_id;
> +}
> +
> +static void
> +ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> +{
> +    free(fwd_group->child_ports);
> +}
> +
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
>  parse_set_action(struct action_context *ctx)
> @@ -2973,6 +3093,8 @@ parse_action(struct action_context *ctx)
>          parse_bind_vport(ctx);
>      } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
>          parse_handle_svc_check(ctx);
> +    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
> +        parse_fwd_group_action(ctx);
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index b6dc809..29bb27a 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5419,6 +5419,60 @@ build_stateful(struct ovn_datapath *od, struct hmap
> *lflows, struct hmap *lbs)
>  }
>
>  static void
> +build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
> +{
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +    struct ds actions = DS_EMPTY_INITIALIZER;
> +
> +    for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
> +        const struct nbrec_forwarding_group *fwd_group = NULL;
> +        fwd_group = od->nbs->forwarding_groups[i];
> +        if (!fwd_group || (fwd_group->n_child_port == 0)) {
> +            continue;
> +        }
> +
> +        /* ARP responder for the forwarding group's virtual IP */
> +        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
> +                      fwd_group->vip);
> +        ds_put_format(&actions,
> +            "eth.dst = eth.src; "
> +            "eth.src = %s; "
> +            "arp.op = 2; /* ARP reply */ "
> +            "arp.tha = arp.sha; "
> +            "arp.sha = %s; "
> +            "arp.tpa = arp.spa; "
> +            "arp.spa = %s; "
> +            "outport = inport; "
> +            "flags.loopback = 1; "
> +            "output;",
> +            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
> +
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +
> +        /* L2 lookup for the forwarding group's virtual MAC */
> +        ds_clear(&match);
> +        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
> +
> +        /* Create a comma separated string of child ports */
> +        struct ds group_ports = DS_EMPTY_INITIALIZER;
> +        if (fwd_group->liveness) {
> +            ds_put_cstr(&group_ports, "liveness;");
> +        }
> +        for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
> +            ds_put_format(&group_ports, "\"%s\",",
> fwd_group->child_port[i]);
> +        }
> +        ds_put_format(&group_ports, "\"%s\"",
> +                      fwd_group->child_port[fwd_group->n_child_port - 1]);
> +
> +        ds_clear(&actions);
> +        ds_put_format(&actions, "fwd_group(%s);", group_ports.string);
>

Can you please use "ds_cstr()" instead of directly accessing the string.



> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +    }
> +}
> +
> +static void
>  build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
>  {
>      ovs_assert((od && od->nbr && od->lr_group));
> @@ -5718,6 +5772,15 @@ build_lswitch_flows(struct hmap *datapaths, struct
> hmap *ports,
>          build_stateful(od, lflows, lbs);
>      }
>
> +    /* Build logical flows for the forwarding groups */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
>
+            continue;
> +        }

How about
           if(!ods->nbs || !ods->n_forwarding_groups)  {
                 continue;
           }

+
> +        build_fwd_group_lflows(od, lflows);
> +    }
> +
>      /* Logical switch ingress table 0: Admission control framework
> (priority
>       * 100). */
>      HMAP_FOR_EACH (od, key_node, datapaths) {
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index 2645438..a40700a 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2224,6 +2224,9 @@ trace_actions(const struct ovnact *ovnacts, size_t
> ovnacts_len,
>
>          case OVNACT_HANDLE_SVC_CHECK:
>              break;
> +
> +        case OVNACT_FWD_GROUP:
> +            break;
>

It would be nice if you could add ovn-trace support for this if its
possible.
A follow up patch would do too. Otherwise this would become a technical
debt  :).



>          }
>      }
>      ds_destroy(&s);
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Manoj Sharma Jan. 20, 2020, 2:46 p.m. UTC | #2
Hi Numan,

Please see my answers inline.

From: Numan Siddique <numans@ovn.org>
Date: Thursday, January 16, 2020 at 7:53 AM
To: Manoj Sharma <manoj.sharma@nutanix.com>
Cc: "ovs-dev@openvswitch.org" <ovs-dev@openvswitch.org>
Subject: Re: [ovs-dev] [PATCH v2 ovn 2/3] Forwarding group to load balance l2 traffic with liveness detection



On Sat, Jan 11, 2020 at 6:11 AM Manoj Sharma <manoj.sharma@nutanix.com<mailto:manoj.sharma@nutanix.com>> wrote:
Add forwarding group support for a logical switch. It will add a new OVN
action "fwd_group" with a virtual IP and virtual MAC. Any number of logical
switch ports of this switch can be added to this forwarding group.
If traffic has to be load balanced across these logical switch ports then
traffic should be sent to forwarding group's virtual IP.

If the logical switch ports correspond to tunnel interfaces and BFD
can be enabled on these tunnel interfaces, then liveness can be enabled
for the forwarding group so that if any of the tunnel is down then
the corresponding logical switch port will not be selected during
load balancing.

Signed-off-by: Manoj Sharma <manoj.sharma@nutanix.com<mailto:manoj.sharma@nutanix.com>>

Hi Manoj,

As I mentioned in the earlier email, please add the detailed description in patch 0 to this patch.

How about defining the action - fwd_group as - fwd_group(liveliness=true, childports=p1,p2,...) ?

Normally in the OVN action, we use ";" for inner OpenFlow actions, but these are more like arguments
to the action - fwd_group.

Thank you for the suggestion. I am sending the new changes in v3.

In your setup, you would manually set the child port's chassis yourself ?
i.e ovn-sbctl lsp-bind <child port> <external router chassis>  right ?

Yes, if the chassis is not running ovn-controller.

You need to add the relevant documentation in ovn-northd.8.xml for the logical flows added

You need to add documentation for the action - fwd_group in ovn-sb.xml.

Also please update the NEWS file about this feature.

Done

Please see below for few comments.

---
 controller/lflow.c    |  20 +++++++++
 controller/physical.c |  13 ++++++
 controller/physical.h |   4 ++
 include/ovn/actions.h |  19 +++++++-
 lib/actions.c         | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
 northd/ovn-northd.c   |  63 ++++++++++++++++++++++++++
 utilities/ovn-trace.c |   3 ++
 7 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index a689320..49dfa06 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -103,6 +103,25 @@ lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
     return false;
 }

+/* Given the OVN port name, get its openflow port */
+static bool
+tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
+{
+    const struct lookup_port_aux *aux = aux_;
+
+    const struct sbrec_port_binding *pb
+        = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
+    if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
+        return false;
+    }
+
+    if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
+        return false;
+    }
+
+    return true;
+}
+
 static bool
 is_chassis_resident_cb(const void *c_aux_, const char *port_name)
 {
@@ -681,6 +700,7 @@ consider_logical_flow(
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
     struct ovnact_encode_params ep = {
         .lookup_port = lookup_port_cb,
+        .tunnel_ofport = tunnel_ofport_cb,
         .aux = &aux,
         .is_switch = is_switch(ldp),
         .group_table = group_table,
diff --git a/controller/physical.c b/controller/physical.c
index 500d419..af1d10f 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,

     simap_destroy(&new_tunnel_to_ofport);
 }
+
+bool
+get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
+{
+    struct chassis_tunnel *tun = NULL;
+    tun = chassis_tunnel_find(chassis_name, encap_ip);
+    if (!tun) {
+        return false;
+    }
+
+    *ofport = tun->ofport;
+    return true;
+}
diff --git a/controller/physical.h b/controller/physical.h
index c93f6b1..c0e17cd 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
         const struct simap *ct_zones,
         const struct hmap *local_datapaths,
         struct ovn_desired_flow_table *);
+bool get_tunnel_ofport(
+        const char *chassis_name,
+        char *encap_ip,
+        ofp_port_t *ofport);
 #endif /* controller/physical.h */
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 047a8d7..2d39239 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -89,7 +89,8 @@ struct ovn_extend_table;
     OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
     OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
     OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
-    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
+    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
+    OVNACT(FWD_GROUP,         ovnact_fwd_group)

 /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -359,6 +360,15 @@ struct ovnact_handle_svc_check {
     struct expr_field port;     /* Logical port name. */
 };

+/* OVNACT_FWD_GROUP. */
+struct ovnact_fwd_group {
+    struct ovnact ovnact;
+    bool liveness;
+    char **child_ports;       /* Logical ports */
+    size_t n_child_ports;
+    uint8_t ltable;           /* Logical table ID of next table. */
+};
+
 /* Internal use by the helpers below. */
 void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
 void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -620,6 +630,13 @@ struct ovnact_encode_params {
      * '*portp' and returns true; otherwise, returns false. */
     bool (*lookup_port)(const void *aux, const char *port_name,
                         unsigned int *portp);
+
+    /* Looks up tunnel port to a chassis by its port name.  If found, stores
+     * its openflow port number in '*ofport' and returns true;
+     * otherwise, returns false. */
+    bool (*tunnel_ofport)(const void *aux, const char *port_name,
+                          ofp_port_t *ofport);
+
     const void *aux;

     /* 'true' if the flow is for a switch. */
diff --git a/lib/actions.c b/lib/actions.c
index 051e6c8..73dade9 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -2854,6 +2854,126 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
 {
 }

+static void
+parse_fwd_group_action(struct action_context *ctx)
+{
+    char *child_port, **child_port_list = NULL;
+    size_t allocated_ports = 0;
+    size_t n_child_ports = 0;
+    bool liveness = false;
+
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        if (lexer_match_id(ctx->lexer, "liveness")) {
+            liveness = true;
+            lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
+        }
+        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+            if (ctx->lexer->token.type != LEX_T_STRING) {
+                lexer_syntax_error(ctx->lexer,
+                                   "expecting logical switch port");
+                return;

I think there is a memory leak here, if it returns in the middle of parsing.

It should get freed at the end of the command. I have not seen any explicit free in case of lexer error.

+            }
+            /* Parse child's logical ports */
+            child_port = xstrdup(ctx->lexer->token.s);
+            lexer_get(ctx->lexer);
+            lexer_match(ctx->lexer, LEX_T_COMMA);
+
+            if (n_child_ports >= allocated_ports) {
+                child_port_list = x2nrealloc(child_port_list, &allocated_ports,
+                                             sizeof *child_port_list);
+            }
+            child_port_list[n_child_ports++] = child_port;
+        }
+    }
+
+    struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
+    fwd_group->ltable = ctx->pp->cur_ltable + 1;
+    fwd_group->liveness = liveness;
+    fwd_group->child_ports = child_port_list;
+    fwd_group->n_child_ports = n_child_ports;
+}
+
+static void
+format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
+{
+    ds_put_cstr(s, "fwd_group(");
+    if (fwd_group->liveness) {
+        ds_put_cstr(s, "liveness;");
+    }
+    if (fwd_group->n_child_ports) {
+        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+            if (i) {
+                ds_put_cstr(s, ", ");
+            }
+
+            ds_put_format(s, "%s", fwd_group->child_ports[i]);
+        }
+    }
+    ds_put_cstr(s, ");");
+}
+
+static void
+encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
+                 const struct ovnact_encode_params *ep,
+                 struct ofpbuf *ofpacts)
+{
+    if (!fwd_group->n_child_ports) {
+        /* Nothing to do without child ports */
+        return;
+    }
+
+    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "type=select,selection_method=dp_hash");
+
+    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+        uint32_t  port_tunnel_key;
+        ofp_port_t ofport;
+
+        const char *port_name = fwd_group->child_ports[i];
+
+        /* Find the tunnel key of the logical port */
+        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
+            return;
+        }
+        ds_put_format(&ds, ",bucket=");
+
+        if (fwd_group->liveness) {
+            /* Find the openflow port number of the tunnel port */
+            if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
+                return;
+            }
+
+            /* Watch port for failure, used with BFD */
+            ds_put_format(&ds, "watch_port:%d,", ofport);
+        }
+
+        ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
+                      port_tunnel_key, reg_index);
+        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
+    }
+
+    uint32_t table_id = 0;
+    struct ofpact_group *og;
+    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
+                                          ep->lflow_uuid);
+    ds_destroy(&ds);
+    if (table_id == EXT_TABLE_ID_INVALID) {
+        return;
+    }
+
+    /* Create an action to set the group */
+    og = ofpact_put_GROUP(ofpacts);
+    og->group_id = table_id;
+}
+
+static void
+ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
+{
+    free(fwd_group->child_ports);
+}
+
 /* Parses an assignment or exchange or put_dhcp_opts action. */
 static void
 parse_set_action(struct action_context *ctx)
@@ -2973,6 +3093,8 @@ parse_action(struct action_context *ctx)
         parse_bind_vport(ctx);
     } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
         parse_handle_svc_check(ctx);
+    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
+        parse_fwd_group_action(ctx);
     } else {
         lexer_syntax_error(ctx->lexer, "expecting action");
     }
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index b6dc809..29bb27a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -5419,6 +5419,60 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
 }

 static void
+build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
+{
+    struct ds match = DS_EMPTY_INITIALIZER;
+    struct ds actions = DS_EMPTY_INITIALIZER;
+
+    for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
+        const struct nbrec_forwarding_group *fwd_group = NULL;
+        fwd_group = od->nbs->forwarding_groups[i];
+        if (!fwd_group || (fwd_group->n_child_port == 0)) {
+            continue;
+        }
+
+        /* ARP responder for the forwarding group's virtual IP */
+        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+                      fwd_group->vip);
+        ds_put_format(&actions,
+            "eth.dst = eth.src; "
+            "eth.src = %s; "
+            "arp.op = 2; /* ARP reply */ "
+            "arp.tha = arp.sha; "
+            "arp.sha = %s; "
+            "arp.tpa = arp.spa; "
+            "arp.spa = %s; "
+            "outport = inport; "
+            "flags.loopback = 1; "
+            "output;",
+            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
+
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+
+        /* L2 lookup for the forwarding group's virtual MAC */
+        ds_clear(&match);
+        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
+
+        /* Create a comma separated string of child ports */
+        struct ds group_ports = DS_EMPTY_INITIALIZER;
+        if (fwd_group->liveness) {
+            ds_put_cstr(&group_ports, "liveness;");
+        }
+        for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
+            ds_put_format(&group_ports, "\"%s\",", fwd_group->child_port[i]);
+        }
+        ds_put_format(&group_ports, "\"%s\"",
+                      fwd_group->child_port[fwd_group->n_child_port - 1]);
+
+        ds_clear(&actions);
+        ds_put_format(&actions, "fwd_group(%s);", group_ports.string);

Can you please use "ds_cstr()" instead of directly accessing the string.
Done

+        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+    }
+}
+
+static void
 build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
 {
     ovs_assert((od && od->nbr && od->lr_group));
@@ -5718,6 +5772,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         build_stateful(od, lflows, lbs);
     }

+    /* Build logical flows for the forwarding groups */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
+        }
How about
           if(!ods->nbs || !ods->n_forwarding_groups)  {
                 continue;
           }
Right, done.

+
+        build_fwd_group_lflows(od, lflows);
+    }
+
     /* Logical switch ingress table 0: Admission control framework (priority
      * 100). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index 2645438..a40700a 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -2224,6 +2224,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,

         case OVNACT_HANDLE_SVC_CHECK:
             break;
+
+        case OVNACT_FWD_GROUP:
+            break;

It would be nice if you could add ovn-trace support for this if its possible.
A follow up patch would do too. Otherwise this would become a technical debt  :).

Sure, I will add it in separate patch.

         }
     }
     ds_destroy(&s);
--
1.8.3.1
Numan Siddique Jan. 20, 2020, 3:34 p.m. UTC | #3
On Mon, Jan 20, 2020 at 8:16 PM Manoj Sharma <manoj.sharma@nutanix.com> wrote:
>
> Hi Numan,
>
> Please see my answers inline.
>
> From: Numan Siddique <numans@ovn.org>
> Date: Thursday, January 16, 2020 at 7:53 AM
> To: Manoj Sharma <manoj.sharma@nutanix.com>
> Cc: "ovs-dev@openvswitch.org" <ovs-dev@openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH v2 ovn 2/3] Forwarding group to load balance l2 traffic with liveness detection
>
>
>
> On Sat, Jan 11, 2020 at 6:11 AM Manoj Sharma <manoj.sharma@nutanix.com<mailto:manoj.sharma@nutanix.com>> wrote:
> Add forwarding group support for a logical switch. It will add a new OVN
> action "fwd_group" with a virtual IP and virtual MAC. Any number of logical
> switch ports of this switch can be added to this forwarding group.
> If traffic has to be load balanced across these logical switch ports then
> traffic should be sent to forwarding group's virtual IP.
>
> If the logical switch ports correspond to tunnel interfaces and BFD
> can be enabled on these tunnel interfaces, then liveness can be enabled
> for the forwarding group so that if any of the tunnel is down then
> the corresponding logical switch port will not be selected during
> load balancing.
>
> Signed-off-by: Manoj Sharma <manoj.sharma@nutanix.com<mailto:manoj.sharma@nutanix.com>>
>
> Hi Manoj,
>
> As I mentioned in the earlier email, please add the detailed description in patch 0 to this patch.
>
> How about defining the action - fwd_group as - fwd_group(liveliness=true, childports=p1,p2,...) ?
>
> Normally in the OVN action, we use ";" for inner OpenFlow actions, but these are more like arguments
> to the action - fwd_group.
>
> Thank you for the suggestion. I am sending the new changes in v3.
>
> In your setup, you would manually set the child port's chassis yourself ?
> i.e ovn-sbctl lsp-bind <child port> <external router chassis>  right ?
>
> Yes, if the chassis is not running ovn-controller.
>
> You need to add the relevant documentation in ovn-northd.8.xml for the logical flows added
>
> You need to add documentation for the action - fwd_group in ovn-sb.xml.
>
> Also please update the NEWS file about this feature.
>
> Done
>
> Please see below for few comments.
>
> ---
>  controller/lflow.c    |  20 +++++++++
>  controller/physical.c |  13 ++++++
>  controller/physical.h |   4 ++
>  include/ovn/actions.h |  19 +++++++-
>  lib/actions.c         | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  northd/ovn-northd.c   |  63 ++++++++++++++++++++++++++
>  utilities/ovn-trace.c |   3 ++
>  7 files changed, 243 insertions(+), 1 deletion(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index a689320..49dfa06 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -103,6 +103,25 @@ lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
>      return false;
>  }
>
> +/* Given the OVN port name, get its openflow port */
> +static bool
> +tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
> +{
> +    const struct lookup_port_aux *aux = aux_;
> +
> +    const struct sbrec_port_binding *pb
> +        = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
> +    if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
> +        return false;
> +    }
> +
> +    if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
>  static bool
>  is_chassis_resident_cb(const void *c_aux_, const char *port_name)
>  {
> @@ -681,6 +700,7 @@ consider_logical_flow(
>      struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
>      struct ovnact_encode_params ep = {
>          .lookup_port = lookup_port_cb,
> +        .tunnel_ofport = tunnel_ofport_cb,
>          .aux = &aux,
>          .is_switch = is_switch(ldp),
>          .group_table = group_table,
> diff --git a/controller/physical.c b/controller/physical.c
> index 500d419..af1d10f 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>
>      simap_destroy(&new_tunnel_to_ofport);
>  }
> +
> +bool
> +get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
> +{
> +    struct chassis_tunnel *tun = NULL;
> +    tun = chassis_tunnel_find(chassis_name, encap_ip);
> +    if (!tun) {
> +        return false;
> +    }
> +
> +    *ofport = tun->ofport;
> +    return true;
> +}
> diff --git a/controller/physical.h b/controller/physical.h
> index c93f6b1..c0e17cd 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
>          const struct simap *ct_zones,
>          const struct hmap *local_datapaths,
>          struct ovn_desired_flow_table *);
> +bool get_tunnel_ofport(
> +        const char *chassis_name,
> +        char *encap_ip,
> +        ofp_port_t *ofport);
>  #endif /* controller/physical.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 047a8d7..2d39239 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -89,7 +89,8 @@ struct ovn_extend_table;
>      OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
>      OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
>      OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
> -    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
> +    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
> +    OVNACT(FWD_GROUP,         ovnact_fwd_group)
>
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -359,6 +360,15 @@ struct ovnact_handle_svc_check {
>      struct expr_field port;     /* Logical port name. */
>  };
>
> +/* OVNACT_FWD_GROUP. */
> +struct ovnact_fwd_group {
> +    struct ovnact ovnact;
> +    bool liveness;
> +    char **child_ports;       /* Logical ports */
> +    size_t n_child_ports;
> +    uint8_t ltable;           /* Logical table ID of next table. */
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> @@ -620,6 +630,13 @@ struct ovnact_encode_params {
>       * '*portp' and returns true; otherwise, returns false. */
>      bool (*lookup_port)(const void *aux, const char *port_name,
>                          unsigned int *portp);
> +
> +    /* Looks up tunnel port to a chassis by its port name.  If found, stores
> +     * its openflow port number in '*ofport' and returns true;
> +     * otherwise, returns false. */
> +    bool (*tunnel_ofport)(const void *aux, const char *port_name,
> +                          ofp_port_t *ofport);
> +
>      const void *aux;
>
>      /* 'true' if the flow is for a switch. */
> diff --git a/lib/actions.c b/lib/actions.c
> index 051e6c8..73dade9 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -2854,6 +2854,126 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
>  {
>  }
>
> +static void
> +parse_fwd_group_action(struct action_context *ctx)
> +{
> +    char *child_port, **child_port_list = NULL;
> +    size_t allocated_ports = 0;
> +    size_t n_child_ports = 0;
> +    bool liveness = false;
> +
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        if (lexer_match_id(ctx->lexer, "liveness")) {
> +            liveness = true;
> +            lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
> +        }
> +        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +            if (ctx->lexer->token.type != LEX_T_STRING) {
> +                lexer_syntax_error(ctx->lexer,
> +                                   "expecting logical switch port");
> +                return;
>
> I think there is a memory leak here, if it returns in the middle of parsing.
>
> It should get freed at the end of the command. I have not seen any explicit free in case of lexer error.

I mean the memory allocated via x2nrealloc() below for the
child_port_list and which gets assigned to fwd_group->child_ports.

I know that the function ovnact_fwd_group_free() would free this
memory at the end.
If in case parse_fwd_group_action() sets error via
lexer_syntax_error(), I don't think ovnact_fwd_group_free()
is called, since the parsed actions are not added via
ovnact_put_FWD_GROUP() to ovnacts buffer.

Can you please verify once.

Thanks
Numan

>
> +            }
> +            /* Parse child's logical ports */
> +            child_port = xstrdup(ctx->lexer->token.s);
> +            lexer_get(ctx->lexer);
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +
> +            if (n_child_ports >= allocated_ports) {
> +                child_port_list = x2nrealloc(child_port_list, &allocated_ports,
> +                                             sizeof *child_port_list);
> +            }
> +            child_port_list[n_child_ports++] = child_port;
> +        }
> +    }
> +
> +    struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
> +    fwd_group->ltable = ctx->pp->cur_ltable + 1;
> +    fwd_group->liveness = liveness;
> +    fwd_group->child_ports = child_port_list;
> +    fwd_group->n_child_ports = n_child_ports;
> +}
> +
> +static void
> +format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
> +{
> +    ds_put_cstr(s, "fwd_group(");
> +    if (fwd_group->liveness) {
> +        ds_put_cstr(s, "liveness;");
> +    }
> +    if (fwd_group->n_child_ports) {
> +        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +            if (i) {
> +                ds_put_cstr(s, ", ");
> +            }
> +
> +            ds_put_format(s, "%s", fwd_group->child_ports[i]);
> +        }
> +    }
> +    ds_put_cstr(s, ");");
> +}
> +
> +static void
> +encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
> +                 const struct ovnact_encode_params *ep,
> +                 struct ofpbuf *ofpacts)
> +{
> +    if (!fwd_group->n_child_ports) {
> +        /* Nothing to do without child ports */
> +        return;
> +    }
> +
> +    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> +
> +    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +        uint32_t  port_tunnel_key;
> +        ofp_port_t ofport;
> +
> +        const char *port_name = fwd_group->child_ports[i];
> +
> +        /* Find the tunnel key of the logical port */
> +        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
> +            return;
> +        }
> +        ds_put_format(&ds, ",bucket=");
> +
> +        if (fwd_group->liveness) {
> +            /* Find the openflow port number of the tunnel port */
> +            if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
> +                return;
> +            }
> +
> +            /* Watch port for failure, used with BFD */
> +            ds_put_format(&ds, "watch_port:%d,", ofport);
> +        }
> +
> +        ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
> +                      port_tunnel_key, reg_index);
> +        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
> +    }
> +
> +    uint32_t table_id = 0;
> +    struct ofpact_group *og;
> +    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
> +                                          ep->lflow_uuid);
> +    ds_destroy(&ds);
> +    if (table_id == EXT_TABLE_ID_INVALID) {
> +        return;
> +    }
> +
> +    /* Create an action to set the group */
> +    og = ofpact_put_GROUP(ofpacts);
> +    og->group_id = table_id;
> +}
> +
> +static void
> +ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> +{
> +    free(fwd_group->child_ports);
> +}
> +
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
>  parse_set_action(struct action_context *ctx)
> @@ -2973,6 +3093,8 @@ parse_action(struct action_context *ctx)
>          parse_bind_vport(ctx);
>      } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
>          parse_handle_svc_check(ctx);
> +    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
> +        parse_fwd_group_action(ctx);
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index b6dc809..29bb27a 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5419,6 +5419,60 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
>  }
>
>  static void
> +build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
> +{
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +    struct ds actions = DS_EMPTY_INITIALIZER;
> +
> +    for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
> +        const struct nbrec_forwarding_group *fwd_group = NULL;
> +        fwd_group = od->nbs->forwarding_groups[i];
> +        if (!fwd_group || (fwd_group->n_child_port == 0)) {
> +            continue;
> +        }
> +
> +        /* ARP responder for the forwarding group's virtual IP */
> +        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
> +                      fwd_group->vip);
> +        ds_put_format(&actions,
> +            "eth.dst = eth.src; "
> +            "eth.src = %s; "
> +            "arp.op = 2; /* ARP reply */ "
> +            "arp.tha = arp.sha; "
> +            "arp.sha = %s; "
> +            "arp.tpa = arp.spa; "
> +            "arp.spa = %s; "
> +            "outport = inport; "
> +            "flags.loopback = 1; "
> +            "output;",
> +            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
> +
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +
> +        /* L2 lookup for the forwarding group's virtual MAC */
> +        ds_clear(&match);
> +        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
> +
> +        /* Create a comma separated string of child ports */
> +        struct ds group_ports = DS_EMPTY_INITIALIZER;
> +        if (fwd_group->liveness) {
> +            ds_put_cstr(&group_ports, "liveness;");
> +        }
> +        for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
> +            ds_put_format(&group_ports, "\"%s\",", fwd_group->child_port[i]);
> +        }
> +        ds_put_format(&group_ports, "\"%s\"",
> +                      fwd_group->child_port[fwd_group->n_child_port - 1]);
> +
> +        ds_clear(&actions);
> +        ds_put_format(&actions, "fwd_group(%s);", group_ports.string);
>
> Can you please use "ds_cstr()" instead of directly accessing the string.
> Done
>
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +    }
> +}
> +
> +static void
>  build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
>  {
>      ovs_assert((od && od->nbr && od->lr_group));
> @@ -5718,6 +5772,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>          build_stateful(od, lflows, lbs);
>      }
>
> +    /* Build logical flows for the forwarding groups */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
> +        }
> How about
>            if(!ods->nbs || !ods->n_forwarding_groups)  {
>                  continue;
>            }
> Right, done.
>
> +
> +        build_fwd_group_lflows(od, lflows);
> +    }
> +
>      /* Logical switch ingress table 0: Admission control framework (priority
>       * 100). */
>      HMAP_FOR_EACH (od, key_node, datapaths) {
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index 2645438..a40700a 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2224,6 +2224,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>
>          case OVNACT_HANDLE_SVC_CHECK:
>              break;
> +
> +        case OVNACT_FWD_GROUP:
> +            break;
>
> It would be nice if you could add ovn-trace support for this if its possible.
> A follow up patch would do too. Otherwise this would become a technical debt  :).
>
> Sure, I will add it in separate patch.
>
>          }
>      }
>      ds_destroy(&s);
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev [mail.openvswitch.org]<https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwMFaQ&c=s883GpUCOChKOHiocYtGcg&r=9SN4tlEcxQzh-CvuC81YSi9I6ERNBOWQbg-05pnXlNw&m=qHMqdUI30B2OQvMDCELNR-4Qrn9KZl-ur1GFQQ7WQCU&s=P7MufuELa-MXYQMnFzNAITSeDY1Qyhmn37vnqoSQ0QE&e=>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

Patch
diff mbox series

diff --git a/controller/lflow.c b/controller/lflow.c
index a689320..49dfa06 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -103,6 +103,25 @@  lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
     return false;
 }
 
+/* Given the OVN port name, get its openflow port */
+static bool
+tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
+{
+    const struct lookup_port_aux *aux = aux_;
+
+    const struct sbrec_port_binding *pb
+        = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
+    if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
+        return false;
+    }
+
+    if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
+        return false;
+    }
+
+    return true;
+}
+
 static bool
 is_chassis_resident_cb(const void *c_aux_, const char *port_name)
 {
@@ -681,6 +700,7 @@  consider_logical_flow(
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
     struct ovnact_encode_params ep = {
         .lookup_port = lookup_port_cb,
+        .tunnel_ofport = tunnel_ofport_cb,
         .aux = &aux,
         .is_switch = is_switch(ldp),
         .group_table = group_table,
diff --git a/controller/physical.c b/controller/physical.c
index 500d419..af1d10f 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1794,3 +1794,16 @@  physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
 
     simap_destroy(&new_tunnel_to_ofport);
 }
+
+bool
+get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
+{
+    struct chassis_tunnel *tun = NULL;
+    tun = chassis_tunnel_find(chassis_name, encap_ip);
+    if (!tun) {
+        return false;
+    }
+
+    *ofport = tun->ofport;
+    return true;
+}
diff --git a/controller/physical.h b/controller/physical.h
index c93f6b1..c0e17cd 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -72,4 +72,8 @@  void physical_handle_mc_group_changes(
         const struct simap *ct_zones,
         const struct hmap *local_datapaths,
         struct ovn_desired_flow_table *);
+bool get_tunnel_ofport(
+        const char *chassis_name,
+        char *encap_ip,
+        ofp_port_t *ofport);
 #endif /* controller/physical.h */
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 047a8d7..2d39239 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -89,7 +89,8 @@  struct ovn_extend_table;
     OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
     OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
     OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
-    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
+    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
+    OVNACT(FWD_GROUP,         ovnact_fwd_group)
 
 /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -359,6 +360,15 @@  struct ovnact_handle_svc_check {
     struct expr_field port;     /* Logical port name. */
 };
 
+/* OVNACT_FWD_GROUP. */
+struct ovnact_fwd_group {
+    struct ovnact ovnact;
+    bool liveness;
+    char **child_ports;       /* Logical ports */
+    size_t n_child_ports;
+    uint8_t ltable;           /* Logical table ID of next table. */
+};
+
 /* Internal use by the helpers below. */
 void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
 void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -620,6 +630,13 @@  struct ovnact_encode_params {
      * '*portp' and returns true; otherwise, returns false. */
     bool (*lookup_port)(const void *aux, const char *port_name,
                         unsigned int *portp);
+
+    /* Looks up tunnel port to a chassis by its port name.  If found, stores
+     * its openflow port number in '*ofport' and returns true;
+     * otherwise, returns false. */
+    bool (*tunnel_ofport)(const void *aux, const char *port_name,
+                          ofp_port_t *ofport);
+
     const void *aux;
 
     /* 'true' if the flow is for a switch. */
diff --git a/lib/actions.c b/lib/actions.c
index 051e6c8..73dade9 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -2854,6 +2854,126 @@  ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
 {
 }
 
+static void
+parse_fwd_group_action(struct action_context *ctx)
+{
+    char *child_port, **child_port_list = NULL;
+    size_t allocated_ports = 0;
+    size_t n_child_ports = 0;
+    bool liveness = false;
+
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        if (lexer_match_id(ctx->lexer, "liveness")) {
+            liveness = true;
+            lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
+        }
+        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+            if (ctx->lexer->token.type != LEX_T_STRING) {
+                lexer_syntax_error(ctx->lexer,
+                                   "expecting logical switch port");
+                return;
+            }
+            /* Parse child's logical ports */
+            child_port = xstrdup(ctx->lexer->token.s);
+            lexer_get(ctx->lexer);
+            lexer_match(ctx->lexer, LEX_T_COMMA);
+
+            if (n_child_ports >= allocated_ports) {
+                child_port_list = x2nrealloc(child_port_list, &allocated_ports,
+                                             sizeof *child_port_list);
+            }
+            child_port_list[n_child_ports++] = child_port;
+        }
+    }
+
+    struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
+    fwd_group->ltable = ctx->pp->cur_ltable + 1;
+    fwd_group->liveness = liveness;
+    fwd_group->child_ports = child_port_list;
+    fwd_group->n_child_ports = n_child_ports;
+}
+
+static void
+format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
+{
+    ds_put_cstr(s, "fwd_group(");
+    if (fwd_group->liveness) {
+        ds_put_cstr(s, "liveness;");
+    }
+    if (fwd_group->n_child_ports) {
+        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+            if (i) {
+                ds_put_cstr(s, ", ");
+            }
+
+            ds_put_format(s, "%s", fwd_group->child_ports[i]);
+        }
+    }
+    ds_put_cstr(s, ");");
+}
+
+static void
+encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
+                 const struct ovnact_encode_params *ep,
+                 struct ofpbuf *ofpacts)
+{
+    if (!fwd_group->n_child_ports) {
+        /* Nothing to do without child ports */
+        return;
+    }
+
+    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "type=select,selection_method=dp_hash");
+
+    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+        uint32_t  port_tunnel_key;
+        ofp_port_t ofport;
+
+        const char *port_name = fwd_group->child_ports[i];
+
+        /* Find the tunnel key of the logical port */
+        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
+            return;
+        }
+        ds_put_format(&ds, ",bucket=");
+
+        if (fwd_group->liveness) {
+            /* Find the openflow port number of the tunnel port */
+            if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
+                return;
+            }
+
+            /* Watch port for failure, used with BFD */
+            ds_put_format(&ds, "watch_port:%d,", ofport);
+        }
+
+        ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
+                      port_tunnel_key, reg_index);
+        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
+    }
+
+    uint32_t table_id = 0;
+    struct ofpact_group *og;
+    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
+                                          ep->lflow_uuid);
+    ds_destroy(&ds);
+    if (table_id == EXT_TABLE_ID_INVALID) {
+        return;
+    }
+
+    /* Create an action to set the group */
+    og = ofpact_put_GROUP(ofpacts);
+    og->group_id = table_id;
+}
+
+static void
+ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
+{
+    free(fwd_group->child_ports);
+}
+
 /* Parses an assignment or exchange or put_dhcp_opts action. */
 static void
 parse_set_action(struct action_context *ctx)
@@ -2973,6 +3093,8 @@  parse_action(struct action_context *ctx)
         parse_bind_vport(ctx);
     } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
         parse_handle_svc_check(ctx);
+    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
+        parse_fwd_group_action(ctx);
     } else {
         lexer_syntax_error(ctx->lexer, "expecting action");
     }
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index b6dc809..29bb27a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -5419,6 +5419,60 @@  build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
 }
 
 static void
+build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
+{
+    struct ds match = DS_EMPTY_INITIALIZER;
+    struct ds actions = DS_EMPTY_INITIALIZER;
+
+    for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
+        const struct nbrec_forwarding_group *fwd_group = NULL;
+        fwd_group = od->nbs->forwarding_groups[i];
+        if (!fwd_group || (fwd_group->n_child_port == 0)) {
+            continue;
+        }
+
+        /* ARP responder for the forwarding group's virtual IP */
+        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+                      fwd_group->vip);
+        ds_put_format(&actions,
+            "eth.dst = eth.src; "
+            "eth.src = %s; "
+            "arp.op = 2; /* ARP reply */ "
+            "arp.tha = arp.sha; "
+            "arp.sha = %s; "
+            "arp.tpa = arp.spa; "
+            "arp.spa = %s; "
+            "outport = inport; "
+            "flags.loopback = 1; "
+            "output;",
+            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
+
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+
+        /* L2 lookup for the forwarding group's virtual MAC */
+        ds_clear(&match);
+        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
+
+        /* Create a comma separated string of child ports */
+        struct ds group_ports = DS_EMPTY_INITIALIZER;
+        if (fwd_group->liveness) {
+            ds_put_cstr(&group_ports, "liveness;");
+        }
+        for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
+            ds_put_format(&group_ports, "\"%s\",", fwd_group->child_port[i]);
+        }
+        ds_put_format(&group_ports, "\"%s\"",
+                      fwd_group->child_port[fwd_group->n_child_port - 1]);
+
+        ds_clear(&actions);
+        ds_put_format(&actions, "fwd_group(%s);", group_ports.string);
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+    }
+}
+
+static void
 build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
 {
     ovs_assert((od && od->nbr && od->lr_group));
@@ -5718,6 +5772,15 @@  build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         build_stateful(od, lflows, lbs);
     }
 
+    /* Build logical flows for the forwarding groups */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
+        }
+
+        build_fwd_group_lflows(od, lflows);
+    }
+
     /* Logical switch ingress table 0: Admission control framework (priority
      * 100). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index 2645438..a40700a 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -2224,6 +2224,9 @@  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
 
         case OVNACT_HANDLE_SVC_CHECK:
             break;
+
+        case OVNACT_FWD_GROUP:
+            break;
         }
     }
     ds_destroy(&s);