diff mbox

[ovs-dev,6/7] ovn-controller: Add support for load balancing.

Message ID 1467188231-10194-7-git-send-email-guru@ovn.org
State Accepted
Headers show

Commit Message

Gurucharan Shetty June 29, 2016, 8:17 a.m. UTC
ovn-controller now supports 2 new logical actions.

1. ct_lb;
Sends the packet through the conntrack zone to NAT
packets. Packets that are part of established connection
will automatically get NATed based on the NAT arguments
supplied to conntrack when the first packet was committed.

2. ct_lb(192.168.1.2, 192.168.1.3);
   ct_lb(192.168.1.2:80, 192.168.1.3:80);
Creates an OpenFlow group with multiple buckets and equal weights
that changes the destination IP address (and port number) of the packet
statefully to one of the options provided inside the parenthesis.

Signed-off-by: Gurucharan Shetty <guru@ovn.org>
---
 ovn/controller/lflow.c          |   5 +-
 ovn/controller/lflow.h          |   2 +
 ovn/controller/ofctrl.c         | 160 +++++++++++++++++++++++++++++++++++++++-
 ovn/controller/ofctrl.h         |   3 +-
 ovn/controller/ovn-controller.c |  26 ++++++-
 ovn/lib/actions.c               | 135 +++++++++++++++++++++++++++++++++
 ovn/lib/actions.h               |  20 +++++
 ovn/lib/lex.c                   |  13 ++++
 ovn/lib/lex.h                   |   1 +
 ovn/ovn-sb.xml                  |  24 ++++++
 tests/ovn.at                    |   8 ++
 tests/test-ovn.c                |   8 ++
 12 files changed, 400 insertions(+), 5 deletions(-)

Comments

Gurucharan Shetty July 3, 2016, 11 p.m. UTC | #1
On 3 July 2016 at 10:23, Ben Pfaff <blp@ovn.org> wrote:

> On Wed, Jun 29, 2016 at 01:17:10AM -0700, Gurucharan Shetty wrote:
> > ovn-controller now supports 2 new logical actions.
> >
> > 1. ct_lb;
> > Sends the packet through the conntrack zone to NAT
> > packets. Packets that are part of established connection
> > will automatically get NATed based on the NAT arguments
> > supplied to conntrack when the first packet was committed.
> >
> > 2. ct_lb(192.168.1.2, 192.168.1.3);
> >    ct_lb(192.168.1.2:80, 192.168.1.3:80);
> > Creates an OpenFlow group with multiple buckets and equal weights
> > that changes the destination IP address (and port number) of the packet
> > statefully to one of the options provided inside the parenthesis.
> >
> > Signed-off-by: Gurucharan Shetty <guru@ovn.org>
>
> Thanks for implementing this!
>
> It looks to me like the VLOG_ERR calls in ofctrl_put() could get to be
> very repetitive if there is a bug that produces bad group strings.  I'd
> rate-limit them.
>
I did that.


>
> I don't think that the 'group' member of struct group_info needs to be a
> pointer.
>
> I'm still bothered by putting the ct_lb() argument into a quoted
> string.  It doesn't seem right to me.
>
> I'm appending an incremental, followed by a full patch, that implements
> the changes I suggest except for the rate-limiting.  Either way, it
> requires the patch that I just sent out to be applied first, this one
> here:
>         https://patchwork.ozlabs.org/patch/643797/


I looked at the above patch, sanity tested it with your provided
incremental for this patch to see that groups are getting created properly
in br-int. (There were a few merge issues with Ryan's incremental
processing patches, but that looked trivial).

Thank you for providing the below incremental and making this better. I
applied it with couple of changes
1. Removed a unnecessary free from ovn_group_table_clear()
2. Removed lexer_get_string() from lex.c as it was not needed any more.

And applied this.


>
>
> If you're happy with my suggestions, then:
> Acked-by: Ben Pfaff <blp@ovn.org>
>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index b8175d3..6335f88 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2015 Nicira, Inc.
> +/* Copyright (c) 2015, 2016 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -648,8 +648,8 @@ ovn_group_table_clear(struct group_table *group_table,
> bool existing)
>      HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
>          hmap_remove(target_group, &g->hmap_node);
>          bitmap_set0(group_table->group_ids, g->group_id);
> -        ds_destroy(g->group);
> -        free(g->group);
> +        ds_destroy(&g->group);
> +        free(&g->group);
>          free(g);
>      }
>  }
> @@ -705,7 +705,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>              char *error;
>              struct ds group_string = DS_EMPTY_INITIALIZER;
>              ds_put_format(&group_string, "group_id=%u,%s",
> -                          desired->group_id, ds_cstr(desired->group));
> +                          desired->group_id, ds_cstr(&desired->group));
>
>              error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
>                                              ds_cstr(&group_string),
> @@ -818,8 +818,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>
>              /* Remove 'installed' from 'group_table->existing_groups' */
>              hmap_remove(&group_table->existing_groups,
> &installed->hmap_node);
> -            ds_destroy(installed->group);
> -            free(installed->group);
> +            ds_destroy(&installed->group);
>
>              /* Dealloc group_id. */
>              bitmap_set0(group_table->group_ids, installed->group_id);
> @@ -835,8 +834,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>              hmap_insert(&group_table->existing_groups,
> &desired->hmap_node,
>                          desired->hmap_node.hash);
>          } else {
> -            ds_destroy(desired->group);
> -            free(desired->group);
> +            ds_destroy(&desired->group);
>              free(desired);
>          }
>      }
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 2e397f3..fbaf630 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -544,8 +544,7 @@ main(int argc, char *argv[])
>      HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
>                         &group_table.existing_groups) {
>          hmap_remove(&group_table.existing_groups, &installed->hmap_node);
> -        ds_destroy(installed->group);
> -        free(installed->group);
> +        ds_destroy(&installed->group);
>          free(installed);
>      }
>      hmap_destroy(&group_table.existing_groups);
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 81d809f..0d18adc 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -626,6 +626,21 @@ parse_put_dhcp_opts_action(struct action_context *ctx,
>      finish_controller_op(ctx->ofpacts, oc_offset);
>  }
>
> +static bool
> +action_parse_port(struct action_context *ctx, uint16_t *port)
> +{
> +    if (lexer_is_int(ctx->lexer)) {
> +        int value = ntohll(ctx->lexer->token.value.integer);
> +        if (value <= UINT16_MAX) {
> +            *port = value;
> +            lexer_get(ctx->lexer);
> +            return true;
> +        }
> +    }
> +    action_syntax_error(ctx, "expecting port number");
> +    return false;
> +}
> +
>  static void
>  parse_ct_lb_action(struct action_context *ctx)
>  {
> @@ -638,7 +653,7 @@ parse_ct_lb_action(struct action_context *ctx)
>      }
>
>      if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> -        /* ct_lb without parenthesis means that this is an established
> +        /* ct_lb without parentheses means that this is an established
>           * connection and we just need to do a NAT. */
>          const size_t ct_offset = ctx->ofpacts->size;
>          ofpbuf_pull(ctx->ofpacts, ct_offset);
> @@ -669,47 +684,51 @@ parse_ct_lb_action(struct action_context *ctx)
>          return;
>      }
>
> -    char *ips, *ip, *save = NULL;
>      uint32_t group_id = 0, bucket_id = 0, hash;
>      struct group_info *group_info;
>      struct ofpact_group *og;
> -    struct ds *ds;
>
> -    if (!lexer_get_string(ctx->lexer, &ips) || !ips[0]) {
> -        action_error(ctx, "ct_lb has missing ip parameters.");
> -        return;
> -    }
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +    ds_put_format(&ds, "type=select");
>
> -    ds = xmalloc(sizeof *ds);
> -    ds_init(ds);
> -    ds_put_format(ds, "type=select");
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> +    do {
> +        if (ctx->lexer->token.type != LEX_T_INTEGER
> +            || mf_subvalue_width(&ctx->lexer->token.value) > 32) {
> +            action_syntax_error(ctx, "expecting IPv4 address");
> +            ds_destroy(&ds);
> +            return;
> +        }
> +        ovs_be32 ip = ctx->lexer->token.value.ipv4;
> +        lexer_get(ctx->lexer);
> +
> +        uint16_t port = 0;
> +        if (lexer_match(ctx->lexer, LEX_T_COLON)
> +            && !action_parse_port(ctx, &port)) {
> +            ds_destroy(&ds);
> +            return;
> +        }
>
> -    ip = strtok_r(ips, ",", &save);
> -    ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
> -
> "ct(nat(dst=%s),commit,table=%d,zone=NXM_NX_REG5[0..15])"
> -                  ,bucket_id, ip, recirc_table);
> -    while ((ip = strtok_r(NULL, ",", &save)) != NULL) {
>          bucket_id++;
> -        ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
> -                      "ct(nat(dst=%s),commit,table=%d,zone="
> -                      "NXM_NX_REG5[0..15])", bucket_id, ip, recirc_table);
> -    }
> +        ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions="
> +                      "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip));
> +        if (port) {
> +            ds_put_format(&ds, ":%"PRIu16, port);
> +        }
> +        ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])",
> +                      recirc_table, MFF_LOG_CT_ZONE - MFF_REG0);
>
> -    free(ips);
> -    if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> -        ds_destroy(ds);
> -        free(ds);
> -        action_syntax_error(ctx, "expecting `)'");
> -        return;
> -    }
> +        lexer_match(ctx->lexer, LEX_T_COMMA);
> +    } while (!lexer_match(ctx->lexer, LEX_T_RPAREN));
>      add_prerequisite(ctx, "ip");
>
> -    hash = hash_string(ds_cstr(ds), 0);
> +    hash = hash_string(ds_cstr(&ds), 0);
>
>      /* Check whether we have non installed but allocated group_id. */
>      HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
>                               &ctx->ap->group_table->desired_groups) {
> -        if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
> +        if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
>              group_id = group_info->group_id;
>              break;
>          }
> @@ -720,7 +739,7 @@ parse_ct_lb_action(struct action_context *ctx)
>           * combination. */
>          HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
>                                   &ctx->ap->group_table->existing_groups) {
> -            if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
> +            if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
>                  group_id = group_info->group_id;
>              }
>          }
> @@ -732,8 +751,7 @@ parse_ct_lb_action(struct action_context *ctx)
>          }
>
>          if (group_id == MAX_OVN_GROUPS + 1) {
> -            ds_destroy(ds);
> -            free(ds);
> +            ds_destroy(&ds);
>              action_error(ctx, "out of group ids.");
>              return;
>          }
> @@ -747,8 +765,7 @@ parse_ct_lb_action(struct action_context *ctx)
>          hmap_insert(&ctx->ap->group_table->desired_groups,
>                      &group_info->hmap_node, group_info->hmap_node.hash);
>      } else {
> -        ds_destroy(ds);
> -        free(ds);
> +        ds_destroy(&ds);
>      }
>
>      /* Create an action to set the group. */
> diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
> index 7f3f478..4918900 100644
> --- a/ovn/lib/actions.h
> +++ b/ovn/lib/actions.h
> @@ -21,6 +21,7 @@
>  #include <stdint.h>
>  #include "compiler.h"
>  #include "hmap.h"
> +#include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
>  struct expr;
> @@ -41,7 +42,7 @@ struct group_table {
>
>  struct group_info {
>      struct hmap_node hmap_node;
> -    struct ds *group;
> +    struct ds group;
>      uint32_t group_id;
>  };
>
> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
> index 61b6c25..c5a4834 100644
> --- a/ovn/ovn-sb.xml
> +++ b/ovn/ovn-sb.xml
> @@ -965,7 +965,7 @@
>            <p>
>              <code>ct_dnat(<var>IP</var>)</code> sends the packet through
> the
>              DNAT zone to change the destination IP address of the packet
> to
> -            the one provided inside the parenthesis and commits the
> connection.
> +            the one provided inside the parentheses and commits the
> connection.
>              The packet is then automatically sent to the next tables as if
>              followed by <code>next;</code> action.  The next tables will
> see
>              the changes in the packet caused by the connection tracker.
> @@ -1110,26 +1110,27 @@
>          </dd>
>
>          <dt><code>ct_lb;</code></dt>
> -        <dt><code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>");</code></dt>
> +
> <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt>
>          <dd>
>            <p>
> -            <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
> -            commits the packet to the connection tracking table and DNATs
> the
> -            packet's destination IP address (and port) to any one of the
> -            provided IP address (and port) inside the parenthesis.  Each
> of
> -            the provided IP address is given equal weight while picking
> the
> -            DNAT address.  The processing automatically moves on to the
> next
> -            table and the next tables will see the changes in the packet
> caused
> -            by the connection tracker.  The connection tracking state is
> scoped
> -            by the logical port, so overlapping addresses may be used.
> -          </p>
> -          <p>
> -            <code>ct_lb</code> sends the packet to the connection tracking
> -            table to NAT the packets.  If the packet is part of an
> established
> -            connection that was previously committed to the connection
> tracker
> -            via
> <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
> -            action, it will automatically get DNATed to the same IP
> address
> -            as the first packet in that connection.
> +            With one or more arguments, <code>ct_lb</code> commits the
> packet
> +            to the connection tracking table and DNATs the packet's
> destination
> +            IP address (and port) to the IP address or addresses (and
> optional
> +            ports) specified in the string.  If multiple comma-separated
> IP
> +            addresses are specified, each is given equal weight for
> picking the
> +            DNAT address.  Processing automatically moves on to the next
> table,
> +            as if <code>next;</code> were specified, and later tables act
> on
> +            the packet as modified by the connection tracker.  Connection
> +            tracking state is scoped by the logical port, so overlapping
> +            addresses may be used.
> +          </p>
> +          <p>
> +            Without arguments, <code>ct_lb</code> sends the packet to the
> +            connection tracking table to NAT the packets.  If the packet
> is
> +            part of an established connection that was previously
> committed to
> +            the connection tracker via
> <code>ct_lb(</code>...<code>)</code>, it
> +            will automatically get DNATed to the same IP address as the
> first
> +            packet in that connection.
>            </p>
>          </dd>
>        </dl>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 8728b45..7a25402 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -506,11 +506,12 @@ ip.ttl => Syntax error at end of input expecting
> `--'.
>
>  # load balancing.
>  ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip
> -ct_lb(); => ct_lb has missing ip parameters.
> -ct_lb(""); => ct_lb has missing ip parameters.
> -ct_lb("192.168.1.2:80, 192.168.1.3:80"); => actions=group:1, prereqs=ip
> -ct_lb("192.168.1.2, 192.168.1.3"); => actions=group:2, prereqs=ip
> -ct_lb("foo"); => actions=group:3, prereqs=ip
> +ct_lb(); => Syntax error at `)' expecting IPv4 address.
> +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip
> +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip
> +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number.
> +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port
> number.
> +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address.
>
>  # conntrack
>  ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip
>
> --8<--------------------------cut here-------------------------->8--
>
> Author: Gurucharan Shetty <guru@ovn.org>
> Date:   Wed Jun 29 01:17:10 2016 -0700
>
>     ovn-controller: Add support for load balancing.
>
>     ovn-controller now supports 2 new logical actions.
>
>     1. ct_lb;
>     Sends the packet through the conntrack zone to NAT
>     packets. Packets that are part of established connection
>     will automatically get NATed based on the NAT arguments
>     supplied to conntrack when the first packet was committed.
>
>     2. ct_lb(192.168.1.2, 192.168.1.3);
>        ct_lb(192.168.1.2:80, 192.168.1.3:80);
>     Creates an OpenFlow group with multiple buckets and equal weights
>     that changes the destination IP address (and port number) of the packet
>     statefully to one of the options provided inside the parenthesis.
>
>     Signed-off-by: Gurucharan Shetty <guru@ovn.org>
>     Signed-off-by: Ben Pfaff <blp@ovn.org>
>
> diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
> index 52e6131..837e5c5 100644
> --- a/ovn/controller/lflow.c
> +++ b/ovn/controller/lflow.c
> @@ -200,6 +200,7 @@ add_logical_flows(struct controller_ctx *ctx, const
> struct lport_index *lports,
>                    const struct mcgroup_index *mcgroups,
>                    const struct hmap *local_datapaths,
>                    const struct hmap *patched_datapaths,
> +                  struct group_table *group_table,
>                    const struct simap *ct_zones, struct hmap *flow_table)
>  {
>      uint32_t conj_id_ofs = 1;
> @@ -286,6 +287,7 @@ add_logical_flows(struct controller_ctx *ctx, const
> struct lport_index *lports,
>              .lookup_port = lookup_port_cb,
>              .aux = &aux,
>              .ct_zones = ct_zones,
> +            .group_table = group_table,
>
>              .n_tables = LOG_PIPELINE_LEN,
>              .first_ptable = first_ptable,
> @@ -437,10 +439,11 @@ lflow_run(struct controller_ctx *ctx, const struct
> lport_index *lports,
>            const struct mcgroup_index *mcgroups,
>            const struct hmap *local_datapaths,
>            const struct hmap *patched_datapaths,
> +          struct group_table *group_table,
>            const struct simap *ct_zones, struct hmap *flow_table)
>  {
>      add_logical_flows(ctx, lports, mcgroups, local_datapaths,
> -                      patched_datapaths, ct_zones, flow_table);
> +                      patched_datapaths, group_table, ct_zones,
> flow_table);
>      add_neighbor_flows(ctx, lports, flow_table);
>  }
>
> diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
> index a3fc50c..e96a24b 100644
> --- a/ovn/controller/lflow.h
> +++ b/ovn/controller/lflow.h
> @@ -36,6 +36,7 @@
>  #include <stdint.h>
>
>  struct controller_ctx;
> +struct group_table;
>  struct hmap;
>  struct lport_index;
>  struct mcgroup_index;
> @@ -63,6 +64,7 @@ void lflow_run(struct controller_ctx *, const struct
> lport_index *,
>                 const struct mcgroup_index *,
>                 const struct hmap *local_datapaths,
>                 const struct hmap *patched_datapaths,
> +               struct group_table *group_table,
>                 const struct simap *ct_zones,
>                 struct hmap *flow_table);
>  void lflow_destroy(void);
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index f537bc0..6335f88 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2015 Nicira, Inc.
> +/* Copyright (c) 2015, 2016 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -14,6 +14,7 @@
>   */
>
>  #include <config.h>
> +#include "bitmap.h"
>  #include "byte-order.h"
>  #include "dirs.h"
>  #include "hash.h"
> @@ -24,11 +25,13 @@
>  #include "openvswitch/match.h"
>  #include "openvswitch/ofp-actions.h"
>  #include "openvswitch/ofp-msgs.h"
> +#include "openvswitch/ofp-parse.h"
>  #include "openvswitch/ofp-print.h"
>  #include "openvswitch/ofp-util.h"
>  #include "openvswitch/ofpbuf.h"
>  #include "openvswitch/vlog.h"
>  #include "ovn-controller.h"
> +#include "ovn/lib/actions.h"
>  #include "physical.h"
>  #include "rconn.h"
>  #include "socket-util.h"
> @@ -63,6 +66,8 @@ static void queue_flow_mod(struct ofputil_flow_mod *);
>  /* OpenFlow connection to the switch. */
>  static struct rconn *swconn;
>
> +static void queue_group_mod(struct ofputil_group_mod *);
> +
>  /* Last seen sequence number for 'swconn'.  When this differs from
>   * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
>  static unsigned int seqno;
> @@ -95,6 +100,9 @@ static struct rconn_packet_counter *tx_counter;
>   * installed in the switch. */
>  static struct hmap installed_flows;
>
> +/* A reference to the group_table. */
> +static struct group_table *groups;
> +
>  /* MFF_* field ID for our Geneve option.  In S_TLV_TABLE_MOD_SENT, this is
>   * the option we requested (we don't know whether we obtained it yet).  In
>   * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
> @@ -103,6 +111,9 @@ static enum mf_field_id mff_ovn_geneve;
>  static void ovn_flow_table_clear(struct hmap *flow_table);
>  static void ovn_flow_table_destroy(struct hmap *flow_table);
>
> +static void ovn_group_table_clear(struct group_table *group_table,
> +                                  bool existing);
> +
>  static void ofctrl_recv(const struct ofp_header *, enum ofptype);
>
>  void
> @@ -312,9 +323,23 @@ run_S_CLEAR_FLOWS(void)
>      queue_flow_mod(&fm);
>      VLOG_DBG("clearing all flows");
>
> +    struct ofputil_group_mod gm;
> +    memset(&gm, 0, sizeof gm);
> +    gm.command = OFPGC11_DELETE;
> +    gm.group_id = OFPG_ALL;
> +    gm.command_bucket_id = OFPG15_BUCKET_ALL;
> +    ovs_list_init(&gm.buckets);
> +    queue_group_mod(&gm);
> +    ofputil_bucket_list_destroy(&gm.buckets);
> +
>      /* Clear installed_flows, to match the state of the switch. */
>      ovn_flow_table_clear(&installed_flows);
>
> +    /* Clear existing groups, to match the state of the switch. */
> +    if (groups) {
> +        ovn_group_table_clear(groups, true);
> +    }
> +
>      state = S_UPDATE_FLOWS;
>  }
>
> @@ -591,16 +616,71 @@ queue_flow_mod(struct ofputil_flow_mod *fm)
>      queue_msg(ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM));
>  }
>
> +
> +/* group_table. */
> +
> +/* Finds and returns a group_info in 'existing_groups' whose key is
> identical
> + * to 'target''s key, or NULL if there is none. */
> +static struct group_info *
> +ovn_group_lookup(struct hmap *exisiting_groups,
> +                 const struct group_info *target)
> +{
> +    struct group_info *e;
> +
> +    HMAP_FOR_EACH_WITH_HASH(e, hmap_node, target->hmap_node.hash,
> +                            exisiting_groups) {
> +        if (e->group_id == target->group_id) {
> +            return e;
> +        }
> +   }
> +    return NULL;
> +}
> +
> +/* Clear either desired_groups or existing_groups in group_table. */
> +static void
> +ovn_group_table_clear(struct group_table *group_table, bool existing)
> +{
> +    struct group_info *g, *next;
> +    struct hmap *target_group = existing
> +                                ? &group_table->existing_groups
> +                                : &group_table->desired_groups;
> +
> +    HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
> +        hmap_remove(target_group, &g->hmap_node);
> +        bitmap_set0(group_table->group_ids, g->group_id);
> +        ds_destroy(&g->group);
> +        free(&g->group);
> +        free(g);
> +    }
> +}
> +
> +static void
> +queue_group_mod(struct ofputil_group_mod *gm)
> +{
> +    queue_msg(ofputil_encode_group_mod(OFP13_VERSION, gm));
> +}
> +
> +
>  /* Replaces the flow table on the switch, if possible, by the flows in
>   * 'flow_table', which should have been added with ofctrl_add_flow().
>   * Regardless of whether the flow table is updated, this deletes all of
> the
>   * flows from 'flow_table' and frees them.  (The hmap itself isn't
>   * destroyed.)
>   *
> + * Replaces the group table on the switch, if possible, by the groups in
> + * 'group_table->desired_groups'. Regardless of whether the group table
> + * is updated, this deletes all the groups from the
> + * 'group_table->desired_groups' and frees them. (The hmap itself isn't
> + * destroyed.)
> + *
>   * This called be called be ofctrl_run() within the main loop. */
>  void
> -ofctrl_put(struct hmap *flow_table)
> +ofctrl_put(struct hmap *flow_table, struct group_table *group_table)
>  {
> +    if (!groups) {
> +        groups = group_table;
> +    }
> +
>      /* The flow table can be updated if the connection to the switch is
> up and
>       * in the correct state and not backlogged with existing flow_mods.
> (Our
>       * criteria for being backlogged appear very conservative, but the
> socket
> @@ -610,9 +690,37 @@ ofctrl_put(struct hmap *flow_table)
>      if (state != S_UPDATE_FLOWS
>          || rconn_packet_counter_n_packets(tx_counter)) {
>          ovn_flow_table_clear(flow_table);
> +        ovn_group_table_clear(group_table, false);
>          return;
>      }
>
> +    /* Iterate through all the desired groups. If there are new ones,
> +     * add them to the switch. */
> +    struct group_info *desired;
> +    HMAP_FOR_EACH(desired, hmap_node, &group_table->desired_groups) {
> +        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
> +            /* Create and install new group. */
> +            struct ofputil_group_mod gm;
> +            enum ofputil_protocol usable_protocols;
> +            char *error;
> +            struct ds group_string = DS_EMPTY_INITIALIZER;
> +            ds_put_format(&group_string, "group_id=%u,%s",
> +                          desired->group_id, ds_cstr(&desired->group));
> +
> +            error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
> +                                            ds_cstr(&group_string),
> +                                            &usable_protocols);
> +            if (!error) {
> +                queue_group_mod(&gm);
> +            } else {
> +                VLOG_ERR("new group %s %s", error,
> ds_cstr(&group_string));
> +                free(error);
> +            }
> +            ds_destroy(&group_string);
> +            ofputil_bucket_list_destroy(&gm.buckets);
> +        }
> +    }
> +
>      /* Iterate through all of the installed flows.  If any of them are no
>       * longer desired, delete them; if any of them should have different
>       * actions, update them. */
> @@ -682,4 +790,52 @@ ofctrl_put(struct hmap *flow_table)
>          hmap_remove(flow_table, &d->hmap_node);
>          hmap_insert(&installed_flows, &d->hmap_node, d->hmap_node.hash);
>      }
> +
> +    /* Iterate through the installed groups from previous runs. If they
> +     * are not needed delete them. */
> +    struct group_info *installed, *next_group;
> +    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
> +                       &group_table->existing_groups) {
> +        if (!ovn_group_lookup(&group_table->desired_groups, installed)) {
> +            /* Delete the group. */
> +            struct ofputil_group_mod gm;
> +            enum ofputil_protocol usable_protocols;
> +            char *error;
> +            struct ds group_string = DS_EMPTY_INITIALIZER;
> +            ds_put_format(&group_string, "group_id=%u",
> installed->group_id);
> +
> +            error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
> +                                            ds_cstr(&group_string),
> +                                            &usable_protocols);
> +            if (!error) {
> +                queue_group_mod(&gm);
> +            } else {
> +                VLOG_ERR("%s", error);
> +                free(error);
> +            }
> +            ds_destroy(&group_string);
> +            ofputil_bucket_list_destroy(&gm.buckets);
> +
> +            /* Remove 'installed' from 'group_table->existing_groups' */
> +            hmap_remove(&group_table->existing_groups,
> &installed->hmap_node);
> +            ds_destroy(&installed->group);
> +
> +            /* Dealloc group_id. */
> +            bitmap_set0(group_table->group_ids, installed->group_id);
> +            free(installed);
> +        }
> +    }
> +
> +    /* Move the contents of desired_groups to existing_groups. */
> +    HMAP_FOR_EACH_SAFE(desired, next_group, hmap_node,
> +                       &group_table->desired_groups) {
> +        hmap_remove(&group_table->desired_groups, &desired->hmap_node);
> +        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
> +            hmap_insert(&group_table->existing_groups,
> &desired->hmap_node,
> +                        desired->hmap_node.hash);
> +        } else {
> +            ds_destroy(&desired->group);
> +            free(desired);
> +        }
> +    }
>  }
> diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
> index bc9cfba..bf5dfd5 100644
> --- a/ovn/controller/ofctrl.h
> +++ b/ovn/controller/ofctrl.h
> @@ -26,11 +26,12 @@ struct hmap;
>  struct match;
>  struct ofpbuf;
>  struct ovsrec_bridge;
> +struct group_table;
>
>  /* Interface for OVN main loop. */
>  void ofctrl_init(void);
>  enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int);
> -void ofctrl_put(struct hmap *flows);
> +void ofctrl_put(struct hmap *flows, struct group_table *group_table);
>  void ofctrl_wait(void);
>  void ofctrl_destroy(void);
>
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 47e6824..fbaf630 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -39,6 +39,7 @@
>  #include "ofctrl.h"
>  #include "openvswitch/vconn.h"
>  #include "openvswitch/vlog.h"
> +#include "ovn/lib/actions.h"
>  #include "ovn/lib/ovn-sb-idl.h"
>  #include "ovn/lib/ovn-util.h"
>  #include "patch.h"
> @@ -351,6 +352,13 @@ main(int argc, char *argv[])
>      }
>      unixctl_command_register("exit", "", 0, 0, ovn_controller_exit,
> &exiting);
>
> +    /* Initialize group ids for loadbalancing. */
> +    struct group_table group_table;
> +    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
> +    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
> +    hmap_init(&group_table.desired_groups);
> +    hmap_init(&group_table.existing_groups);
> +
>      daemonize_complete();
>
>      ovsrec_init();
> @@ -462,13 +470,14 @@ main(int argc, char *argv[])
>
>              struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
>              lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
> -                      &patched_datapaths, &ct_zones, &flow_table);
> +                      &patched_datapaths, &group_table, &ct_zones,
> +                      &flow_table);
>              if (chassis_id) {
>                  physical_run(&ctx, mff_ovn_geneve,
>                               br_int, chassis_id, &ct_zones, &flow_table,
>                               &local_datapaths, &patched_datapaths);
>              }
> -            ofctrl_put(&flow_table);
> +            ofctrl_put(&flow_table, &group_table);
>              hmap_destroy(&flow_table);
>          }
>
> @@ -528,6 +537,18 @@ main(int argc, char *argv[])
>
>      simap_destroy(&ct_zones);
>
> +    bitmap_free(group_table.group_ids);
> +    hmap_destroy(&group_table.desired_groups);
> +
> +    struct group_info *installed, *next_group;
> +    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
> +                       &group_table.existing_groups) {
> +        hmap_remove(&group_table.existing_groups, &installed->hmap_node);
> +        ds_destroy(&installed->group);
> +        free(installed);
> +    }
> +    hmap_destroy(&group_table.existing_groups);
> +
>      ovsdb_idl_loop_destroy(&ovs_idl_loop);
>      ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
>
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 569970e..0d18adc 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -18,10 +18,13 @@
>  #include <stdarg.h>
>  #include <stdbool.h>
>  #include "actions.h"
> +#include "bitmap.h"
>  #include "byte-order.h"
>  #include "compiler.h"
>  #include "ovn-dhcp.h"
>  #include "expr.h"
> +#include "hash.h"
> +#include "hmap.h"
>  #include "lex.h"
>  #include "logical-fields.h"
>  #include "nx-match.h"
> @@ -623,6 +626,153 @@ parse_put_dhcp_opts_action(struct action_context
> *ctx,
>      finish_controller_op(ctx->ofpacts, oc_offset);
>  }
>
> +static bool
> +action_parse_port(struct action_context *ctx, uint16_t *port)
> +{
> +    if (lexer_is_int(ctx->lexer)) {
> +        int value = ntohll(ctx->lexer->token.value.integer);
> +        if (value <= UINT16_MAX) {
> +            *port = value;
> +            lexer_get(ctx->lexer);
> +            return true;
> +        }
> +    }
> +    action_syntax_error(ctx, "expecting port number");
> +    return false;
> +}
> +
> +static void
> +parse_ct_lb_action(struct action_context *ctx)
> +{
> +    uint8_t recirc_table;
> +    if (ctx->ap->cur_ltable < ctx->ap->n_tables) {
> +        recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1;
> +    } else {
> +        action_error(ctx, "\"ct_lb\" action not allowed in last table.");
> +        return;
> +    }
> +
> +    if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        /* ct_lb without parentheses means that this is an established
> +         * connection and we just need to do a NAT. */
> +        const size_t ct_offset = ctx->ofpacts->size;
> +        ofpbuf_pull(ctx->ofpacts, ct_offset);
> +
> +        struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
> +        struct ofpact_nat *nat;
> +        size_t nat_offset;
> +        ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE);
> +        ct->zone_src.ofs = 0;
> +        ct->zone_src.n_bits = 16;
> +        ct->flags = 0;
> +        ct->recirc_table = recirc_table;
> +        ct->alg = 0;
> +
> +        add_prerequisite(ctx, "ip");
> +
> +        nat_offset = ctx->ofpacts->size;
> +        ofpbuf_pull(ctx->ofpacts, nat_offset);
> +
> +        nat = ofpact_put_NAT(ctx->ofpacts);
> +        nat->flags = 0;
> +        nat->range_af = AF_UNSPEC;
> +
> +        ctx->ofpacts->header = ofpbuf_push_uninit(ctx->ofpacts,
> nat_offset);
> +        ct = ctx->ofpacts->header;
> +        ofpact_finish(ctx->ofpacts, &ct->ofpact);
> +        ofpbuf_push_uninit(ctx->ofpacts, ct_offset);
> +        return;
> +    }
> +
> +    uint32_t group_id = 0, bucket_id = 0, hash;
> +    struct group_info *group_info;
> +    struct ofpact_group *og;
> +
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +    ds_put_format(&ds, "type=select");
> +
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> +    do {
> +        if (ctx->lexer->token.type != LEX_T_INTEGER
> +            || mf_subvalue_width(&ctx->lexer->token.value) > 32) {
> +            action_syntax_error(ctx, "expecting IPv4 address");
> +            ds_destroy(&ds);
> +            return;
> +        }
> +        ovs_be32 ip = ctx->lexer->token.value.ipv4;
> +        lexer_get(ctx->lexer);
> +
> +        uint16_t port = 0;
> +        if (lexer_match(ctx->lexer, LEX_T_COLON)
> +            && !action_parse_port(ctx, &port)) {
> +            ds_destroy(&ds);
> +            return;
> +        }
> +
> +        bucket_id++;
> +        ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions="
> +                      "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip));
> +        if (port) {
> +            ds_put_format(&ds, ":%"PRIu16, port);
> +        }
> +        ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])",
> +                      recirc_table, MFF_LOG_CT_ZONE - MFF_REG0);
> +
> +        lexer_match(ctx->lexer, LEX_T_COMMA);
> +    } while (!lexer_match(ctx->lexer, LEX_T_RPAREN));
> +    add_prerequisite(ctx, "ip");
> +
> +    hash = hash_string(ds_cstr(&ds), 0);
> +
> +    /* Check whether we have non installed but allocated group_id. */
> +    HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
> +                             &ctx->ap->group_table->desired_groups) {
> +        if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
> +            group_id = group_info->group_id;
> +            break;
> +        }
> +    }
> +
> +    if (!group_id) {
> +        /* Check whether we already have an installed entry for this
> +         * combination. */
> +        HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
> +                                 &ctx->ap->group_table->existing_groups) {
> +            if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
> +                group_id = group_info->group_id;
> +            }
> +        }
> +
> +        if (!group_id) {
> +            /* Reserve a new group_id. */
> +            group_id = bitmap_scan(ctx->ap->group_table->group_ids, 0, 1,
> +                                   MAX_OVN_GROUPS + 1);
> +        }
> +
> +        if (group_id == MAX_OVN_GROUPS + 1) {
> +            ds_destroy(&ds);
> +            action_error(ctx, "out of group ids.");
> +            return;
> +        }
> +        bitmap_set1(ctx->ap->group_table->group_ids, group_id);
> +
> +        group_info = xmalloc(sizeof *group_info);
> +        group_info->group = ds;
> +        group_info->group_id = group_id;
> +        group_info->hmap_node.hash = hash;
> +
> +        hmap_insert(&ctx->ap->group_table->desired_groups,
> +                    &group_info->hmap_node, group_info->hmap_node.hash);
> +    } else {
> +        ds_destroy(&ds);
> +    }
> +
> +    /* Create an action to set the group. */
> +    og = ofpact_put_GROUP(ctx->ofpacts);
> +    og->group_id = group_id;
> +}
> +
>  static void
>  emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
>  {
> @@ -762,6 +912,8 @@ parse_action(struct action_context *ctx)
>          parse_ct_nat(ctx, false);
>      } else if (lexer_match_id(ctx->lexer, "ct_snat")) {
>          parse_ct_nat(ctx, true);
> +    } else if (lexer_match_id(ctx->lexer, "ct_lb")) {
> +        parse_ct_lb_action(ctx);
>      } else if (lexer_match_id(ctx->lexer, "arp")) {
>          parse_arp_action(ctx);
>      } else if (lexer_match_id(ctx->lexer, "get_arp")) {
> diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
> index f49e15e..4918900 100644
> --- a/ovn/lib/actions.h
> +++ b/ovn/lib/actions.h
> @@ -20,6 +20,8 @@
>  #include <stdbool.h>
>  #include <stdint.h>
>  #include "compiler.h"
> +#include "hmap.h"
> +#include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
>  struct expr;
> @@ -28,6 +30,22 @@ struct ofpbuf;
>  struct shash;
>  struct simap;
>
> +#define MAX_OVN_GROUPS 65535
> +
> +struct group_table {
> +    unsigned long *group_ids;  /* Used as a bitmap with value set
> +                                * for allocated group ids in either
> +                                * desired_groups or existing_groups. */
> +    struct hmap desired_groups;
> +    struct hmap existing_groups;
> +};
> +
> +struct group_info {
> +    struct hmap_node hmap_node;
> +    struct ds group;
> +    uint32_t group_id;
> +};
> +
>  enum action_opcode {
>      /* "arp { ...actions... }".
>       *
> @@ -80,6 +98,9 @@ struct action_params {
>      /* A map from a port name to its connection tracking zone. */
>      const struct simap *ct_zones;
>
> +    /* A struct to figure out the group_id for group actions. */
> +    struct group_table *group_table;
> +
>      /* OVN maps each logical flow table (ltable), one-to-one, onto a
> physical
>       * OpenFlow flow table (ptable).  A number of parameters describe this
>       * mapping and data related to flow tables:
> diff --git a/ovn/lib/lex.c b/ovn/lib/lex.c
> index 89da666..bd748d0 100644
> --- a/ovn/lib/lex.c
> +++ b/ovn/lib/lex.c
> @@ -864,3 +864,16 @@ lexer_get_int(struct lexer *lexer, int *value)
>          return false;
>      }
>  }
> +
> +bool
> +lexer_get_string(struct lexer *lexer, char **str)
> +{
> +    if (lexer->token.type == LEX_T_STRING) {
> +        *str = xstrdup(lexer->token.s);
> +        lexer_get(lexer);
> +        return true;
> +    } else {
> +        *str = NULL;
> +        return false;
> +    }
> +}
> diff --git a/ovn/lib/lex.h b/ovn/lib/lex.h
> index 22f2807..7701c93 100644
> --- a/ovn/lib/lex.h
> +++ b/ovn/lib/lex.h
> @@ -122,5 +122,6 @@ bool lexer_match(struct lexer *, enum lex_type);
>  bool lexer_match_id(struct lexer *, const char *id);
>  bool lexer_is_int(const struct lexer *);
>  bool lexer_get_int(struct lexer *, int *value);
> +bool lexer_get_string(struct lexer *, char  **str);
>
>  #endif /* ovn/lex.h */
> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
> index f330374..c5a4834 100644
> --- a/ovn/ovn-sb.xml
> +++ b/ovn/ovn-sb.xml
> @@ -965,7 +965,7 @@
>            <p>
>              <code>ct_dnat(<var>IP</var>)</code> sends the packet through
> the
>              DNAT zone to change the destination IP address of the packet
> to
> -            the one provided inside the parenthesis and commits the
> connection.
> +            the one provided inside the parentheses and commits the
> connection.
>              The packet is then automatically sent to the next tables as if
>              followed by <code>next;</code> action.  The next tables will
> see
>              the changes in the packet caused by the connection tracker.
> @@ -1108,6 +1108,31 @@
>              </code>
>            </p>
>          </dd>
> +
> +        <dt><code>ct_lb;</code></dt>
> +
> <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt>
> +        <dd>
> +          <p>
> +            With one or more arguments, <code>ct_lb</code> commits the
> packet
> +            to the connection tracking table and DNATs the packet's
> destination
> +            IP address (and port) to the IP address or addresses (and
> optional
> +            ports) specified in the string.  If multiple comma-separated
> IP
> +            addresses are specified, each is given equal weight for
> picking the
> +            DNAT address.  Processing automatically moves on to the next
> table,
> +            as if <code>next;</code> were specified, and later tables act
> on
> +            the packet as modified by the connection tracker.  Connection
> +            tracking state is scoped by the logical port, so overlapping
> +            addresses may be used.
> +          </p>
> +          <p>
> +            Without arguments, <code>ct_lb</code> sends the packet to the
> +            connection tracking table to NAT the packets.  If the packet
> is
> +            part of an established connection that was previously
> committed to
> +            the connection tracker via
> <code>ct_lb(</code>...<code>)</code>, it
> +            will automatically get DNATed to the same IP address as the
> first
> +            packet in that connection.
> +          </p>
> +        </dd>
>        </dl>
>
>        <p>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 89879ce..7a25402 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -504,6 +504,15 @@ reg0[0..7] <-> ip.proto; => Field ip.proto is not
> modifiable.
>  ip.ttl--; => actions=dec_ttl, prereqs=ip
>  ip.ttl => Syntax error at end of input expecting `--'.
>
> +# load balancing.
> +ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip
> +ct_lb(); => Syntax error at `)' expecting IPv4 address.
> +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip
> +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip
> +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number.
> +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port
> number.
> +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address.
> +
>  # conntrack
>  ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip
>  ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip
> diff --git a/tests/test-ovn.c b/tests/test-ovn.c
> index 18e5aca..1c468d6 100644
> --- a/tests/test-ovn.c
> +++ b/tests/test-ovn.c
> @@ -1257,6 +1257,13 @@ test_parse_actions(struct ovs_cmdl_context *ctx
> OVS_UNUSED)
>      create_symtab(&symtab);
>      create_dhcp_opts(&dhcp_opts);
>
> +    /* Initialize group ids. */
> +    struct group_table group_table;
> +    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
> +    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
> +    hmap_init(&group_table.desired_groups);
> +    hmap_init(&group_table.existing_groups);
> +
>      simap_init(&ports);
>      simap_put(&ports, "eth0", 5);
>      simap_put(&ports, "eth1", 6);
> @@ -1277,6 +1284,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx
> OVS_UNUSED)
>              .lookup_port = lookup_port_cb,
>              .aux = &ports,
>              .ct_zones = &ct_zones,
> +            .group_table = &group_table,
>
>              .n_tables = 16,
>              .first_ptable = 16,
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
Ben Pfaff July 3, 2016, 11:47 p.m. UTC | #2
On Sun, Jul 03, 2016 at 04:00:26PM -0700, Guru Shetty wrote:
> On 3 July 2016 at 10:23, Ben Pfaff <blp@ovn.org> wrote:
> 
> > On Wed, Jun 29, 2016 at 01:17:10AM -0700, Gurucharan Shetty wrote:
> > I'm appending an incremental, followed by a full patch, that implements
> > the changes I suggest except for the rate-limiting.  Either way, it
> > requires the patch that I just sent out to be applied first, this one
> > here:
> >         https://patchwork.ozlabs.org/patch/643797/
> 
> 
> I looked at the above patch, sanity tested it with your provided
> incremental for this patch to see that groups are getting created properly
> in br-int. (There were a few merge issues with Ryan's incremental
> processing patches, but that looked trivial).
> 
> Thank you for providing the below incremental and making this better. I
> applied it with couple of changes
> 1. Removed a unnecessary free from ovn_group_table_clear()
> 2. Removed lexer_get_string() from lex.c as it was not needed any more.
> 
> And applied this.

Sweet.  Progress!
diff mbox

Patch

diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 52e6131..837e5c5 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -200,6 +200,7 @@  add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
                   const struct mcgroup_index *mcgroups,
                   const struct hmap *local_datapaths,
                   const struct hmap *patched_datapaths,
+                  struct group_table *group_table,
                   const struct simap *ct_zones, struct hmap *flow_table)
 {
     uint32_t conj_id_ofs = 1;
@@ -286,6 +287,7 @@  add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
             .lookup_port = lookup_port_cb,
             .aux = &aux,
             .ct_zones = ct_zones,
+            .group_table = group_table,
 
             .n_tables = LOG_PIPELINE_LEN,
             .first_ptable = first_ptable,
@@ -437,10 +439,11 @@  lflow_run(struct controller_ctx *ctx, const struct lport_index *lports,
           const struct mcgroup_index *mcgroups,
           const struct hmap *local_datapaths,
           const struct hmap *patched_datapaths,
+          struct group_table *group_table,
           const struct simap *ct_zones, struct hmap *flow_table)
 {
     add_logical_flows(ctx, lports, mcgroups, local_datapaths,
-                      patched_datapaths, ct_zones, flow_table);
+                      patched_datapaths, group_table, ct_zones, flow_table);
     add_neighbor_flows(ctx, lports, flow_table);
 }
 
diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index a3fc50c..e96a24b 100644
--- a/ovn/controller/lflow.h
+++ b/ovn/controller/lflow.h
@@ -36,6 +36,7 @@ 
 #include <stdint.h>
 
 struct controller_ctx;
+struct group_table;
 struct hmap;
 struct lport_index;
 struct mcgroup_index;
@@ -63,6 +64,7 @@  void lflow_run(struct controller_ctx *, const struct lport_index *,
                const struct mcgroup_index *,
                const struct hmap *local_datapaths,
                const struct hmap *patched_datapaths,
+               struct group_table *group_table,
                const struct simap *ct_zones,
                struct hmap *flow_table);
 void lflow_destroy(void);
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index f537bc0..b8175d3 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -14,6 +14,7 @@ 
  */
 
 #include <config.h>
+#include "bitmap.h"
 #include "byte-order.h"
 #include "dirs.h"
 #include "hash.h"
@@ -24,11 +25,13 @@ 
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-actions.h"
 #include "openvswitch/ofp-msgs.h"
+#include "openvswitch/ofp-parse.h"
 #include "openvswitch/ofp-print.h"
 #include "openvswitch/ofp-util.h"
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/vlog.h"
 #include "ovn-controller.h"
+#include "ovn/lib/actions.h"
 #include "physical.h"
 #include "rconn.h"
 #include "socket-util.h"
@@ -63,6 +66,8 @@  static void queue_flow_mod(struct ofputil_flow_mod *);
 /* OpenFlow connection to the switch. */
 static struct rconn *swconn;
 
+static void queue_group_mod(struct ofputil_group_mod *);
+
 /* Last seen sequence number for 'swconn'.  When this differs from
  * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
 static unsigned int seqno;
@@ -95,6 +100,9 @@  static struct rconn_packet_counter *tx_counter;
  * installed in the switch. */
 static struct hmap installed_flows;
 
+/* A reference to the group_table. */
+static struct group_table *groups;
+
 /* MFF_* field ID for our Geneve option.  In S_TLV_TABLE_MOD_SENT, this is
  * the option we requested (we don't know whether we obtained it yet).  In
  * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
@@ -103,6 +111,9 @@  static enum mf_field_id mff_ovn_geneve;
 static void ovn_flow_table_clear(struct hmap *flow_table);
 static void ovn_flow_table_destroy(struct hmap *flow_table);
 
+static void ovn_group_table_clear(struct group_table *group_table,
+                                  bool existing);
+
 static void ofctrl_recv(const struct ofp_header *, enum ofptype);
 
 void
@@ -312,9 +323,23 @@  run_S_CLEAR_FLOWS(void)
     queue_flow_mod(&fm);
     VLOG_DBG("clearing all flows");
 
+    struct ofputil_group_mod gm;
+    memset(&gm, 0, sizeof gm);
+    gm.command = OFPGC11_DELETE;
+    gm.group_id = OFPG_ALL;
+    gm.command_bucket_id = OFPG15_BUCKET_ALL;
+    ovs_list_init(&gm.buckets);
+    queue_group_mod(&gm);
+    ofputil_bucket_list_destroy(&gm.buckets);
+
     /* Clear installed_flows, to match the state of the switch. */
     ovn_flow_table_clear(&installed_flows);
 
+    /* Clear existing groups, to match the state of the switch. */
+    if (groups) {
+        ovn_group_table_clear(groups, true);
+    }
+
     state = S_UPDATE_FLOWS;
 }
 
@@ -591,16 +616,71 @@  queue_flow_mod(struct ofputil_flow_mod *fm)
     queue_msg(ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM));
 }
 
+
+/* group_table. */
+
+/* Finds and returns a group_info in 'existing_groups' whose key is identical
+ * to 'target''s key, or NULL if there is none. */
+static struct group_info *
+ovn_group_lookup(struct hmap *exisiting_groups,
+                 const struct group_info *target)
+{
+    struct group_info *e;
+
+    HMAP_FOR_EACH_WITH_HASH(e, hmap_node, target->hmap_node.hash,
+                            exisiting_groups) {
+        if (e->group_id == target->group_id) {
+            return e;
+        }
+   }
+    return NULL;
+}
+
+/* Clear either desired_groups or existing_groups in group_table. */
+static void
+ovn_group_table_clear(struct group_table *group_table, bool existing)
+{
+    struct group_info *g, *next;
+    struct hmap *target_group = existing
+                                ? &group_table->existing_groups
+                                : &group_table->desired_groups;
+
+    HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
+        hmap_remove(target_group, &g->hmap_node);
+        bitmap_set0(group_table->group_ids, g->group_id);
+        ds_destroy(g->group);
+        free(g->group);
+        free(g);
+    }
+}
+
+static void
+queue_group_mod(struct ofputil_group_mod *gm)
+{
+    queue_msg(ofputil_encode_group_mod(OFP13_VERSION, gm));
+}
+
+
 /* Replaces the flow table on the switch, if possible, by the flows in
  * 'flow_table', which should have been added with ofctrl_add_flow().
  * Regardless of whether the flow table is updated, this deletes all of the
  * flows from 'flow_table' and frees them.  (The hmap itself isn't
  * destroyed.)
  *
+ * Replaces the group table on the switch, if possible, by the groups in
+ * 'group_table->desired_groups'. Regardless of whether the group table
+ * is updated, this deletes all the groups from the
+ * 'group_table->desired_groups' and frees them. (The hmap itself isn't
+ * destroyed.)
+ *
  * This called be called be ofctrl_run() within the main loop. */
 void
-ofctrl_put(struct hmap *flow_table)
+ofctrl_put(struct hmap *flow_table, struct group_table *group_table)
 {
+    if (!groups) {
+        groups = group_table;
+    }
+
     /* The flow table can be updated if the connection to the switch is up and
      * in the correct state and not backlogged with existing flow_mods.  (Our
      * criteria for being backlogged appear very conservative, but the socket
@@ -610,9 +690,37 @@  ofctrl_put(struct hmap *flow_table)
     if (state != S_UPDATE_FLOWS
         || rconn_packet_counter_n_packets(tx_counter)) {
         ovn_flow_table_clear(flow_table);
+        ovn_group_table_clear(group_table, false);
         return;
     }
 
+    /* Iterate through all the desired groups. If there are new ones,
+     * add them to the switch. */
+    struct group_info *desired;
+    HMAP_FOR_EACH(desired, hmap_node, &group_table->desired_groups) {
+        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
+            /* Create and install new group. */
+            struct ofputil_group_mod gm;
+            enum ofputil_protocol usable_protocols;
+            char *error;
+            struct ds group_string = DS_EMPTY_INITIALIZER;
+            ds_put_format(&group_string, "group_id=%u,%s",
+                          desired->group_id, ds_cstr(desired->group));
+
+            error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
+                                            ds_cstr(&group_string),
+                                            &usable_protocols);
+            if (!error) {
+                queue_group_mod(&gm);
+            } else {
+                VLOG_ERR("new group %s %s", error, ds_cstr(&group_string));
+                free(error);
+            }
+            ds_destroy(&group_string);
+            ofputil_bucket_list_destroy(&gm.buckets);
+        }
+    }
+
     /* Iterate through all of the installed flows.  If any of them are no
      * longer desired, delete them; if any of them should have different
      * actions, update them. */
@@ -682,4 +790,54 @@  ofctrl_put(struct hmap *flow_table)
         hmap_remove(flow_table, &d->hmap_node);
         hmap_insert(&installed_flows, &d->hmap_node, d->hmap_node.hash);
     }
+
+    /* Iterate through the installed groups from previous runs. If they
+     * are not needed delete them. */
+    struct group_info *installed, *next_group;
+    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
+                       &group_table->existing_groups) {
+        if (!ovn_group_lookup(&group_table->desired_groups, installed)) {
+            /* Delete the group. */
+            struct ofputil_group_mod gm;
+            enum ofputil_protocol usable_protocols;
+            char *error;
+            struct ds group_string = DS_EMPTY_INITIALIZER;
+            ds_put_format(&group_string, "group_id=%u", installed->group_id);
+
+            error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
+                                            ds_cstr(&group_string),
+                                            &usable_protocols);
+            if (!error) {
+                queue_group_mod(&gm);
+            } else {
+                VLOG_ERR("%s", error);
+                free(error);
+            }
+            ds_destroy(&group_string);
+            ofputil_bucket_list_destroy(&gm.buckets);
+
+            /* Remove 'installed' from 'group_table->existing_groups' */
+            hmap_remove(&group_table->existing_groups, &installed->hmap_node);
+            ds_destroy(installed->group);
+            free(installed->group);
+
+            /* Dealloc group_id. */
+            bitmap_set0(group_table->group_ids, installed->group_id);
+            free(installed);
+        }
+    }
+
+    /* Move the contents of desired_groups to existing_groups. */
+    HMAP_FOR_EACH_SAFE(desired, next_group, hmap_node,
+                       &group_table->desired_groups) {
+        hmap_remove(&group_table->desired_groups, &desired->hmap_node);
+        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
+            hmap_insert(&group_table->existing_groups, &desired->hmap_node,
+                        desired->hmap_node.hash);
+        } else {
+            ds_destroy(desired->group);
+            free(desired->group);
+            free(desired);
+        }
+    }
 }
diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index bc9cfba..bf5dfd5 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -26,11 +26,12 @@  struct hmap;
 struct match;
 struct ofpbuf;
 struct ovsrec_bridge;
+struct group_table;
 
 /* Interface for OVN main loop. */
 void ofctrl_init(void);
 enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int);
-void ofctrl_put(struct hmap *flows);
+void ofctrl_put(struct hmap *flows, struct group_table *group_table);
 void ofctrl_wait(void);
 void ofctrl_destroy(void);
 
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 47e6824..2e397f3 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -39,6 +39,7 @@ 
 #include "ofctrl.h"
 #include "openvswitch/vconn.h"
 #include "openvswitch/vlog.h"
+#include "ovn/lib/actions.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "ovn/lib/ovn-util.h"
 #include "patch.h"
@@ -351,6 +352,13 @@  main(int argc, char *argv[])
     }
     unixctl_command_register("exit", "", 0, 0, ovn_controller_exit, &exiting);
 
+    /* Initialize group ids for loadbalancing. */
+    struct group_table group_table;
+    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
+    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
+    hmap_init(&group_table.desired_groups);
+    hmap_init(&group_table.existing_groups);
+
     daemonize_complete();
 
     ovsrec_init();
@@ -462,13 +470,14 @@  main(int argc, char *argv[])
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
             lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
-                      &patched_datapaths, &ct_zones, &flow_table);
+                      &patched_datapaths, &group_table, &ct_zones,
+                      &flow_table);
             if (chassis_id) {
                 physical_run(&ctx, mff_ovn_geneve,
                              br_int, chassis_id, &ct_zones, &flow_table,
                              &local_datapaths, &patched_datapaths);
             }
-            ofctrl_put(&flow_table);
+            ofctrl_put(&flow_table, &group_table);
             hmap_destroy(&flow_table);
         }
 
@@ -528,6 +537,19 @@  main(int argc, char *argv[])
 
     simap_destroy(&ct_zones);
 
+    bitmap_free(group_table.group_ids);
+    hmap_destroy(&group_table.desired_groups);
+
+    struct group_info *installed, *next_group;
+    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
+                       &group_table.existing_groups) {
+        hmap_remove(&group_table.existing_groups, &installed->hmap_node);
+        ds_destroy(installed->group);
+        free(installed->group);
+        free(installed);
+    }
+    hmap_destroy(&group_table.existing_groups);
+
     ovsdb_idl_loop_destroy(&ovs_idl_loop);
     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
 
diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 569970e..81d809f 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -18,10 +18,13 @@ 
 #include <stdarg.h>
 #include <stdbool.h>
 #include "actions.h"
+#include "bitmap.h"
 #include "byte-order.h"
 #include "compiler.h"
 #include "ovn-dhcp.h"
 #include "expr.h"
+#include "hash.h"
+#include "hmap.h"
 #include "lex.h"
 #include "logical-fields.h"
 #include "nx-match.h"
@@ -624,6 +627,136 @@  parse_put_dhcp_opts_action(struct action_context *ctx,
 }
 
 static void
+parse_ct_lb_action(struct action_context *ctx)
+{
+    uint8_t recirc_table;
+    if (ctx->ap->cur_ltable < ctx->ap->n_tables) {
+        recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1;
+    } else {
+        action_error(ctx, "\"ct_lb\" action not allowed in last table.");
+        return;
+    }
+
+    if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        /* ct_lb without parenthesis means that this is an established
+         * connection and we just need to do a NAT. */
+        const size_t ct_offset = ctx->ofpacts->size;
+        ofpbuf_pull(ctx->ofpacts, ct_offset);
+
+        struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
+        struct ofpact_nat *nat;
+        size_t nat_offset;
+        ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE);
+        ct->zone_src.ofs = 0;
+        ct->zone_src.n_bits = 16;
+        ct->flags = 0;
+        ct->recirc_table = recirc_table;
+        ct->alg = 0;
+
+        add_prerequisite(ctx, "ip");
+
+        nat_offset = ctx->ofpacts->size;
+        ofpbuf_pull(ctx->ofpacts, nat_offset);
+
+        nat = ofpact_put_NAT(ctx->ofpacts);
+        nat->flags = 0;
+        nat->range_af = AF_UNSPEC;
+
+        ctx->ofpacts->header = ofpbuf_push_uninit(ctx->ofpacts, nat_offset);
+        ct = ctx->ofpacts->header;
+        ofpact_finish(ctx->ofpacts, &ct->ofpact);
+        ofpbuf_push_uninit(ctx->ofpacts, ct_offset);
+        return;
+    }
+
+    char *ips, *ip, *save = NULL;
+    uint32_t group_id = 0, bucket_id = 0, hash;
+    struct group_info *group_info;
+    struct ofpact_group *og;
+    struct ds *ds;
+
+    if (!lexer_get_string(ctx->lexer, &ips) || !ips[0]) {
+        action_error(ctx, "ct_lb has missing ip parameters.");
+        return;
+    }
+
+    ds = xmalloc(sizeof *ds);
+    ds_init(ds);
+    ds_put_format(ds, "type=select");
+
+    ip = strtok_r(ips, ",", &save);
+    ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
+                  "ct(nat(dst=%s),commit,table=%d,zone=NXM_NX_REG5[0..15])"
+                  ,bucket_id, ip, recirc_table);
+    while ((ip = strtok_r(NULL, ",", &save)) != NULL) {
+        bucket_id++;
+        ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
+                      "ct(nat(dst=%s),commit,table=%d,zone="
+                      "NXM_NX_REG5[0..15])", bucket_id, ip, recirc_table);
+    }
+
+    free(ips);
+    if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+        ds_destroy(ds);
+        free(ds);
+        action_syntax_error(ctx, "expecting `)'");
+        return;
+    }
+    add_prerequisite(ctx, "ip");
+
+    hash = hash_string(ds_cstr(ds), 0);
+
+    /* Check whether we have non installed but allocated group_id. */
+    HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
+                             &ctx->ap->group_table->desired_groups) {
+        if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
+            group_id = group_info->group_id;
+            break;
+        }
+    }
+
+    if (!group_id) {
+        /* Check whether we already have an installed entry for this
+         * combination. */
+        HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
+                                 &ctx->ap->group_table->existing_groups) {
+            if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
+                group_id = group_info->group_id;
+            }
+        }
+
+        if (!group_id) {
+            /* Reserve a new group_id. */
+            group_id = bitmap_scan(ctx->ap->group_table->group_ids, 0, 1,
+                                   MAX_OVN_GROUPS + 1);
+        }
+
+        if (group_id == MAX_OVN_GROUPS + 1) {
+            ds_destroy(ds);
+            free(ds);
+            action_error(ctx, "out of group ids.");
+            return;
+        }
+        bitmap_set1(ctx->ap->group_table->group_ids, group_id);
+
+        group_info = xmalloc(sizeof *group_info);
+        group_info->group = ds;
+        group_info->group_id = group_id;
+        group_info->hmap_node.hash = hash;
+
+        hmap_insert(&ctx->ap->group_table->desired_groups,
+                    &group_info->hmap_node, group_info->hmap_node.hash);
+    } else {
+        ds_destroy(ds);
+        free(ds);
+    }
+
+    /* Create an action to set the group. */
+    og = ofpact_put_GROUP(ctx->ofpacts);
+    og->group_id = group_id;
+}
+
+static void
 emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
 {
     struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
@@ -762,6 +895,8 @@  parse_action(struct action_context *ctx)
         parse_ct_nat(ctx, false);
     } else if (lexer_match_id(ctx->lexer, "ct_snat")) {
         parse_ct_nat(ctx, true);
+    } else if (lexer_match_id(ctx->lexer, "ct_lb")) {
+        parse_ct_lb_action(ctx);
     } else if (lexer_match_id(ctx->lexer, "arp")) {
         parse_arp_action(ctx);
     } else if (lexer_match_id(ctx->lexer, "get_arp")) {
diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
index f49e15e..7f3f478 100644
--- a/ovn/lib/actions.h
+++ b/ovn/lib/actions.h
@@ -20,6 +20,7 @@ 
 #include <stdbool.h>
 #include <stdint.h>
 #include "compiler.h"
+#include "hmap.h"
 #include "util.h"
 
 struct expr;
@@ -28,6 +29,22 @@  struct ofpbuf;
 struct shash;
 struct simap;
 
+#define MAX_OVN_GROUPS 65535
+
+struct group_table {
+    unsigned long *group_ids;  /* Used as a bitmap with value set
+                                * for allocated group ids in either
+                                * desired_groups or existing_groups. */
+    struct hmap desired_groups;
+    struct hmap existing_groups;
+};
+
+struct group_info {
+    struct hmap_node hmap_node;
+    struct ds *group;
+    uint32_t group_id;
+};
+
 enum action_opcode {
     /* "arp { ...actions... }".
      *
@@ -80,6 +97,9 @@  struct action_params {
     /* A map from a port name to its connection tracking zone. */
     const struct simap *ct_zones;
 
+    /* A struct to figure out the group_id for group actions. */
+    struct group_table *group_table;
+
     /* OVN maps each logical flow table (ltable), one-to-one, onto a physical
      * OpenFlow flow table (ptable).  A number of parameters describe this
      * mapping and data related to flow tables:
diff --git a/ovn/lib/lex.c b/ovn/lib/lex.c
index 556288f..529e352 100644
--- a/ovn/lib/lex.c
+++ b/ovn/lib/lex.c
@@ -828,3 +828,16 @@  lexer_get_int(struct lexer *lexer, int *value)
         return false;
     }
 }
+
+bool
+lexer_get_string(struct lexer *lexer, char **str)
+{
+    if (lexer->token.type == LEX_T_STRING) {
+        *str = xstrdup(lexer->token.s);
+        lexer_get(lexer);
+        return true;
+    } else {
+        *str = NULL;
+        return false;
+    }
+}
diff --git a/ovn/lib/lex.h b/ovn/lib/lex.h
index 578ef40..cb6abd6 100644
--- a/ovn/lib/lex.h
+++ b/ovn/lib/lex.h
@@ -121,5 +121,6 @@  bool lexer_match(struct lexer *, enum lex_type);
 bool lexer_match_id(struct lexer *, const char *id);
 bool lexer_is_int(const struct lexer *);
 bool lexer_get_int(struct lexer *, int *value);
+bool lexer_get_string(struct lexer *, char  **str);
 
 #endif /* ovn/lex.h */
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index f330374..61b6c25 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1108,6 +1108,30 @@ 
             </code>
           </p>
         </dd>
+
+        <dt><code>ct_lb;</code></dt>
+        <dt><code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>");</code></dt>
+        <dd>
+          <p>
+            <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
+            commits the packet to the connection tracking table and DNATs the
+            packet's destination IP address (and port) to any one of the
+            provided IP address (and port) inside the parenthesis.  Each of
+            the provided IP address is given equal weight while picking the
+            DNAT address.  The processing automatically moves on to the next
+            table and the next tables will see the changes in the packet caused
+            by the connection tracker.  The connection tracking state is scoped
+            by the logical port, so overlapping addresses may be used.
+          </p>
+          <p>
+            <code>ct_lb</code> sends the packet to the connection tracking
+            table to NAT the packets.  If the packet is part of an established
+            connection that was previously committed to the connection tracker
+            via <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
+            action, it will automatically get DNATed to the same IP address
+            as the first packet in that connection.
+          </p>
+        </dd>
       </dl>
 
       <p>
diff --git a/tests/ovn.at b/tests/ovn.at
index 297070c..02dc677 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -503,6 +503,14 @@  reg0[0..7] <-> ip.proto; => Field ip.proto is not modifiable.
 ip.ttl--; => actions=dec_ttl, prereqs=ip
 ip.ttl => Syntax error at end of input expecting `--'.
 
+# load balancing.
+ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip
+ct_lb(); => ct_lb has missing ip parameters.
+ct_lb(""); => ct_lb has missing ip parameters.
+ct_lb("192.168.1.2:80, 192.168.1.3:80"); => actions=group:1, prereqs=ip
+ct_lb("192.168.1.2, 192.168.1.3"); => actions=group:2, prereqs=ip
+ct_lb("foo"); => actions=group:3, prereqs=ip
+
 # conntrack
 ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip
 ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip
diff --git a/tests/test-ovn.c b/tests/test-ovn.c
index 18e5aca..1c468d6 100644
--- a/tests/test-ovn.c
+++ b/tests/test-ovn.c
@@ -1257,6 +1257,13 @@  test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
     create_symtab(&symtab);
     create_dhcp_opts(&dhcp_opts);
 
+    /* Initialize group ids. */
+    struct group_table group_table;
+    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
+    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
+    hmap_init(&group_table.desired_groups);
+    hmap_init(&group_table.existing_groups);
+
     simap_init(&ports);
     simap_put(&ports, "eth0", 5);
     simap_put(&ports, "eth1", 6);
@@ -1277,6 +1284,7 @@  test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
             .lookup_port = lookup_port_cb,
             .aux = &ports,
             .ct_zones = &ct_zones,
+            .group_table = &group_table,
 
             .n_tables = 16,
             .first_ptable = 16,